JsonCpp project page JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5 
6 #if !defined(JSON_IS_AMALGAMATION)
7 #include <json/assertions.h>
8 #include <json/reader.h>
9 #include <json/value.h>
10 #include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
12 #include <utility>
13 #include <cstdio>
14 #include <cassert>
15 #include <cstring>
16 #include <istream>
17 #include <sstream>
18 #include <memory>
19 #include <set>
20 
21 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
22 #define snprintf _snprintf
23 #endif
24 
25 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
26 // Disable warning about strdup being deprecated.
27 #pragma warning(disable : 4996)
28 #endif
29 
30 static int const stackLimit_g = 1000;
31 static int stackDepth_g = 0; // see readValue()
32 
33 namespace Json {
34 
35 #if __cplusplus >= 201103L
36 typedef std::unique_ptr<CharReader> CharReaderPtr;
37 #else
38 typedef std::auto_ptr<CharReader> CharReaderPtr;
39 #endif
40 
41 // Implementation of class Features
42 // ////////////////////////////////
43 
45  : allowComments_(true), strictRoot_(false),
46  allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
47 
49 
51  Features features;
52  features.allowComments_ = false;
53  features.strictRoot_ = true;
54  features.allowDroppedNullPlaceholders_ = false;
55  features.allowNumericKeys_ = false;
56  return features;
57 }
58 
59 // Implementation of class Reader
60 // ////////////////////////////////
61 
63  for (; begin < end; ++begin)
64  if (*begin == '\n' || *begin == '\r')
65  return true;
66  return false;
67 }
68 
69 // Class Reader
70 // //////////////////////////////////////////////////////////////////
71 
73  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
74  lastValue_(), commentsBefore_(), features_(Features::all()),
75  collectComments_() {}
76 
77 Reader::Reader(const Features& features)
78  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
79  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
80 }
81 
82 bool
83 Reader::parse(const std::string& document, Value& root, bool collectComments) {
84  document_ = document;
85  const char* begin = document_.c_str();
86  const char* end = begin + document_.length();
87  return parse(begin, end, root, collectComments);
88 }
89 
90 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
91  // std::istream_iterator<char> begin(sin);
92  // std::istream_iterator<char> end;
93  // Those would allow streamed input from a file, if parse() were a
94  // template function.
95 
96  // Since std::string is reference-counted, this at least does not
97  // create an extra copy.
98  std::string doc;
99  std::getline(sin, doc, (char)EOF);
100  return parse(doc, root, collectComments);
101 }
102 
103 bool Reader::parse(const char* beginDoc,
104  const char* endDoc,
105  Value& root,
106  bool collectComments) {
107  if (!features_.allowComments_) {
108  collectComments = false;
109  }
110 
111  begin_ = beginDoc;
112  end_ = endDoc;
113  collectComments_ = collectComments;
114  current_ = begin_;
115  lastValueEnd_ = 0;
116  lastValue_ = 0;
117  commentsBefore_ = "";
118  errors_.clear();
119  while (!nodes_.empty())
120  nodes_.pop();
121  nodes_.push(&root);
122 
123  stackDepth_g = 0; // Yes, this is bad coding, but options are limited.
124  bool successful = readValue();
125  Token token;
126  skipCommentTokens(token);
127  if (collectComments_ && !commentsBefore_.empty())
128  root.setComment(commentsBefore_, commentAfter);
129  if (features_.strictRoot_) {
130  if (!root.isArray() && !root.isObject()) {
131  // Set error location to start of doc, ideally should be first token found
132  // in doc
133  token.type_ = tokenError;
134  token.start_ = beginDoc;
135  token.end_ = endDoc;
136  addError(
137  "A valid JSON document must be either an array or an object value.",
138  token);
139  return false;
140  }
141  }
142  return successful;
143 }
144 
145 bool Reader::readValue() {
146  // This is a non-reentrant way to support a stackLimit. Terrible!
147  // But this deprecated class has a security problem: Bad input can
148  // cause a seg-fault. This seems like a fair, binary-compatible way
149  // to prevent the problem.
150  if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
151  ++stackDepth_g;
152 
153  Token token;
154  skipCommentTokens(token);
155  bool successful = true;
156 
157  if (collectComments_ && !commentsBefore_.empty()) {
158  currentValue().setComment(commentsBefore_, commentBefore);
159  commentsBefore_ = "";
160  }
161 
162  switch (token.type_) {
163  case tokenObjectBegin:
164  successful = readObject(token);
165  currentValue().setOffsetLimit(current_ - begin_);
166  break;
167  case tokenArrayBegin:
168  successful = readArray(token);
169  currentValue().setOffsetLimit(current_ - begin_);
170  break;
171  case tokenNumber:
172  successful = decodeNumber(token);
173  break;
174  case tokenString:
175  successful = decodeString(token);
176  break;
177  case tokenTrue:
178  {
179  Value v(true);
180  currentValue().swapPayload(v);
181  currentValue().setOffsetStart(token.start_ - begin_);
182  currentValue().setOffsetLimit(token.end_ - begin_);
183  }
184  break;
185  case tokenFalse:
186  {
187  Value v(false);
188  currentValue().swapPayload(v);
189  currentValue().setOffsetStart(token.start_ - begin_);
190  currentValue().setOffsetLimit(token.end_ - begin_);
191  }
192  break;
193  case tokenNull:
194  {
195  Value v;
196  currentValue().swapPayload(v);
197  currentValue().setOffsetStart(token.start_ - begin_);
198  currentValue().setOffsetLimit(token.end_ - begin_);
199  }
200  break;
201  case tokenArraySeparator:
202  case tokenObjectEnd:
203  case tokenArrayEnd:
204  if (features_.allowDroppedNullPlaceholders_) {
205  // "Un-read" the current token and mark the current value as a null
206  // token.
207  current_--;
208  Value v;
209  currentValue().swapPayload(v);
210  currentValue().setOffsetStart(current_ - begin_ - 1);
211  currentValue().setOffsetLimit(current_ - begin_);
212  break;
213  } // Else, fall through...
214  default:
215  currentValue().setOffsetStart(token.start_ - begin_);
216  currentValue().setOffsetLimit(token.end_ - begin_);
217  return addError("Syntax error: value, object or array expected.", token);
218  }
219 
220  if (collectComments_) {
221  lastValueEnd_ = current_;
222  lastValue_ = &currentValue();
223  }
224 
225  --stackDepth_g;
226  return successful;
227 }
228 
229 void Reader::skipCommentTokens(Token& token) {
230  if (features_.allowComments_) {
231  do {
232  readToken(token);
233  } while (token.type_ == tokenComment);
234  } else {
235  readToken(token);
236  }
237 }
238 
239 bool Reader::readToken(Token& token) {
240  skipSpaces();
241  token.start_ = current_;
242  Char c = getNextChar();
243  bool ok = true;
244  switch (c) {
245  case '{':
246  token.type_ = tokenObjectBegin;
247  break;
248  case '}':
249  token.type_ = tokenObjectEnd;
250  break;
251  case '[':
252  token.type_ = tokenArrayBegin;
253  break;
254  case ']':
255  token.type_ = tokenArrayEnd;
256  break;
257  case '"':
258  token.type_ = tokenString;
259  ok = readString();
260  break;
261  case '/':
262  token.type_ = tokenComment;
263  ok = readComment();
264  break;
265  case '0':
266  case '1':
267  case '2':
268  case '3':
269  case '4':
270  case '5':
271  case '6':
272  case '7':
273  case '8':
274  case '9':
275  case '-':
276  token.type_ = tokenNumber;
277  readNumber();
278  break;
279  case 't':
280  token.type_ = tokenTrue;
281  ok = match("rue", 3);
282  break;
283  case 'f':
284  token.type_ = tokenFalse;
285  ok = match("alse", 4);
286  break;
287  case 'n':
288  token.type_ = tokenNull;
289  ok = match("ull", 3);
290  break;
291  case ',':
292  token.type_ = tokenArraySeparator;
293  break;
294  case ':':
295  token.type_ = tokenMemberSeparator;
296  break;
297  case 0:
298  token.type_ = tokenEndOfStream;
299  break;
300  default:
301  ok = false;
302  break;
303  }
304  if (!ok)
305  token.type_ = tokenError;
306  token.end_ = current_;
307  return true;
308 }
309 
310 void Reader::skipSpaces() {
311  while (current_ != end_) {
312  Char c = *current_;
313  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
314  ++current_;
315  else
316  break;
317  }
318 }
319 
320 bool Reader::match(Location pattern, int patternLength) {
321  if (end_ - current_ < patternLength)
322  return false;
323  int index = patternLength;
324  while (index--)
325  if (current_[index] != pattern[index])
326  return false;
327  current_ += patternLength;
328  return true;
329 }
330 
331 bool Reader::readComment() {
332  Location commentBegin = current_ - 1;
333  Char c = getNextChar();
334  bool successful = false;
335  if (c == '*')
336  successful = readCStyleComment();
337  else if (c == '/')
338  successful = readCppStyleComment();
339  if (!successful)
340  return false;
341 
342  if (collectComments_) {
343  CommentPlacement placement = commentBefore;
344  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
345  if (c != '*' || !containsNewLine(commentBegin, current_))
346  placement = commentAfterOnSameLine;
347  }
348 
349  addComment(commentBegin, current_, placement);
350  }
351  return true;
352 }
353 
354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
355  std::string normalized;
356  normalized.reserve(end - begin);
357  Reader::Location current = begin;
358  while (current != end) {
359  char c = *current++;
360  if (c == '\r') {
361  if (current != end && *current == '\n')
362  // convert dos EOL
363  ++current;
364  // convert Mac EOL
365  normalized += '\n';
366  } else {
367  normalized += c;
368  }
369  }
370  return normalized;
371 }
372 
373 void
374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
375  assert(collectComments_);
376  const std::string& normalized = normalizeEOL(begin, end);
377  if (placement == commentAfterOnSameLine) {
378  assert(lastValue_ != 0);
379  lastValue_->setComment(normalized, placement);
380  } else {
381  commentsBefore_ += normalized;
382  }
383 }
384 
385 bool Reader::readCStyleComment() {
386  while (current_ != end_) {
387  Char c = getNextChar();
388  if (c == '*' && *current_ == '/')
389  break;
390  }
391  return getNextChar() == '/';
392 }
393 
394 bool Reader::readCppStyleComment() {
395  while (current_ != end_) {
396  Char c = getNextChar();
397  if (c == '\n')
398  break;
399  if (c == '\r') {
400  // Consume DOS EOL. It will be normalized in addComment.
401  if (current_ != end_ && *current_ == '\n')
402  getNextChar();
403  // Break on Moc OS 9 EOL.
404  break;
405  }
406  }
407  return true;
408 }
409 
410 void Reader::readNumber() {
411  const char *p = current_;
412  char c = '0'; // stopgap for already consumed character
413  // integral part
414  while (c >= '0' && c <= '9')
415  c = (current_ = p) < end_ ? *p++ : 0;
416  // fractional part
417  if (c == '.') {
418  c = (current_ = p) < end_ ? *p++ : 0;
419  while (c >= '0' && c <= '9')
420  c = (current_ = p) < end_ ? *p++ : 0;
421  }
422  // exponential part
423  if (c == 'e' || c == 'E') {
424  c = (current_ = p) < end_ ? *p++ : 0;
425  if (c == '+' || c == '-')
426  c = (current_ = p) < end_ ? *p++ : 0;
427  while (c >= '0' && c <= '9')
428  c = (current_ = p) < end_ ? *p++ : 0;
429  }
430 }
431 
432 bool Reader::readString() {
433  Char c = 0;
434  while (current_ != end_) {
435  c = getNextChar();
436  if (c == '\\')
437  getNextChar();
438  else if (c == '"')
439  break;
440  }
441  return c == '"';
442 }
443 
444 bool Reader::readObject(Token& tokenStart) {
445  Token tokenName;
446  std::string name;
447  Value init(objectValue);
448  currentValue().swapPayload(init);
449  currentValue().setOffsetStart(tokenStart.start_ - begin_);
450  while (readToken(tokenName)) {
451  bool initialTokenOk = true;
452  while (tokenName.type_ == tokenComment && initialTokenOk)
453  initialTokenOk = readToken(tokenName);
454  if (!initialTokenOk)
455  break;
456  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
457  return true;
458  name = "";
459  if (tokenName.type_ == tokenString) {
460  if (!decodeString(tokenName, name))
461  return recoverFromError(tokenObjectEnd);
462  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
463  Value numberName;
464  if (!decodeNumber(tokenName, numberName))
465  return recoverFromError(tokenObjectEnd);
466  name = numberName.asString();
467  } else {
468  break;
469  }
470 
471  Token colon;
472  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
473  return addErrorAndRecover(
474  "Missing ':' after object member name", colon, tokenObjectEnd);
475  }
476  Value& value = currentValue()[name];
477  nodes_.push(&value);
478  bool ok = readValue();
479  nodes_.pop();
480  if (!ok) // error already set
481  return recoverFromError(tokenObjectEnd);
482 
483  Token comma;
484  if (!readToken(comma) ||
485  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
486  comma.type_ != tokenComment)) {
487  return addErrorAndRecover(
488  "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
489  }
490  bool finalizeTokenOk = true;
491  while (comma.type_ == tokenComment && finalizeTokenOk)
492  finalizeTokenOk = readToken(comma);
493  if (comma.type_ == tokenObjectEnd)
494  return true;
495  }
496  return addErrorAndRecover(
497  "Missing '}' or object member name", tokenName, tokenObjectEnd);
498 }
499 
500 bool Reader::readArray(Token& tokenStart) {
501  Value init(arrayValue);
502  currentValue().swapPayload(init);
503  currentValue().setOffsetStart(tokenStart.start_ - begin_);
504  skipSpaces();
505  if (*current_ == ']') // empty array
506  {
507  Token endArray;
508  readToken(endArray);
509  return true;
510  }
511  int index = 0;
512  for (;;) {
513  Value& value = currentValue()[index++];
514  nodes_.push(&value);
515  bool ok = readValue();
516  nodes_.pop();
517  if (!ok) // error already set
518  return recoverFromError(tokenArrayEnd);
519 
520  Token token;
521  // Accept Comment after last item in the array.
522  ok = readToken(token);
523  while (token.type_ == tokenComment && ok) {
524  ok = readToken(token);
525  }
526  bool badTokenType =
527  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
528  if (!ok || badTokenType) {
529  return addErrorAndRecover(
530  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
531  }
532  if (token.type_ == tokenArrayEnd)
533  break;
534  }
535  return true;
536 }
537 
538 bool Reader::decodeNumber(Token& token) {
539  Value decoded;
540  if (!decodeNumber(token, decoded))
541  return false;
542  currentValue().swapPayload(decoded);
543  currentValue().setOffsetStart(token.start_ - begin_);
544  currentValue().setOffsetLimit(token.end_ - begin_);
545  return true;
546 }
547 
548 bool Reader::decodeNumber(Token& token, Value& decoded) {
549  // Attempts to parse the number as an integer. If the number is
550  // larger than the maximum supported value of an integer then
551  // we decode the number as a double.
552  Location current = token.start_;
553  bool isNegative = *current == '-';
554  if (isNegative)
555  ++current;
556  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
557  Value::LargestUInt maxIntegerValue =
559  : Value::maxLargestUInt;
560  Value::LargestUInt threshold = maxIntegerValue / 10;
561  Value::LargestUInt value = 0;
562  while (current < token.end_) {
563  Char c = *current++;
564  if (c < '0' || c > '9')
565  return decodeDouble(token, decoded);
566  Value::UInt digit(c - '0');
567  if (value >= threshold) {
568  // We've hit or exceeded the max value divided by 10 (rounded down). If
569  // a) we've only just touched the limit, b) this is the last digit, and
570  // c) it's small enough to fit in that rounding delta, we're okay.
571  // Otherwise treat this number as a double to avoid overflow.
572  if (value > threshold || current != token.end_ ||
573  digit > maxIntegerValue % 10) {
574  return decodeDouble(token, decoded);
575  }
576  }
577  value = value * 10 + digit;
578  }
579  if (isNegative)
580  decoded = -Value::LargestInt(value);
581  else if (value <= Value::LargestUInt(Value::maxInt))
582  decoded = Value::LargestInt(value);
583  else
584  decoded = value;
585  return true;
586 }
587 
588 bool Reader::decodeDouble(Token& token) {
589  Value decoded;
590  if (!decodeDouble(token, decoded))
591  return false;
592  currentValue().swapPayload(decoded);
593  currentValue().setOffsetStart(token.start_ - begin_);
594  currentValue().setOffsetLimit(token.end_ - begin_);
595  return true;
596 }
597 
598 bool Reader::decodeDouble(Token& token, Value& decoded) {
599  double value = 0;
600  const int bufferSize = 32;
601  int count;
602  int length = int(token.end_ - token.start_);
603 
604  // Sanity check to avoid buffer overflow exploits.
605  if (length < 0) {
606  return addError("Unable to parse token length", token);
607  }
608 
609  // Avoid using a string constant for the format control string given to
610  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
611  // info:
612  //
613  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
614  char format[] = "%lf";
615 
616  if (length <= bufferSize) {
617  Char buffer[bufferSize + 1];
618  memcpy(buffer, token.start_, length);
619  buffer[length] = 0;
620  count = sscanf(buffer, format, &value);
621  } else {
622  std::string buffer(token.start_, token.end_);
623  count = sscanf(buffer.c_str(), format, &value);
624  }
625 
626  if (count != 1)
627  return addError("'" + std::string(token.start_, token.end_) +
628  "' is not a number.",
629  token);
630  decoded = value;
631  return true;
632 }
633 
634 bool Reader::decodeString(Token& token) {
635  std::string decoded_string;
636  if (!decodeString(token, decoded_string))
637  return false;
638  Value decoded(decoded_string);
639  currentValue().swapPayload(decoded);
640  currentValue().setOffsetStart(token.start_ - begin_);
641  currentValue().setOffsetLimit(token.end_ - begin_);
642  return true;
643 }
644 
645 bool Reader::decodeString(Token& token, std::string& decoded) {
646  decoded.reserve(token.end_ - token.start_ - 2);
647  Location current = token.start_ + 1; // skip '"'
648  Location end = token.end_ - 1; // do not include '"'
649  while (current != end) {
650  Char c = *current++;
651  if (c == '"')
652  break;
653  else if (c == '\\') {
654  if (current == end)
655  return addError("Empty escape sequence in string", token, current);
656  Char escape = *current++;
657  switch (escape) {
658  case '"':
659  decoded += '"';
660  break;
661  case '/':
662  decoded += '/';
663  break;
664  case '\\':
665  decoded += '\\';
666  break;
667  case 'b':
668  decoded += '\b';
669  break;
670  case 'f':
671  decoded += '\f';
672  break;
673  case 'n':
674  decoded += '\n';
675  break;
676  case 'r':
677  decoded += '\r';
678  break;
679  case 't':
680  decoded += '\t';
681  break;
682  case 'u': {
683  unsigned int unicode;
684  if (!decodeUnicodeCodePoint(token, current, end, unicode))
685  return false;
686  decoded += codePointToUTF8(unicode);
687  } break;
688  default:
689  return addError("Bad escape sequence in string", token, current);
690  }
691  } else {
692  decoded += c;
693  }
694  }
695  return true;
696 }
697 
698 bool Reader::decodeUnicodeCodePoint(Token& token,
699  Location& current,
700  Location end,
701  unsigned int& unicode) {
702 
703  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
704  return false;
705  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
706  // surrogate pairs
707  if (end - current < 6)
708  return addError(
709  "additional six characters expected to parse unicode surrogate pair.",
710  token,
711  current);
712  unsigned int surrogatePair;
713  if (*(current++) == '\\' && *(current++) == 'u') {
714  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
715  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
716  } else
717  return false;
718  } else
719  return addError("expecting another \\u token to begin the second half of "
720  "a unicode surrogate pair",
721  token,
722  current);
723  }
724  return true;
725 }
726 
727 bool Reader::decodeUnicodeEscapeSequence(Token& token,
728  Location& current,
729  Location end,
730  unsigned int& unicode) {
731  if (end - current < 4)
732  return addError(
733  "Bad unicode escape sequence in string: four digits expected.",
734  token,
735  current);
736  unicode = 0;
737  for (int index = 0; index < 4; ++index) {
738  Char c = *current++;
739  unicode *= 16;
740  if (c >= '0' && c <= '9')
741  unicode += c - '0';
742  else if (c >= 'a' && c <= 'f')
743  unicode += c - 'a' + 10;
744  else if (c >= 'A' && c <= 'F')
745  unicode += c - 'A' + 10;
746  else
747  return addError(
748  "Bad unicode escape sequence in string: hexadecimal digit expected.",
749  token,
750  current);
751  }
752  return true;
753 }
754 
755 bool
756 Reader::addError(const std::string& message, Token& token, Location extra) {
757  ErrorInfo info;
758  info.token_ = token;
759  info.message_ = message;
760  info.extra_ = extra;
761  errors_.push_back(info);
762  return false;
763 }
764 
765 bool Reader::recoverFromError(TokenType skipUntilToken) {
766  int errorCount = int(errors_.size());
767  Token skip;
768  for (;;) {
769  if (!readToken(skip))
770  errors_.resize(errorCount); // discard errors caused by recovery
771  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
772  break;
773  }
774  errors_.resize(errorCount);
775  return false;
776 }
777 
778 bool Reader::addErrorAndRecover(const std::string& message,
779  Token& token,
780  TokenType skipUntilToken) {
781  addError(message, token);
782  return recoverFromError(skipUntilToken);
783 }
784 
785 Value& Reader::currentValue() { return *(nodes_.top()); }
786 
787 Reader::Char Reader::getNextChar() {
788  if (current_ == end_)
789  return 0;
790  return *current_++;
791 }
792 
793 void Reader::getLocationLineAndColumn(Location location,
794  int& line,
795  int& column) const {
796  Location current = begin_;
797  Location lastLineStart = current;
798  line = 0;
799  while (current < location && current != end_) {
800  Char c = *current++;
801  if (c == '\r') {
802  if (*current == '\n')
803  ++current;
804  lastLineStart = current;
805  ++line;
806  } else if (c == '\n') {
807  lastLineStart = current;
808  ++line;
809  }
810  }
811  // column & line start at 1
812  column = int(location - lastLineStart) + 1;
813  ++line;
814 }
815 
816 std::string Reader::getLocationLineAndColumn(Location location) const {
817  int line, column;
818  getLocationLineAndColumn(location, line, column);
819  char buffer[18 + 16 + 16 + 1];
820 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
821 #if defined(WINCE)
822  _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
823 #else
824  sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
825 #endif
826 #else
827  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
828 #endif
829  return buffer;
830 }
831 
832 // Deprecated. Preserved for backward compatibility
833 std::string Reader::getFormatedErrorMessages() const {
834  return getFormattedErrorMessages();
835 }
836 
838  std::string formattedMessage;
839  for (Errors::const_iterator itError = errors_.begin();
840  itError != errors_.end();
841  ++itError) {
842  const ErrorInfo& error = *itError;
843  formattedMessage +=
844  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
845  formattedMessage += " " + error.message_ + "\n";
846  if (error.extra_)
847  formattedMessage +=
848  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
849  }
850  return formattedMessage;
851 }
852 
853 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
854  std::vector<Reader::StructuredError> allErrors;
855  for (Errors::const_iterator itError = errors_.begin();
856  itError != errors_.end();
857  ++itError) {
858  const ErrorInfo& error = *itError;
859  Reader::StructuredError structured;
860  structured.offset_start = error.token_.start_ - begin_;
861  structured.offset_limit = error.token_.end_ - begin_;
862  structured.message = error.message_;
863  allErrors.push_back(structured);
864  }
865  return allErrors;
866 }
867 
868 bool Reader::pushError(const Value& value, const std::string& message) {
869  size_t length = end_ - begin_;
870  if(value.getOffsetStart() > length
871  || value.getOffsetLimit() > length)
872  return false;
873  Token token;
874  token.type_ = tokenError;
875  token.start_ = begin_ + value.getOffsetStart();
876  token.end_ = end_ + value.getOffsetLimit();
877  ErrorInfo info;
878  info.token_ = token;
879  info.message_ = message;
880  info.extra_ = 0;
881  errors_.push_back(info);
882  return true;
883 }
884 
885 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
886  size_t length = end_ - begin_;
887  if(value.getOffsetStart() > length
888  || value.getOffsetLimit() > length
889  || extra.getOffsetLimit() > length)
890  return false;
891  Token token;
892  token.type_ = tokenError;
893  token.start_ = begin_ + value.getOffsetStart();
894  token.end_ = begin_ + value.getOffsetLimit();
895  ErrorInfo info;
896  info.token_ = token;
897  info.message_ = message;
898  info.extra_ = begin_ + extra.getOffsetStart();
899  errors_.push_back(info);
900  return true;
901 }
902 
903 bool Reader::good() const {
904  return !errors_.size();
905 }
906 
907 // exact copy of Features
// Parsing options used by OurReader. Has the same four flags as Features,
// plus allowSingleQuotes_, failIfExtra_, rejectDupKeys_ and stackLimit_.
class OurFeatures {
public:
  // Returns the default option set (a default-constructed OurFeatures).
  static OurFeatures all();
  OurFeatures();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  int stackLimit_; // maximum nesting depth; checked by OurReader::readValue()
}; // OurFeatures

// Implementation of class OurFeatures
// ////////////////////////////////

// Defaults: comments allowed, every other flag off, nesting limit 1000 (the
// same limit Reader uses). rejectDupKeys_ and stackLimit_ used to be left
// uninitialised, so an OurFeatures::all() object carried indeterminate values
// for them.
OurFeatures::OurFeatures()
    : allowComments_(true), strictRoot_(false)
    , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false)
    , allowSingleQuotes_(false)
    , failIfExtra_(false)
    , rejectDupKeys_(false)
    , stackLimit_(1000)
{
}

OurFeatures OurFeatures::all() { return OurFeatures(); }
934 
935 // Implementation of class Reader
936 // ////////////////////////////////
937 
938 // exact copy of Reader, renamed to OurReader
939 class OurReader {
940 public:
941  typedef char Char;
942  typedef const Char* Location;
943  struct StructuredError {
944  size_t offset_start;
945  size_t offset_limit;
946  std::string message;
947  };
948 
949  OurReader(OurFeatures const& features);
950  bool parse(const char* beginDoc,
951  const char* endDoc,
952  Value& root,
953  bool collectComments = true);
954  std::string getFormattedErrorMessages() const;
955  std::vector<StructuredError> getStructuredErrors() const;
956  bool pushError(const Value& value, const std::string& message);
957  bool pushError(const Value& value, const std::string& message, const Value& extra);
958  bool good() const;
959 
960 private:
961  OurReader(OurReader const&); // no impl
962  void operator=(OurReader const&); // no impl
963 
964  enum TokenType {
965  tokenEndOfStream = 0,
966  tokenObjectBegin,
967  tokenObjectEnd,
968  tokenArrayBegin,
969  tokenArrayEnd,
970  tokenString,
971  tokenNumber,
972  tokenTrue,
973  tokenFalse,
974  tokenNull,
975  tokenArraySeparator,
976  tokenMemberSeparator,
977  tokenComment,
978  tokenError
979  };
980 
981  class Token {
982  public:
983  TokenType type_;
984  Location start_;
985  Location end_;
986  };
987 
988  class ErrorInfo {
989  public:
990  Token token_;
991  std::string message_;
992  Location extra_;
993  };
994 
995  typedef std::deque<ErrorInfo> Errors;
996 
997  bool readToken(Token& token);
998  void skipSpaces();
999  bool match(Location pattern, int patternLength);
1000  bool readComment();
1001  bool readCStyleComment();
1002  bool readCppStyleComment();
1003  bool readString();
1004  bool readStringSingleQuote();
1005  void readNumber();
1006  bool readValue();
1007  bool readObject(Token& token);
1008  bool readArray(Token& token);
1009  bool decodeNumber(Token& token);
1010  bool decodeNumber(Token& token, Value& decoded);
1011  bool decodeString(Token& token);
1012  bool decodeString(Token& token, std::string& decoded);
1013  bool decodeDouble(Token& token);
1014  bool decodeDouble(Token& token, Value& decoded);
1015  bool decodeUnicodeCodePoint(Token& token,
1016  Location& current,
1017  Location end,
1018  unsigned int& unicode);
1019  bool decodeUnicodeEscapeSequence(Token& token,
1020  Location& current,
1021  Location end,
1022  unsigned int& unicode);
1023  bool addError(const std::string& message, Token& token, Location extra = 0);
1024  bool recoverFromError(TokenType skipUntilToken);
1025  bool addErrorAndRecover(const std::string& message,
1026  Token& token,
1027  TokenType skipUntilToken);
1028  void skipUntilSpace();
1029  Value& currentValue();
1030  Char getNextChar();
1031  void
1032  getLocationLineAndColumn(Location location, int& line, int& column) const;
1033  std::string getLocationLineAndColumn(Location location) const;
1034  void addComment(Location begin, Location end, CommentPlacement placement);
1035  void skipCommentTokens(Token& token);
1036 
1037  typedef std::stack<Value*> Nodes;
1038  Nodes nodes_;
1039  Errors errors_;
1040  std::string document_;
1041  Location begin_;
1042  Location end_;
1043  Location current_;
1044  Location lastValueEnd_;
1045  Value* lastValue_;
1046  std::string commentsBefore_;
1047  int stackDepth_;
1048 
1049  OurFeatures const features_;
1050  bool collectComments_;
1051 }; // OurReader
1052 
1053 // complete copy of Read impl, for OurReader
1054 
1055 OurReader::OurReader(OurFeatures const& features)
1056  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1057  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1058 }
1059 
1060 bool OurReader::parse(const char* beginDoc,
1061  const char* endDoc,
1062  Value& root,
1063  bool collectComments) {
1064  if (!features_.allowComments_) {
1065  collectComments = false;
1066  }
1067 
1068  begin_ = beginDoc;
1069  end_ = endDoc;
1070  collectComments_ = collectComments;
1071  current_ = begin_;
1072  lastValueEnd_ = 0;
1073  lastValue_ = 0;
1074  commentsBefore_ = "";
1075  errors_.clear();
1076  while (!nodes_.empty())
1077  nodes_.pop();
1078  nodes_.push(&root);
1079 
1080  stackDepth_ = 0;
1081  bool successful = readValue();
1082  Token token;
1083  skipCommentTokens(token);
1084  if (features_.failIfExtra_) {
1085  if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
1086  addError("Extra non-whitespace after JSON value.", token);
1087  return false;
1088  }
1089  }
1090  if (collectComments_ && !commentsBefore_.empty())
1091  root.setComment(commentsBefore_, commentAfter);
1092  if (features_.strictRoot_) {
1093  if (!root.isArray() && !root.isObject()) {
1094  // Set error location to start of doc, ideally should be first token found
1095  // in doc
1096  token.type_ = tokenError;
1097  token.start_ = beginDoc;
1098  token.end_ = endDoc;
1099  addError(
1100  "A valid JSON document must be either an array or an object value.",
1101  token);
1102  return false;
1103  }
1104  }
1105  return successful;
1106 }
1107 
// Reads one JSON value into currentValue() (the top of nodes_).
//
// Skips leading comment tokens, attaches any pending "before" comments to
// the value, then dispatches on the first token's type. Objects and arrays
// recurse through readObject()/readArray(). Returns false when the value
// could not be read. Calls throwRuntimeError() when the nesting depth
// reaches features_.stackLimit_.
bool OurReader::readValue() {
  // Guard against unbounded recursion on deeply nested input.
  if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
  ++stackDepth_;
  Token token;
  skipCommentTokens(token);
  bool successful = true;

  // Comments gathered before this value belong to it.
  if (collectComments_ && !commentsBefore_.empty()) {
    currentValue().setComment(commentsBefore_, commentBefore);
    commentsBefore_ = "";
  }

  switch (token.type_) {
  case tokenObjectBegin:
    successful = readObject(token);
    // The container ends wherever the cursor stopped.
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenArrayBegin:
    successful = readArray(token);
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenNumber:
    successful = decodeNumber(token);
    break;
  case tokenString:
    successful = decodeString(token);
    break;
  case tokenTrue:
    {
    Value v(true);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    }
    break;
  case tokenFalse:
    {
    Value v(false);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    }
    break;
  case tokenNull:
    {
    Value v;
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    }
    break;
  case tokenArraySeparator:
  case tokenObjectEnd:
  case tokenArrayEnd:
    if (features_.allowDroppedNullPlaceholders_) {
      // "Un-read" the current token and mark the current value as a null
      // token.
      current_--;
      Value v;
      currentValue().swapPayload(v);
      currentValue().setOffsetStart(current_ - begin_ - 1);
      currentValue().setOffsetLimit(current_ - begin_);
      break;
    } // else, fall through ...
  default:
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    // NOTE: this path returns without decrementing stackDepth_.
    return addError("Syntax error: value, object or array expected.", token);
  }

  // Remember where this value ended so that a following comment can be
  // attached to it by readComment().
  if (collectComments_) {
    lastValueEnd_ = current_;
    lastValue_ = &currentValue();
  }

  --stackDepth_;
  return successful;
}
1186 
1187 void OurReader::skipCommentTokens(Token& token) {
1188  if (features_.allowComments_) {
1189  do {
1190  readToken(token);
1191  } while (token.type_ == tokenComment);
1192  } else {
1193  readToken(token);
1194  }
1195 }
1196 
1197 bool OurReader::readToken(Token& token) {
1198  skipSpaces();
1199  token.start_ = current_;
1200  Char c = getNextChar();
1201  bool ok = true;
1202  switch (c) {
1203  case '{':
1204  token.type_ = tokenObjectBegin;
1205  break;
1206  case '}':
1207  token.type_ = tokenObjectEnd;
1208  break;
1209  case '[':
1210  token.type_ = tokenArrayBegin;
1211  break;
1212  case ']':
1213  token.type_ = tokenArrayEnd;
1214  break;
1215  case '"':
1216  token.type_ = tokenString;
1217  ok = readString();
1218  break;
1219  case '\'':
1220  if (features_.allowSingleQuotes_) {
1221  token.type_ = tokenString;
1222  ok = readStringSingleQuote();
1223  break;
1224  } // else continue
1225  case '/':
1226  token.type_ = tokenComment;
1227  ok = readComment();
1228  break;
1229  case '0':
1230  case '1':
1231  case '2':
1232  case '3':
1233  case '4':
1234  case '5':
1235  case '6':
1236  case '7':
1237  case '8':
1238  case '9':
1239  case '-':
1240  token.type_ = tokenNumber;
1241  readNumber();
1242  break;
1243  case 't':
1244  token.type_ = tokenTrue;
1245  ok = match("rue", 3);
1246  break;
1247  case 'f':
1248  token.type_ = tokenFalse;
1249  ok = match("alse", 4);
1250  break;
1251  case 'n':
1252  token.type_ = tokenNull;
1253  ok = match("ull", 3);
1254  break;
1255  case ',':
1256  token.type_ = tokenArraySeparator;
1257  break;
1258  case ':':
1259  token.type_ = tokenMemberSeparator;
1260  break;
1261  case 0:
1262  token.type_ = tokenEndOfStream;
1263  break;
1264  default:
1265  ok = false;
1266  break;
1267  }
1268  if (!ok)
1269  token.type_ = tokenError;
1270  token.end_ = current_;
1271  return true;
1272 }
1273 
1274 void OurReader::skipSpaces() {
1275  while (current_ != end_) {
1276  Char c = *current_;
1277  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1278  ++current_;
1279  else
1280  break;
1281  }
1282 }
1283 
1284 bool OurReader::match(Location pattern, int patternLength) {
1285  if (end_ - current_ < patternLength)
1286  return false;
1287  int index = patternLength;
1288  while (index--)
1289  if (current_[index] != pattern[index])
1290  return false;
1291  current_ += patternLength;
1292  return true;
1293 }
1294 
1295 bool OurReader::readComment() {
1296  Location commentBegin = current_ - 1;
1297  Char c = getNextChar();
1298  bool successful = false;
1299  if (c == '*')
1300  successful = readCStyleComment();
1301  else if (c == '/')
1302  successful = readCppStyleComment();
1303  if (!successful)
1304  return false;
1305 
1306  if (collectComments_) {
1307  CommentPlacement placement = commentBefore;
1308  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1309  if (c != '*' || !containsNewLine(commentBegin, current_))
1310  placement = commentAfterOnSameLine;
1311  }
1312 
1313  addComment(commentBegin, current_, placement);
1314  }
1315  return true;
1316 }
1317 
1318 void
1319 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
1320  assert(collectComments_);
1321  const std::string& normalized = normalizeEOL(begin, end);
1322  if (placement == commentAfterOnSameLine) {
1323  assert(lastValue_ != 0);
1324  lastValue_->setComment(normalized, placement);
1325  } else {
1326  commentsBefore_ += normalized;
1327  }
1328 }
1329 
1330 bool OurReader::readCStyleComment() {
1331  while (current_ != end_) {
1332  Char c = getNextChar();
1333  if (c == '*' && *current_ == '/')
1334  break;
1335  }
1336  return getNextChar() == '/';
1337 }
1338 
1339 bool OurReader::readCppStyleComment() {
1340  while (current_ != end_) {
1341  Char c = getNextChar();
1342  if (c == '\n')
1343  break;
1344  if (c == '\r') {
1345  // Consume DOS EOL. It will be normalized in addComment.
1346  if (current_ != end_ && *current_ == '\n')
1347  getNextChar();
1348  // Break on Moc OS 9 EOL.
1349  break;
1350  }
1351  }
1352  return true;
1353 }
1354 
1355 void OurReader::readNumber() {
1356  const char *p = current_;
1357  char c = '0'; // stopgap for already consumed character
1358  // integral part
1359  while (c >= '0' && c <= '9')
1360  c = (current_ = p) < end_ ? *p++ : 0;
1361  // fractional part
1362  if (c == '.') {
1363  c = (current_ = p) < end_ ? *p++ : 0;
1364  while (c >= '0' && c <= '9')
1365  c = (current_ = p) < end_ ? *p++ : 0;
1366  }
1367  // exponential part
1368  if (c == 'e' || c == 'E') {
1369  c = (current_ = p) < end_ ? *p++ : 0;
1370  if (c == '+' || c == '-')
1371  c = (current_ = p) < end_ ? *p++ : 0;
1372  while (c >= '0' && c <= '9')
1373  c = (current_ = p) < end_ ? *p++ : 0;
1374  }
1375 }
1376 bool OurReader::readString() {
1377  Char c = 0;
1378  while (current_ != end_) {
1379  c = getNextChar();
1380  if (c == '\\')
1381  getNextChar();
1382  else if (c == '"')
1383  break;
1384  }
1385  return c == '"';
1386 }
1387 
1388 
1389 bool OurReader::readStringSingleQuote() {
1390  Char c = 0;
1391  while (current_ != end_) {
1392  c = getNextChar();
1393  if (c == '\\')
1394  getNextChar();
1395  else if (c == '\'')
1396  break;
1397  }
1398  return c == '\'';
1399 }
1400 
// Reads the members of an object whose opening '{' token is `tokenStart`.
//
// currentValue() is replaced by an empty object and then filled member by
// member. Returns true once the closing '}' has been read. On any error an
// entry is added to errors_ (unless a nested call already did so), tokens
// are skipped via recoverFromError(tokenObjectEnd), and false is returned.
bool OurReader::readObject(Token& tokenStart) {
  Token tokenName;
  std::string name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(tokenStart.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip comments that precede the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // `name` is still empty on the first iteration, so a '}' here means "{}".
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name = "";
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric keys are stored under their string form.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      // Anything else cannot be a member name; report it after the loop.
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover(
          "Missing ':' after object member name", colon, tokenObjectEnd);
    }
    if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
      std::string msg = "Duplicate key: '" + name + "'";
      return addErrorAndRecover(
          msg, tokenName, tokenObjectEnd);
    }
    // Parse the member value with the member pushed as the current node.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover(
          "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
    }
    // Skip comments between the value and the following ',' or '}'.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover(
      "Missing '}' or object member name", tokenName, tokenObjectEnd);
}
1462 
1463 bool OurReader::readArray(Token& tokenStart) {
1464  Value init(arrayValue);
1465  currentValue().swapPayload(init);
1466  currentValue().setOffsetStart(tokenStart.start_ - begin_);
1467  skipSpaces();
1468  if (*current_ == ']') // empty array
1469  {
1470  Token endArray;
1471  readToken(endArray);
1472  return true;
1473  }
1474  int index = 0;
1475  for (;;) {
1476  Value& value = currentValue()[index++];
1477  nodes_.push(&value);
1478  bool ok = readValue();
1479  nodes_.pop();
1480  if (!ok) // error already set
1481  return recoverFromError(tokenArrayEnd);
1482 
1483  Token token;
1484  // Accept Comment after last item in the array.
1485  ok = readToken(token);
1486  while (token.type_ == tokenComment && ok) {
1487  ok = readToken(token);
1488  }
1489  bool badTokenType =
1490  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
1491  if (!ok || badTokenType) {
1492  return addErrorAndRecover(
1493  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
1494  }
1495  if (token.type_ == tokenArrayEnd)
1496  break;
1497  }
1498  return true;
1499 }
1500 
1501 bool OurReader::decodeNumber(Token& token) {
1502  Value decoded;
1503  if (!decodeNumber(token, decoded))
1504  return false;
1505  currentValue().swapPayload(decoded);
1506  currentValue().setOffsetStart(token.start_ - begin_);
1507  currentValue().setOffsetLimit(token.end_ - begin_);
1508  return true;
1509 }
1510 
1511 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1512  // Attempts to parse the number as an integer. If the number is
1513  // larger than the maximum supported value of an integer then
1514  // we decode the number as a double.
1515  Location current = token.start_;
1516  bool isNegative = *current == '-';
1517  if (isNegative)
1518  ++current;
1519  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
1520  Value::LargestUInt maxIntegerValue =
1522  : Value::maxLargestUInt;
1523  Value::LargestUInt threshold = maxIntegerValue / 10;
1524  Value::LargestUInt value = 0;
1525  while (current < token.end_) {
1526  Char c = *current++;
1527  if (c < '0' || c > '9')
1528  return decodeDouble(token, decoded);
1529  Value::UInt digit(c - '0');
1530  if (value >= threshold) {
1531  // We've hit or exceeded the max value divided by 10 (rounded down). If
1532  // a) we've only just touched the limit, b) this is the last digit, and
1533  // c) it's small enough to fit in that rounding delta, we're okay.
1534  // Otherwise treat this number as a double to avoid overflow.
1535  if (value > threshold || current != token.end_ ||
1536  digit > maxIntegerValue % 10) {
1537  return decodeDouble(token, decoded);
1538  }
1539  }
1540  value = value * 10 + digit;
1541  }
1542  if (isNegative)
1543  decoded = -Value::LargestInt(value);
1544  else if (value <= Value::LargestUInt(Value::maxInt))
1545  decoded = Value::LargestInt(value);
1546  else
1547  decoded = value;
1548  return true;
1549 }
1550 
1551 bool OurReader::decodeDouble(Token& token) {
1552  Value decoded;
1553  if (!decodeDouble(token, decoded))
1554  return false;
1555  currentValue().swapPayload(decoded);
1556  currentValue().setOffsetStart(token.start_ - begin_);
1557  currentValue().setOffsetLimit(token.end_ - begin_);
1558  return true;
1559 }
1560 
1561 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1562  double value = 0;
1563  const int bufferSize = 32;
1564  int count;
1565  int length = int(token.end_ - token.start_);
1566 
1567  // Sanity check to avoid buffer overflow exploits.
1568  if (length < 0) {
1569  return addError("Unable to parse token length", token);
1570  }
1571 
1572  // Avoid using a string constant for the format control string given to
1573  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1574  // info:
1575  //
1576  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1577  char format[] = "%lf";
1578 
1579  if (length <= bufferSize) {
1580  Char buffer[bufferSize + 1];
1581  memcpy(buffer, token.start_, length);
1582  buffer[length] = 0;
1583  count = sscanf(buffer, format, &value);
1584  } else {
1585  std::string buffer(token.start_, token.end_);
1586  count = sscanf(buffer.c_str(), format, &value);
1587  }
1588 
1589  if (count != 1)
1590  return addError("'" + std::string(token.start_, token.end_) +
1591  "' is not a number.",
1592  token);
1593  decoded = value;
1594  return true;
1595 }
1596 
1597 bool OurReader::decodeString(Token& token) {
1598  std::string decoded_string;
1599  if (!decodeString(token, decoded_string))
1600  return false;
1601  Value decoded(decoded_string);
1602  currentValue().swapPayload(decoded);
1603  currentValue().setOffsetStart(token.start_ - begin_);
1604  currentValue().setOffsetLimit(token.end_ - begin_);
1605  return true;
1606 }
1607 
1608 bool OurReader::decodeString(Token& token, std::string& decoded) {
1609  decoded.reserve(token.end_ - token.start_ - 2);
1610  Location current = token.start_ + 1; // skip '"'
1611  Location end = token.end_ - 1; // do not include '"'
1612  while (current != end) {
1613  Char c = *current++;
1614  if (c == '"')
1615  break;
1616  else if (c == '\\') {
1617  if (current == end)
1618  return addError("Empty escape sequence in string", token, current);
1619  Char escape = *current++;
1620  switch (escape) {
1621  case '"':
1622  decoded += '"';
1623  break;
1624  case '/':
1625  decoded += '/';
1626  break;
1627  case '\\':
1628  decoded += '\\';
1629  break;
1630  case 'b':
1631  decoded += '\b';
1632  break;
1633  case 'f':
1634  decoded += '\f';
1635  break;
1636  case 'n':
1637  decoded += '\n';
1638  break;
1639  case 'r':
1640  decoded += '\r';
1641  break;
1642  case 't':
1643  decoded += '\t';
1644  break;
1645  case 'u': {
1646  unsigned int unicode;
1647  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1648  return false;
1649  decoded += codePointToUTF8(unicode);
1650  } break;
1651  default:
1652  return addError("Bad escape sequence in string", token, current);
1653  }
1654  } else {
1655  decoded += c;
1656  }
1657  }
1658  return true;
1659 }
1660 
1661 bool OurReader::decodeUnicodeCodePoint(Token& token,
1662  Location& current,
1663  Location end,
1664  unsigned int& unicode) {
1665 
1666  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1667  return false;
1668  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1669  // surrogate pairs
1670  if (end - current < 6)
1671  return addError(
1672  "additional six characters expected to parse unicode surrogate pair.",
1673  token,
1674  current);
1675  unsigned int surrogatePair;
1676  if (*(current++) == '\\' && *(current++) == 'u') {
1677  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1678  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1679  } else
1680  return false;
1681  } else
1682  return addError("expecting another \\u token to begin the second half of "
1683  "a unicode surrogate pair",
1684  token,
1685  current);
1686  }
1687  return true;
1688 }
1689 
1690 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1691  Location& current,
1692  Location end,
1693  unsigned int& unicode) {
1694  if (end - current < 4)
1695  return addError(
1696  "Bad unicode escape sequence in string: four digits expected.",
1697  token,
1698  current);
1699  unicode = 0;
1700  for (int index = 0; index < 4; ++index) {
1701  Char c = *current++;
1702  unicode *= 16;
1703  if (c >= '0' && c <= '9')
1704  unicode += c - '0';
1705  else if (c >= 'a' && c <= 'f')
1706  unicode += c - 'a' + 10;
1707  else if (c >= 'A' && c <= 'F')
1708  unicode += c - 'A' + 10;
1709  else
1710  return addError(
1711  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1712  token,
1713  current);
1714  }
1715  return true;
1716 }
1717 
1718 bool
1719 OurReader::addError(const std::string& message, Token& token, Location extra) {
1720  ErrorInfo info;
1721  info.token_ = token;
1722  info.message_ = message;
1723  info.extra_ = extra;
1724  errors_.push_back(info);
1725  return false;
1726 }
1727 
1728 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1729  int errorCount = int(errors_.size());
1730  Token skip;
1731  for (;;) {
1732  if (!readToken(skip))
1733  errors_.resize(errorCount); // discard errors caused by recovery
1734  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1735  break;
1736  }
1737  errors_.resize(errorCount);
1738  return false;
1739 }
1740 
1741 bool OurReader::addErrorAndRecover(const std::string& message,
1742  Token& token,
1743  TokenType skipUntilToken) {
1744  addError(message, token);
1745  return recoverFromError(skipUntilToken);
1746 }
1747 
1748 Value& OurReader::currentValue() { return *(nodes_.top()); }
1749 
1750 OurReader::Char OurReader::getNextChar() {
1751  if (current_ == end_)
1752  return 0;
1753  return *current_++;
1754 }
1755 
1756 void OurReader::getLocationLineAndColumn(Location location,
1757  int& line,
1758  int& column) const {
1759  Location current = begin_;
1760  Location lastLineStart = current;
1761  line = 0;
1762  while (current < location && current != end_) {
1763  Char c = *current++;
1764  if (c == '\r') {
1765  if (*current == '\n')
1766  ++current;
1767  lastLineStart = current;
1768  ++line;
1769  } else if (c == '\n') {
1770  lastLineStart = current;
1771  ++line;
1772  }
1773  }
1774  // column & line start at 1
1775  column = int(location - lastLineStart) + 1;
1776  ++line;
1777 }
1778 
1779 std::string OurReader::getLocationLineAndColumn(Location location) const {
1780  int line, column;
1781  getLocationLineAndColumn(location, line, column);
1782  char buffer[18 + 16 + 16 + 1];
1783 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
1784 #if defined(WINCE)
1785  _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1786 #else
1787  sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1788 #endif
1789 #else
1790  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1791 #endif
1792  return buffer;
1793 }
1794 
1795 std::string OurReader::getFormattedErrorMessages() const {
1796  std::string formattedMessage;
1797  for (Errors::const_iterator itError = errors_.begin();
1798  itError != errors_.end();
1799  ++itError) {
1800  const ErrorInfo& error = *itError;
1801  formattedMessage +=
1802  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1803  formattedMessage += " " + error.message_ + "\n";
1804  if (error.extra_)
1805  formattedMessage +=
1806  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1807  }
1808  return formattedMessage;
1809 }
1810 
1811 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1812  std::vector<OurReader::StructuredError> allErrors;
1813  for (Errors::const_iterator itError = errors_.begin();
1814  itError != errors_.end();
1815  ++itError) {
1816  const ErrorInfo& error = *itError;
1817  OurReader::StructuredError structured;
1818  structured.offset_start = error.token_.start_ - begin_;
1819  structured.offset_limit = error.token_.end_ - begin_;
1820  structured.message = error.message_;
1821  allErrors.push_back(structured);
1822  }
1823  return allErrors;
1824 }
1825 
1826 bool OurReader::pushError(const Value& value, const std::string& message) {
1827  size_t length = end_ - begin_;
1828  if(value.getOffsetStart() > length
1829  || value.getOffsetLimit() > length)
1830  return false;
1831  Token token;
1832  token.type_ = tokenError;
1833  token.start_ = begin_ + value.getOffsetStart();
1834  token.end_ = end_ + value.getOffsetLimit();
1835  ErrorInfo info;
1836  info.token_ = token;
1837  info.message_ = message;
1838  info.extra_ = 0;
1839  errors_.push_back(info);
1840  return true;
1841 }
1842 
1843 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
1844  size_t length = end_ - begin_;
1845  if(value.getOffsetStart() > length
1846  || value.getOffsetLimit() > length
1847  || extra.getOffsetLimit() > length)
1848  return false;
1849  Token token;
1850  token.type_ = tokenError;
1851  token.start_ = begin_ + value.getOffsetStart();
1852  token.end_ = begin_ + value.getOffsetLimit();
1853  ErrorInfo info;
1854  info.token_ = token;
1855  info.message_ = message;
1856  info.extra_ = begin_ + extra.getOffsetStart();
1857  errors_.push_back(info);
1858  return true;
1859 }
1860 
1861 bool OurReader::good() const {
1862  return !errors_.size();
1863 }
1864 
1865 
1866 class OurCharReader : public CharReader {
1867  bool const collectComments_;
1868  OurReader reader_;
1869 public:
1870  OurCharReader(
1871  bool collectComments,
1872  OurFeatures const& features)
1873  : collectComments_(collectComments)
1874  , reader_(features)
1875  {}
1876  virtual bool parse(
1877  char const* beginDoc, char const* endDoc,
1878  Value* root, std::string* errs) {
1879  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1880  if (errs) {
1881  *errs = reader_.getFormattedErrorMessages();
1882  }
1883  return ok;
1884  }
1885 };
1886 
1888 {
1890 }
1892 {}
1894 {
1895  bool collectComments = settings_["collectComments"].asBool();
1896  OurFeatures features = OurFeatures::all();
1897  features.allowComments_ = settings_["allowComments"].asBool();
1898  features.strictRoot_ = settings_["strictRoot"].asBool();
1899  features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
1900  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1901  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1902  features.stackLimit_ = settings_["stackLimit"].asInt();
1903  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1904  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1905  return new OurCharReader(collectComments, features);
1906 }
// Replaces the contents of *valid_keys with the setting names recognized by
// the reader builder.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kRecognizedKeys[] = {
      "collectComments",
      "allowComments",
      "strictRoot",
      "allowDroppedNullPlaceholders",
      "allowNumericKeys",
      "allowSingleQuotes",
      "stackLimit",
      "failIfExtra",
      "rejectDupKeys",
  };
  const size_t keyCount = sizeof(kRecognizedKeys) / sizeof(kRecognizedKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i)
    valid_keys->insert(kRecognizedKeys[i]);
}
1921 {
1922  Json::Value my_invalid;
1923  if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL
1924  Json::Value& inv = *invalid;
1925  std::set<std::string> valid_keys;
1926  getValidReaderKeys(&valid_keys);
1928  size_t n = keys.size();
1929  for (size_t i = 0; i < n; ++i) {
1930  std::string const& key = keys[i];
1931  if (valid_keys.find(key) == valid_keys.end()) {
1932  inv[key] = settings_[key];
1933  }
1934  }
1935  return 0u == inv.size();
1936 }
1938 {
1939  return settings_[key];
1940 }
1941 // static
1943 {
1945  (*settings)["allowComments"] = false;
1946  (*settings)["strictRoot"] = true;
1947  (*settings)["allowDroppedNullPlaceholders"] = false;
1948  (*settings)["allowNumericKeys"] = false;
1949  (*settings)["allowSingleQuotes"] = false;
1950  (*settings)["failIfExtra"] = true;
1951  (*settings)["rejectDupKeys"] = true;
1953 }
1954 // static
1956 {
1958  (*settings)["collectComments"] = true;
1959  (*settings)["allowComments"] = true;
1960  (*settings)["strictRoot"] = false;
1961  (*settings)["allowDroppedNullPlaceholders"] = false;
1962  (*settings)["allowNumericKeys"] = false;
1963  (*settings)["allowSingleQuotes"] = false;
1964  (*settings)["stackLimit"] = 1000;
1965  (*settings)["failIfExtra"] = false;
1966  (*settings)["rejectDupKeys"] = false;
1968 }
1969 
1971 // global functions
1972 
1974  CharReader::Factory const& fact, std::istream& sin,
1975  Value* root, std::string* errs)
1976 {
1977  std::ostringstream ssin;
1978  ssin << sin.rdbuf();
1979  std::string doc = ssin.str();
1980  char const* begin = doc.data();
1981  char const* end = begin + doc.size();
1982  // Note that we do not actually need a null-terminator.
1983  CharReaderPtr const reader(fact.newCharReader());
1984  return reader->parse(begin, end, root, errs);
1985 }
1986 
1987 std::istream& operator>>(std::istream& sin, Value& root) {
1989  std::string errs;
1990  bool ok = parseFromStream(b, sin, &root, &errs);
1991  if (!ok) {
1992  fprintf(stderr,
1993  "Error from reader: %s",
1994  errs.c_str());
1995 
1996  throwRuntimeError("reader error");
1997  }
1998  return sin;
1999 }
2000 
2001 } // namespace Json
static std::string codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:18
Int asInt() const
Definition: json_value.cpp:653
std::vector< std::string > Members
Definition: value.h:150
virtual CharReader * newCharReader() const
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
array value (ordered list)
Definition: value.h:70
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:38
bool parseFromStream(CharReader::Factory const &, std::istream &, Value *root, std::string *errs)
Consume entire stream and use its begin/end.
Json::Value settings_
Configuration of this builder.
Definition: reader.h:330
object value (collection of name/value pairs).
Definition: value.h:71
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
char Char
Definition: reader.h:35
std::string getFormatedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:474
void setOffsetStart(size_t start)
Value & operator[](std::string key)
A simple way to update a specific setting.
static const Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:175
Json::LargestUInt LargestUInt
Definition: value.h:160
Features()
Initialize the configuration like JsonConfig::allFeatures;.
Definition: json_reader.cpp:44
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:44
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool asBool() const
Definition: json_value.cpp:802
bool isObject() const
void setComment(const char *comment, CommentPlacement placement)
static const LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:166
bool allowComments_
true if comments are allowed. Default: true.
Definition: features.h:42
CommentPlacement
Definition: value.h:74
const Char * Location
Definition: reader.h:36
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Definition: features.h:52
size_t getOffsetLimit() const
bool good() const
Return whether there are any errors.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:83
JSON (JavaScript Object Notation).
Definition: config.h:87
Members getMemberNames() const
Return a list of the member names.
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: features.h:49
bool validate(Json::Value *invalid) const
Json::LargestInt LargestInt
Definition: value.h:159
static int const stackLimit_g
Definition: json_reader.cpp:30
void throwRuntimeError(std::string const &msg)
used internally
Definition: json_value.cpp:187
Json::UInt UInt
Definition: value.h:153
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
Interface for reading JSON from a char array.
Definition: reader.h:247
ArrayIndex size() const
Number of values in array or object.
Definition: json_value.cpp:854
Represents a JSON value.
Definition: value.h:147
void setOffsetLimit(size_t limit)
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:48
static std::string normalizeEOL(Reader::Location begin, Reader::Location end)
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:77
bool pushError(const Value &value, const std::string &message)
Add a semantic error message.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:50
bool strictRoot_
true if root must be either an array or an object value.
Definition: features.h:46
bool isArray() const
Build a CharReader implementation.
Definition: reader.h:293
size_t getOffsetStart() const
static int stackDepth_g
Definition: json_reader.cpp:31
#define snprintf
Definition: json_reader.cpp:22
static void getValidReaderKeys(std::set< std::string > *valid_keys)
static bool containsNewLine(Reader::Location begin, Reader::Location end)
Definition: json_reader.cpp:62
Configuration passed to reader and writer.
Definition: features.h:19
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
a comment placed on the line before a value
Definition: value.h:75
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:72
std::string getFormattedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
a comment just after a value on the same line
Definition: value.h:76