#include <bwi_tools/json/reader.h>
#include <bwi_tools/json/value.h>
#include <utility>
#include <cstdio>
#include <cassert>
#include <cstring>
#include <iostream>
#include <iterator>
#include <stdexcept>
00009
00010 #if _MSC_VER >= 1400 // VC++ 8.0
00011 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
00012 #endif
00013
00014 namespace Json {
00015
00016
00017
00018
00019 Features::Features()
00020 : allowComments_( true )
00021 , strictRoot_( false )
00022 {
00023 }
00024
00025
00026 Features
00027 Features::all()
00028 {
00029 return Features();
00030 }
00031
00032
00033 Features
00034 Features::strictMode()
00035 {
00036 Features features;
00037 features.allowComments_ = false;
00038 features.strictRoot_ = true;
00039 return features;
00040 }
00041
00042
00043
00044
00045
00046 static inline bool
00047 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
00048 {
00049 return c == c1 || c == c2 || c == c3 || c == c4;
00050 }
00051
00052 static inline bool
00053 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
00054 {
00055 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
00056 }
00057
00058
00059 static bool
00060 containsNewLine( Reader::Location begin,
00061 Reader::Location end )
00062 {
00063 for ( ;begin < end; ++begin )
00064 if ( *begin == '\n' || *begin == '\r' )
00065 return true;
00066 return false;
00067 }
00068
// Encodes a Unicode code point as a UTF-8 byte sequence.
//
// cp: the code point to encode.
// Returns 1 to 4 bytes for code points up to 0x10FFFF, and an empty string
// for anything larger. Values in the surrogate range are encoded like any
// other 3-byte code point; no validation is performed here.
static std::string codePointToUTF8(unsigned int cp)
{
   std::string result;

   // Each branch fills the trailing continuation bytes (10xxxxxx) first and
   // the lead byte last. The whole byte value is built as an integer and then
   // converted with a single explicit cast, so no implicit int -> char
   // narrowing takes place.
   if (cp <= 0x7f)
   {
      result.resize(1);
      result[0] = static_cast<char>(cp);
   }
   else if (cp <= 0x7FF)
   {
      result.resize(2);
      result[1] = static_cast<char>(0x80 | (0x3f & cp));
      result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
   }
   else if (cp <= 0xFFFF)
   {
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
   }
   else if (cp <= 0x10FFFF)
   {
      result.resize(4);
      result[3] = static_cast<char>(0x80 | (0x3f & cp));
      result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
      result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
   }

   return result;
}
00104
00105
00106
00107
00108
00109 Reader::Reader()
00110 : features_( Features::all() )
00111 {
00112 }
00113
00114
00115 Reader::Reader( const Features &features )
00116 : features_( features )
00117 {
00118 }
00119
00120
00121 bool
00122 Reader::parse( const std::string &document,
00123 Value &root,
00124 bool collectComments )
00125 {
00126 document_ = document;
00127 const char *begin = document_.c_str();
00128 const char *end = begin + document_.length();
00129 return parse( begin, end, root, collectComments );
00130 }
00131
00132
00133 bool
00134 Reader::parse( std::istream& sin,
00135 Value &root,
00136 bool collectComments )
00137 {
00138
00139
00140
00141
00142
00143
00144
00145 std::string doc;
00146 std::getline(sin, doc, (char)EOF);
00147 return parse( doc, root, collectComments );
00148 }
00149
00150 bool
00151 Reader::parse( const char *beginDoc, const char *endDoc,
00152 Value &root,
00153 bool collectComments )
00154 {
00155 if ( !features_.allowComments_ )
00156 {
00157 collectComments = false;
00158 }
00159
00160 begin_ = beginDoc;
00161 end_ = endDoc;
00162 collectComments_ = collectComments;
00163 current_ = begin_;
00164 lastValueEnd_ = 0;
00165 lastValue_ = 0;
00166 commentsBefore_ = "";
00167 errors_.clear();
00168 while ( !nodes_.empty() )
00169 nodes_.pop();
00170 nodes_.push( &root );
00171
00172 bool successful = readValue();
00173 Token token;
00174 skipCommentTokens( token );
00175 if ( collectComments_ && !commentsBefore_.empty() )
00176 root.setComment( commentsBefore_, commentAfter );
00177 if ( features_.strictRoot_ )
00178 {
00179 if ( !root.isArray() && !root.isObject() )
00180 {
00181
00182 token.type_ = tokenError;
00183 token.start_ = beginDoc;
00184 token.end_ = endDoc;
00185 addError( "A valid JSON document must be either an array or an object value.",
00186 token );
00187 return false;
00188 }
00189 }
00190 return successful;
00191 }
00192
00193
00194 bool
00195 Reader::readValue()
00196 {
00197 Token token;
00198 skipCommentTokens( token );
00199 bool successful = true;
00200
00201 if ( collectComments_ && !commentsBefore_.empty() )
00202 {
00203 currentValue().setComment( commentsBefore_, commentBefore );
00204 commentsBefore_ = "";
00205 }
00206
00207
00208 switch ( token.type_ )
00209 {
00210 case tokenObjectBegin:
00211 successful = readObject( token );
00212 break;
00213 case tokenArrayBegin:
00214 successful = readArray( token );
00215 break;
00216 case tokenNumber:
00217 successful = decodeNumber( token );
00218 break;
00219 case tokenString:
00220 successful = decodeString( token );
00221 break;
00222 case tokenTrue:
00223 currentValue() = true;
00224 break;
00225 case tokenFalse:
00226 currentValue() = false;
00227 break;
00228 case tokenNull:
00229 currentValue() = Value();
00230 break;
00231 default:
00232 return addError( "Syntax error: value, object or array expected.", token );
00233 }
00234
00235 if ( collectComments_ )
00236 {
00237 lastValueEnd_ = current_;
00238 lastValue_ = ¤tValue();
00239 }
00240
00241 return successful;
00242 }
00243
00244
00245 void
00246 Reader::skipCommentTokens( Token &token )
00247 {
00248 if ( features_.allowComments_ )
00249 {
00250 do
00251 {
00252 readToken( token );
00253 }
00254 while ( token.type_ == tokenComment );
00255 }
00256 else
00257 {
00258 readToken( token );
00259 }
00260 }
00261
00262
00263 bool
00264 Reader::expectToken( TokenType type, Token &token, const char *message )
00265 {
00266 readToken( token );
00267 if ( token.type_ != type )
00268 return addError( message, token );
00269 return true;
00270 }
00271
00272
00273 bool
00274 Reader::readToken( Token &token )
00275 {
00276 skipSpaces();
00277 token.start_ = current_;
00278 Char c = getNextChar();
00279 bool ok = true;
00280 switch ( c )
00281 {
00282 case '{':
00283 token.type_ = tokenObjectBegin;
00284 break;
00285 case '}':
00286 token.type_ = tokenObjectEnd;
00287 break;
00288 case '[':
00289 token.type_ = tokenArrayBegin;
00290 break;
00291 case ']':
00292 token.type_ = tokenArrayEnd;
00293 break;
00294 case '"':
00295 token.type_ = tokenString;
00296 ok = readString();
00297 break;
00298 case '/':
00299 token.type_ = tokenComment;
00300 ok = readComment();
00301 break;
00302 case '0':
00303 case '1':
00304 case '2':
00305 case '3':
00306 case '4':
00307 case '5':
00308 case '6':
00309 case '7':
00310 case '8':
00311 case '9':
00312 case '-':
00313 token.type_ = tokenNumber;
00314 readNumber();
00315 break;
00316 case 't':
00317 token.type_ = tokenTrue;
00318 ok = match( "rue", 3 );
00319 break;
00320 case 'f':
00321 token.type_ = tokenFalse;
00322 ok = match( "alse", 4 );
00323 break;
00324 case 'n':
00325 token.type_ = tokenNull;
00326 ok = match( "ull", 3 );
00327 break;
00328 case ',':
00329 token.type_ = tokenArraySeparator;
00330 break;
00331 case ':':
00332 token.type_ = tokenMemberSeparator;
00333 break;
00334 case 0:
00335 token.type_ = tokenEndOfStream;
00336 break;
00337 default:
00338 ok = false;
00339 break;
00340 }
00341 if ( !ok )
00342 token.type_ = tokenError;
00343 token.end_ = current_;
00344 return true;
00345 }
00346
00347
00348 void
00349 Reader::skipSpaces()
00350 {
00351 while ( current_ != end_ )
00352 {
00353 Char c = *current_;
00354 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
00355 ++current_;
00356 else
00357 break;
00358 }
00359 }
00360
00361
00362 bool
00363 Reader::match( Location pattern,
00364 int patternLength )
00365 {
00366 if ( end_ - current_ < patternLength )
00367 return false;
00368 int index = patternLength;
00369 while ( index-- )
00370 if ( current_[index] != pattern[index] )
00371 return false;
00372 current_ += patternLength;
00373 return true;
00374 }
00375
00376
00377 bool
00378 Reader::readComment()
00379 {
00380 Location commentBegin = current_ - 1;
00381 Char c = getNextChar();
00382 bool successful = false;
00383 if ( c == '*' )
00384 successful = readCStyleComment();
00385 else if ( c == '/' )
00386 successful = readCppStyleComment();
00387 if ( !successful )
00388 return false;
00389
00390 if ( collectComments_ )
00391 {
00392 CommentPlacement placement = commentBefore;
00393 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
00394 {
00395 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
00396 placement = commentAfterOnSameLine;
00397 }
00398
00399 addComment( commentBegin, current_, placement );
00400 }
00401 return true;
00402 }
00403
00404
00405 void
00406 Reader::addComment( Location begin,
00407 Location end,
00408 CommentPlacement placement )
00409 {
00410 assert( collectComments_ );
00411 if ( placement == commentAfterOnSameLine )
00412 {
00413 assert( lastValue_ != 0 );
00414 lastValue_->setComment( std::string( begin, end ), placement );
00415 }
00416 else
00417 {
00418 if ( !commentsBefore_.empty() )
00419 commentsBefore_ += "\n";
00420 commentsBefore_ += std::string( begin, end );
00421 }
00422 }
00423
00424
00425 bool
00426 Reader::readCStyleComment()
00427 {
00428 while ( current_ != end_ )
00429 {
00430 Char c = getNextChar();
00431 if ( c == '*' && *current_ == '/' )
00432 break;
00433 }
00434 return getNextChar() == '/';
00435 }
00436
00437
00438 bool
00439 Reader::readCppStyleComment()
00440 {
00441 while ( current_ != end_ )
00442 {
00443 Char c = getNextChar();
00444 if ( c == '\r' || c == '\n' )
00445 break;
00446 }
00447 return true;
00448 }
00449
00450
00451 void
00452 Reader::readNumber()
00453 {
00454 while ( current_ != end_ )
00455 {
00456 if ( !(*current_ >= '0' && *current_ <= '9') &&
00457 !in( *current_, '.', 'e', 'E', '+', '-' ) )
00458 break;
00459 ++current_;
00460 }
00461 }
00462
00463 bool
00464 Reader::readString()
00465 {
00466 Char c = 0;
00467 while ( current_ != end_ )
00468 {
00469 c = getNextChar();
00470 if ( c == '\\' )
00471 getNextChar();
00472 else if ( c == '"' )
00473 break;
00474 }
00475 return c == '"';
00476 }
00477
00478
00479 bool
00480 Reader::readObject( Token &tokenStart )
00481 {
00482 Token tokenName;
00483 std::string name;
00484 currentValue() = Value( objectValue );
00485 while ( readToken( tokenName ) )
00486 {
00487 bool initialTokenOk = true;
00488 while ( tokenName.type_ == tokenComment && initialTokenOk )
00489 initialTokenOk = readToken( tokenName );
00490 if ( !initialTokenOk )
00491 break;
00492 if ( tokenName.type_ == tokenObjectEnd && name.empty() )
00493 return true;
00494 if ( tokenName.type_ != tokenString )
00495 break;
00496
00497 name = "";
00498 if ( !decodeString( tokenName, name ) )
00499 return recoverFromError( tokenObjectEnd );
00500
00501 Token colon;
00502 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
00503 {
00504 return addErrorAndRecover( "Missing ':' after object member name",
00505 colon,
00506 tokenObjectEnd );
00507 }
00508 Value &value = currentValue()[ name ];
00509 nodes_.push( &value );
00510 bool ok = readValue();
00511 nodes_.pop();
00512 if ( !ok )
00513 return recoverFromError( tokenObjectEnd );
00514
00515 Token comma;
00516 if ( !readToken( comma )
00517 || ( comma.type_ != tokenObjectEnd &&
00518 comma.type_ != tokenArraySeparator &&
00519 comma.type_ != tokenComment ) )
00520 {
00521 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
00522 comma,
00523 tokenObjectEnd );
00524 }
00525 bool finalizeTokenOk = true;
00526 while ( comma.type_ == tokenComment &&
00527 finalizeTokenOk )
00528 finalizeTokenOk = readToken( comma );
00529 if ( comma.type_ == tokenObjectEnd )
00530 return true;
00531 }
00532 return addErrorAndRecover( "Missing '}' or object member name",
00533 tokenName,
00534 tokenObjectEnd );
00535 }
00536
00537
00538 bool
00539 Reader::readArray( Token &tokenStart )
00540 {
00541 currentValue() = Value( arrayValue );
00542 skipSpaces();
00543 if ( *current_ == ']' )
00544 {
00545 Token endArray;
00546 readToken( endArray );
00547 return true;
00548 }
00549 int index = 0;
00550 while ( true )
00551 {
00552 Value &value = currentValue()[ index++ ];
00553 nodes_.push( &value );
00554 bool ok = readValue();
00555 nodes_.pop();
00556 if ( !ok )
00557 return recoverFromError( tokenArrayEnd );
00558
00559 Token token;
00560
00561 ok = readToken( token );
00562 while ( token.type_ == tokenComment && ok )
00563 {
00564 ok = readToken( token );
00565 }
00566 bool badTokenType = ( token.type_ == tokenArraySeparator &&
00567 token.type_ == tokenArrayEnd );
00568 if ( !ok || badTokenType )
00569 {
00570 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
00571 token,
00572 tokenArrayEnd );
00573 }
00574 if ( token.type_ == tokenArrayEnd )
00575 break;
00576 }
00577 return true;
00578 }
00579
00580
00581 bool
00582 Reader::decodeNumber( Token &token )
00583 {
00584 bool isDouble = false;
00585 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
00586 {
00587 isDouble = isDouble
00588 || in( *inspect, '.', 'e', 'E', '+' )
00589 || ( *inspect == '-' && inspect != token.start_ );
00590 }
00591 if ( isDouble )
00592 return decodeDouble( token );
00593 Location current = token.start_;
00594 bool isNegative = *current == '-';
00595 if ( isNegative )
00596 ++current;
00597 Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt)
00598 : Value::maxUInt) / 10;
00599 Value::UInt value = 0;
00600 while ( current < token.end_ )
00601 {
00602 Char c = *current++;
00603 if ( c < '0' || c > '9' )
00604 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
00605 if ( value >= threshold )
00606 return decodeDouble( token );
00607 value = value * 10 + Value::UInt(c - '0');
00608 }
00609 if ( isNegative )
00610 currentValue() = -Value::Int( value );
00611 else if ( value <= Value::UInt(Value::maxInt) )
00612 currentValue() = Value::Int( value );
00613 else
00614 currentValue() = value;
00615 return true;
00616 }
00617
00618
00619 bool
00620 Reader::decodeDouble( Token &token )
00621 {
00622 double value = 0;
00623 const int bufferSize = 32;
00624 int count;
00625 int length = int(token.end_ - token.start_);
00626 if ( length <= bufferSize )
00627 {
00628 Char buffer[bufferSize];
00629 memcpy( buffer, token.start_, length );
00630 buffer[length] = 0;
00631 count = sscanf( buffer, "%lf", &value );
00632 }
00633 else
00634 {
00635 std::string buffer( token.start_, token.end_ );
00636 count = sscanf( buffer.c_str(), "%lf", &value );
00637 }
00638
00639 if ( count != 1 )
00640 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
00641 currentValue() = value;
00642 return true;
00643 }
00644
00645
00646 bool
00647 Reader::decodeString( Token &token )
00648 {
00649 std::string decoded;
00650 if ( !decodeString( token, decoded ) )
00651 return false;
00652 currentValue() = decoded;
00653 return true;
00654 }
00655
00656
00657 bool
00658 Reader::decodeString( Token &token, std::string &decoded )
00659 {
00660 decoded.reserve( token.end_ - token.start_ - 2 );
00661 Location current = token.start_ + 1;
00662 Location end = token.end_ - 1;
00663 while ( current != end )
00664 {
00665 Char c = *current++;
00666 if ( c == '"' )
00667 break;
00668 else if ( c == '\\' )
00669 {
00670 if ( current == end )
00671 return addError( "Empty escape sequence in string", token, current );
00672 Char escape = *current++;
00673 switch ( escape )
00674 {
00675 case '"': decoded += '"'; break;
00676 case '/': decoded += '/'; break;
00677 case '\\': decoded += '\\'; break;
00678 case 'b': decoded += '\b'; break;
00679 case 'f': decoded += '\f'; break;
00680 case 'n': decoded += '\n'; break;
00681 case 'r': decoded += '\r'; break;
00682 case 't': decoded += '\t'; break;
00683 case 'u':
00684 {
00685 unsigned int unicode;
00686 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
00687 return false;
00688 decoded += codePointToUTF8(unicode);
00689 }
00690 break;
00691 default:
00692 return addError( "Bad escape sequence in string", token, current );
00693 }
00694 }
00695 else
00696 {
00697 decoded += c;
00698 }
00699 }
00700 return true;
00701 }
00702
00703 bool
00704 Reader::decodeUnicodeCodePoint( Token &token,
00705 Location ¤t,
00706 Location end,
00707 unsigned int &unicode )
00708 {
00709
00710 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
00711 return false;
00712 if (unicode >= 0xD800 && unicode <= 0xDBFF)
00713 {
00714
00715 if (end - current < 6)
00716 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
00717 unsigned int surrogatePair;
00718 if (*(current++) == '\\' && *(current++)== 'u')
00719 {
00720 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
00721 {
00722 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00723 }
00724 else
00725 return false;
00726 }
00727 else
00728 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
00729 }
00730 return true;
00731 }
00732
00733 bool
00734 Reader::decodeUnicodeEscapeSequence( Token &token,
00735 Location ¤t,
00736 Location end,
00737 unsigned int &unicode )
00738 {
00739 if ( end - current < 4 )
00740 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
00741 unicode = 0;
00742 for ( int index =0; index < 4; ++index )
00743 {
00744 Char c = *current++;
00745 unicode *= 16;
00746 if ( c >= '0' && c <= '9' )
00747 unicode += c - '0';
00748 else if ( c >= 'a' && c <= 'f' )
00749 unicode += c - 'a' + 10;
00750 else if ( c >= 'A' && c <= 'F' )
00751 unicode += c - 'A' + 10;
00752 else
00753 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
00754 }
00755 return true;
00756 }
00757
00758
00759 bool
00760 Reader::addError( const std::string &message,
00761 Token &token,
00762 Location extra )
00763 {
00764 ErrorInfo info;
00765 info.token_ = token;
00766 info.message_ = message;
00767 info.extra_ = extra;
00768 errors_.push_back( info );
00769 return false;
00770 }
00771
00772
00773 bool
00774 Reader::recoverFromError( TokenType skipUntilToken )
00775 {
00776 int errorCount = int(errors_.size());
00777 Token skip;
00778 while ( true )
00779 {
00780 if ( !readToken(skip) )
00781 errors_.resize( errorCount );
00782 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
00783 break;
00784 }
00785 errors_.resize( errorCount );
00786 return false;
00787 }
00788
00789
00790 bool
00791 Reader::addErrorAndRecover( const std::string &message,
00792 Token &token,
00793 TokenType skipUntilToken )
00794 {
00795 addError( message, token );
00796 return recoverFromError( skipUntilToken );
00797 }
00798
00799
00800 Value &
00801 Reader::currentValue()
00802 {
00803 return *(nodes_.top());
00804 }
00805
00806
00807 Reader::Char
00808 Reader::getNextChar()
00809 {
00810 if ( current_ == end_ )
00811 return 0;
00812 return *current_++;
00813 }
00814
00815
00816 void
00817 Reader::getLocationLineAndColumn( Location location,
00818 int &line,
00819 int &column ) const
00820 {
00821 Location current = begin_;
00822 Location lastLineStart = current;
00823 line = 0;
00824 while ( current < location && current != end_ )
00825 {
00826 Char c = *current++;
00827 if ( c == '\r' )
00828 {
00829 if ( *current == '\n' )
00830 ++current;
00831 lastLineStart = current;
00832 ++line;
00833 }
00834 else if ( c == '\n' )
00835 {
00836 lastLineStart = current;
00837 ++line;
00838 }
00839 }
00840
00841 column = int(location - lastLineStart) + 1;
00842 ++line;
00843 }
00844
00845
00846 std::string
00847 Reader::getLocationLineAndColumn( Location location ) const
00848 {
00849 int line, column;
00850 getLocationLineAndColumn( location, line, column );
00851 char buffer[18+16+16+1];
00852 sprintf( buffer, "Line %d, Column %d", line, column );
00853 return buffer;
00854 }
00855
00856
00857 std::string
00858 Reader::getFormatedErrorMessages() const
00859 {
00860 std::string formattedMessage;
00861 for ( Errors::const_iterator itError = errors_.begin();
00862 itError != errors_.end();
00863 ++itError )
00864 {
00865 const ErrorInfo &error = *itError;
00866 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
00867 formattedMessage += " " + error.message_ + "\n";
00868 if ( error.extra_ )
00869 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
00870 }
00871 return formattedMessage;
00872 }
00873
00874
00875 std::istream& operator>>( std::istream &sin, Value &root )
00876 {
00877 Json::Reader reader;
00878 bool ok = reader.parse(sin, root, true);
00879
00880 if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());
00881 return sin;
00882 }
00883
00884
00885 }