00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <ctype.h>
00026 #include <stddef.h>
00027
00028 #include "tinyxml/tinyxml.h"
00029
00030
00031 #if defined( DEBUG_PARSER )
00032 # if defined( DEBUG ) && defined( _MSC_VER )
00033 # include <windows.h>
00034 # define TIXML_LOG OutputDebugString
00035 # else
00036 # define TIXML_LOG printf
00037 # endif
00038 #endif
00039
00040
00041
00042
00043 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
00044 {
00045 { "&", 5, '&' },
00046 { "<", 4, '<' },
00047 { ">", 4, '>' },
00048 { """, 6, '\"' },
00049 { "'", 6, '\'' }
00050 };
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
// The three bytes (EF BB BF) that TiXmlDocument::Parse recognises as the
// UTF-8 byte-order mark at the start of a document.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
00065
00066 const int TiXmlBase::utf8ByteTable[256] =
00067 {
00068
00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00079 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00080 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00081 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00082 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00083 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00084 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
00085 };
00086
00087
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090 const unsigned long BYTE_MASK = 0xBF;
00091 const unsigned long BYTE_MARK = 0x80;
00092 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093
00094 if (input < 0x80)
00095 *length = 1;
00096 else if ( input < 0x800 )
00097 *length = 2;
00098 else if ( input < 0x10000 )
00099 *length = 3;
00100 else if ( input < 0x200000 )
00101 *length = 4;
00102 else
00103 { *length = 0; return; }
00104
00105 output += *length;
00106
00107
00108 switch (*length)
00109 {
00110 case 4:
00111 --output;
00112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00113 input >>= 6;
00114 case 3:
00115 --output;
00116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00117 input >>= 6;
00118 case 2:
00119 --output;
00120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00121 input >>= 6;
00122 case 1:
00123 --output;
00124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125 }
00126 }
00127
00128
00129 int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
00130 {
00131
00132
00133
00134
00135
00136
00137
00138 if ( anyByte < 127 )
00139 return isalpha( anyByte );
00140 else
00141 return 1;
00142
00143
00144
00145
00146
00147 }
00148
00149
00150 int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
00151 {
00152
00153
00154
00155
00156
00157
00158
00159 if ( anyByte < 127 )
00160 return isalnum( anyByte );
00161 else
00162 return 1;
00163
00164
00165
00166
00167
00168 }
00169
00170
00171 class TiXmlParsingData
00172 {
00173 friend class TiXmlDocument;
00174 public:
00175 void Stamp( const char* now, TiXmlEncoding encoding );
00176
00177 const TiXmlCursor& Cursor() const { return cursor; }
00178
00179 private:
00180
00181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182 {
00183 assert( start );
00184 stamp = start;
00185 tabsize = _tabsize;
00186 cursor.row = row;
00187 cursor.col = col;
00188 }
00189
00190 TiXmlCursor cursor;
00191 const char* stamp;
00192 int tabsize;
00193 };
00194
00195
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198 assert( now );
00199
00200
00201 if ( tabsize < 1 )
00202 {
00203 return;
00204 }
00205
00206
00207 int row = cursor.row;
00208 int col = cursor.col;
00209 const char* p = stamp;
00210 assert( p );
00211
00212 while ( p < now )
00213 {
00214
00215 const unsigned char* pU = (const unsigned char*)p;
00216
00217
00218 switch (*pU) {
00219 case 0:
00220
00221
00222 return;
00223
00224 case '\r':
00225
00226 ++row;
00227 col = 0;
00228
00229 ++p;
00230
00231
00232 if (*p == '\n') {
00233 ++p;
00234 }
00235 break;
00236
00237 case '\n':
00238
00239 ++row;
00240 col = 0;
00241
00242
00243 ++p;
00244
00245
00246
00247
00248 if (*p == '\r') {
00249 ++p;
00250 }
00251 break;
00252
00253 case '\t':
00254
00255 ++p;
00256
00257
00258 col = (col / tabsize + 1) * tabsize;
00259 break;
00260
00261 case TIXML_UTF_LEAD_0:
00262 if ( encoding == TIXML_ENCODING_UTF8 )
00263 {
00264 if ( *(p+1) && *(p+2) )
00265 {
00266
00267
00268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269 p += 3;
00270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271 p += 3;
00272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273 p += 3;
00274 else
00275 { p +=3; ++col; }
00276 }
00277 }
00278 else
00279 {
00280 ++p;
00281 ++col;
00282 }
00283 break;
00284
00285 default:
00286 if ( encoding == TIXML_ENCODING_UTF8 )
00287 {
00288
00289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290 if ( step == 0 )
00291 step = 1;
00292 p += step;
00293
00294
00295 ++col;
00296 }
00297 else
00298 {
00299 ++p;
00300 ++col;
00301 }
00302 break;
00303 }
00304 }
00305 cursor.row = row;
00306 cursor.col = col;
00307 assert( cursor.row >= -1 );
00308 assert( cursor.col >= -1 );
00309 stamp = p;
00310 assert( stamp );
00311 }
00312
00313
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316 if ( !p || !*p )
00317 {
00318 return 0;
00319 }
00320 if ( encoding == TIXML_ENCODING_UTF8 )
00321 {
00322 while ( *p )
00323 {
00324 const unsigned char* pU = (const unsigned char*)p;
00325
00326
00327 if ( *(pU+0)==TIXML_UTF_LEAD_0
00328 && *(pU+1)==TIXML_UTF_LEAD_1
00329 && *(pU+2)==TIXML_UTF_LEAD_2 )
00330 {
00331 p += 3;
00332 continue;
00333 }
00334 else if(*(pU+0)==TIXML_UTF_LEAD_0
00335 && *(pU+1)==0xbfU
00336 && *(pU+2)==0xbeU )
00337 {
00338 p += 3;
00339 continue;
00340 }
00341 else if(*(pU+0)==TIXML_UTF_LEAD_0
00342 && *(pU+1)==0xbfU
00343 && *(pU+2)==0xbfU )
00344 {
00345 p += 3;
00346 continue;
00347 }
00348
00349 if ( IsWhiteSpace( *p ) )
00350 ++p;
00351 else
00352 break;
00353 }
00354 }
00355 else
00356 {
00357 while ( *p && IsWhiteSpace( *p ) )
00358 ++p;
00359 }
00360
00361 return p;
00362 }
00363
00364 #ifdef TIXML_USE_STL
00365 bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367 for( ;; )
00368 {
00369 if ( !in->good() ) return false;
00370
00371 int c = in->peek();
00372
00373 if ( !IsWhiteSpace( c ) || c <= 0 )
00374 return true;
00375
00376 *tag += (char) in->get();
00377 }
00378 }
00379
00380 bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382
00383 while ( in->good() )
00384 {
00385 int c = in->peek();
00386 if ( c == character )
00387 return true;
00388 if ( c <= 0 )
00389 return false;
00390
00391 in->get();
00392 *tag += (char) c;
00393 }
00394 return false;
00395 }
00396 #endif
00397
00398
00399
00400
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403
00404
00405
00406 *name = "";
00407 assert( p );
00408
00409
00410
00411
00412
00413
00414
00415
00416 if ( p && *p
00417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418 {
00419 const char* start = p;
00420 while( p && *p
00421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
00422 || *p == '_'
00423 || *p == '-'
00424 || *p == '.'
00425 || *p == ':' ) )
00426 {
00427
00428 ++p;
00429 }
00430 if ( p-start > 0 ) {
00431 name->assign( start, p-start );
00432 }
00433 return p;
00434 }
00435 return 0;
00436 }
00437
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440
00441 TIXML_STRING ent;
00442 int i;
00443 *length = 0;
00444
00445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446 {
00447 unsigned long ucs = 0;
00448 ptrdiff_t delta = 0;
00449 unsigned mult = 1;
00450
00451 if ( *(p+2) == 'x' )
00452 {
00453
00454 if ( !*(p+3) ) return 0;
00455
00456 const char* q = p+3;
00457 q = strchr( q, ';' );
00458
00459 if ( !q || !*q ) return 0;
00460
00461 delta = q-p;
00462 --q;
00463
00464 while ( *q != 'x' )
00465 {
00466 if ( *q >= '0' && *q <= '9' )
00467 ucs += mult * (*q - '0');
00468 else if ( *q >= 'a' && *q <= 'f' )
00469 ucs += mult * (*q - 'a' + 10);
00470 else if ( *q >= 'A' && *q <= 'F' )
00471 ucs += mult * (*q - 'A' + 10 );
00472 else
00473 return 0;
00474 mult *= 16;
00475 --q;
00476 }
00477 }
00478 else
00479 {
00480
00481 if ( !*(p+2) ) return 0;
00482
00483 const char* q = p+2;
00484 q = strchr( q, ';' );
00485
00486 if ( !q || !*q ) return 0;
00487
00488 delta = q-p;
00489 --q;
00490
00491 while ( *q != '#' )
00492 {
00493 if ( *q >= '0' && *q <= '9' )
00494 ucs += mult * (*q - '0');
00495 else
00496 return 0;
00497 mult *= 10;
00498 --q;
00499 }
00500 }
00501 if ( encoding == TIXML_ENCODING_UTF8 )
00502 {
00503
00504 ConvertUTF32ToUTF8( ucs, value, length );
00505 }
00506 else
00507 {
00508 *value = (char)ucs;
00509 *length = 1;
00510 }
00511 return p + delta + 1;
00512 }
00513
00514
00515 for( i=0; i<NUM_ENTITY; ++i )
00516 {
00517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518 {
00519 assert( strlen( entity[i].str ) == entity[i].strLength );
00520 *value = entity[i].chr;
00521 *length = 1;
00522 return ( p + entity[i].strLength );
00523 }
00524 }
00525
00526
00527 *value = *p;
00528
00529
00530 return p+1;
00531 }
00532
00533
00534 bool TiXmlBase::StringEqual( const char* p,
00535 const char* tag,
00536 bool ignoreCase,
00537 TiXmlEncoding encoding )
00538 {
00539 assert( p );
00540 assert( tag );
00541 if ( !p || !*p )
00542 {
00543 assert( 0 );
00544 return false;
00545 }
00546
00547 const char* q = p;
00548
00549 if ( ignoreCase )
00550 {
00551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552 {
00553 ++q;
00554 ++tag;
00555 }
00556
00557 if ( *tag == 0 )
00558 return true;
00559 }
00560 else
00561 {
00562 while ( *q && *tag && *q == *tag )
00563 {
00564 ++q;
00565 ++tag;
00566 }
00567
00568 if ( *tag == 0 )
00569 return true;
00570 }
00571 return false;
00572 }
00573
00574 const char* TiXmlBase::ReadText( const char* p,
00575 TIXML_STRING * text,
00576 bool trimWhiteSpace,
00577 const char* endTag,
00578 bool caseInsensitive,
00579 TiXmlEncoding encoding )
00580 {
00581 *text = "";
00582 if ( !trimWhiteSpace
00583 || !condenseWhiteSpace )
00584 {
00585
00586 while ( p && *p
00587 && !StringEqual( p, endTag, caseInsensitive, encoding )
00588 )
00589 {
00590 int len;
00591 char cArr[4] = { 0, 0, 0, 0 };
00592 p = GetChar( p, cArr, &len, encoding );
00593 text->append( cArr, len );
00594 }
00595 }
00596 else
00597 {
00598 bool whitespace = false;
00599
00600
00601 p = SkipWhiteSpace( p, encoding );
00602 while ( p && *p
00603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604 {
00605 if ( *p == '\r' || *p == '\n' )
00606 {
00607 whitespace = true;
00608 ++p;
00609 }
00610 else if ( IsWhiteSpace( *p ) )
00611 {
00612 whitespace = true;
00613 ++p;
00614 }
00615 else
00616 {
00617
00618
00619 if ( whitespace )
00620 {
00621 (*text) += ' ';
00622 whitespace = false;
00623 }
00624 int len;
00625 char cArr[4] = { 0, 0, 0, 0 };
00626 p = GetChar( p, cArr, &len, encoding );
00627 if ( len == 1 )
00628 (*text) += cArr[0];
00629 else
00630 text->append( cArr, len );
00631 }
00632 }
00633 }
00634 if ( p && *p )
00635 p += strlen( endTag );
00636 return ( p && *p ) ? p : 0;
00637 }
00638
00639 #ifdef TIXML_USE_STL
00640
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643
00644
00645
00646
00647
00648
00649
00650 if ( !StreamTo( in, '<', tag ) )
00651 {
00652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653 return;
00654 }
00655
00656 while ( in->good() )
00657 {
00658 int tagIndex = (int) tag->length();
00659 while ( in->good() && in->peek() != '>' )
00660 {
00661 int c = in->get();
00662 if ( c <= 0 )
00663 {
00664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665 break;
00666 }
00667 (*tag) += (char) c;
00668 }
00669
00670 if ( in->good() )
00671 {
00672
00673
00674
00675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676
00677 if ( node )
00678 {
00679 node->StreamIn( in, tag );
00680 bool isElement = node->ToElement() != 0;
00681 delete node;
00682 node = 0;
00683
00684
00685
00686 if ( isElement )
00687 {
00688 return;
00689 }
00690 }
00691 else
00692 {
00693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694 return;
00695 }
00696 }
00697 }
00698
00699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701
00702 #endif
00703
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706 ClearError();
00707
00708
00709
00710
00711 if ( !p || !*p )
00712 {
00713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714 return 0;
00715 }
00716
00717
00718
00719
00720 location.Clear();
00721 if ( prevData )
00722 {
00723 location.row = prevData->cursor.row;
00724 location.col = prevData->cursor.col;
00725 }
00726 else
00727 {
00728 location.row = 0;
00729 location.col = 0;
00730 }
00731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732 location = data.Cursor();
00733
00734 if ( encoding == TIXML_ENCODING_UNKNOWN )
00735 {
00736
00737 const unsigned char* pU = (const unsigned char*)p;
00738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741 {
00742 encoding = TIXML_ENCODING_UTF8;
00743 useMicrosoftBOM = true;
00744 }
00745 }
00746
00747 p = SkipWhiteSpace( p, encoding );
00748 if ( !p )
00749 {
00750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751 return 0;
00752 }
00753
00754 while ( p && *p )
00755 {
00756 TiXmlNode* node = Identify( p, encoding );
00757 if ( node )
00758 {
00759 p = node->Parse( p, &data, encoding );
00760 LinkEndChild( node );
00761 }
00762 else
00763 {
00764 break;
00765 }
00766
00767
00768 if ( encoding == TIXML_ENCODING_UNKNOWN
00769 && node->ToDeclaration() )
00770 {
00771 TiXmlDeclaration* dec = node->ToDeclaration();
00772 const char* enc = dec->Encoding();
00773 assert( enc );
00774
00775 if ( *enc == 0 )
00776 encoding = TIXML_ENCODING_UTF8;
00777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778 encoding = TIXML_ENCODING_UTF8;
00779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780 encoding = TIXML_ENCODING_UTF8;
00781 else
00782 encoding = TIXML_ENCODING_LEGACY;
00783 }
00784
00785 p = SkipWhiteSpace( p, encoding );
00786 }
00787
00788
00789 if ( !firstChild ) {
00790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791 return 0;
00792 }
00793
00794
00795 return p;
00796 }
00797
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 {
00800
00801 if ( error )
00802 return;
00803
00804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805 error = true;
00806 errorId = err;
00807 errorDesc = errorString[ errorId ];
00808
00809 errorLocation.Clear();
00810 if ( pError && data )
00811 {
00812 data->Stamp( pError, encoding );
00813 errorLocation = data->Cursor();
00814 }
00815 }
00816
00817
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820 TiXmlNode* returnNode = 0;
00821
00822 p = SkipWhiteSpace( p, encoding );
00823 if( !p || !*p || *p != '<' )
00824 {
00825 return 0;
00826 }
00827
00828 p = SkipWhiteSpace( p, encoding );
00829
00830 if ( !p || !*p )
00831 {
00832 return 0;
00833 }
00834
00835
00836
00837
00838
00839
00840
00841
00842 const char* xmlHeader = { "<?xml" };
00843 const char* commentHeader = { "<!--" };
00844 const char* dtdHeader = { "<!" };
00845 const char* cdataHeader = { "<![CDATA[" };
00846
00847 if ( StringEqual( p, xmlHeader, true, encoding ) )
00848 {
00849 #ifdef DEBUG_PARSER
00850 TIXML_LOG( "XML parsing Declaration\n" );
00851 #endif
00852 returnNode = new TiXmlDeclaration();
00853 }
00854 else if ( StringEqual( p, commentHeader, false, encoding ) )
00855 {
00856 #ifdef DEBUG_PARSER
00857 TIXML_LOG( "XML parsing Comment\n" );
00858 #endif
00859 returnNode = new TiXmlComment();
00860 }
00861 else if ( StringEqual( p, cdataHeader, false, encoding ) )
00862 {
00863 #ifdef DEBUG_PARSER
00864 TIXML_LOG( "XML parsing CDATA\n" );
00865 #endif
00866 TiXmlText* text = new TiXmlText( "" );
00867 text->SetCDATA( true );
00868 returnNode = text;
00869 }
00870 else if ( StringEqual( p, dtdHeader, false, encoding ) )
00871 {
00872 #ifdef DEBUG_PARSER
00873 TIXML_LOG( "XML parsing Unknown(1)\n" );
00874 #endif
00875 returnNode = new TiXmlUnknown();
00876 }
00877 else if ( IsAlpha( *(p+1), encoding )
00878 || *(p+1) == '_' )
00879 {
00880 #ifdef DEBUG_PARSER
00881 TIXML_LOG( "XML parsing Element\n" );
00882 #endif
00883 returnNode = new TiXmlElement( "" );
00884 }
00885 else
00886 {
00887 #ifdef DEBUG_PARSER
00888 TIXML_LOG( "XML parsing Unknown(2)\n" );
00889 #endif
00890 returnNode = new TiXmlUnknown();
00891 }
00892
00893 if ( returnNode )
00894 {
00895
00896 returnNode->parent = this;
00897 }
00898 return returnNode;
00899 }
00900
00901 #ifdef TIXML_USE_STL
00902
00903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00904 {
00905
00906
00907 while( in->good() )
00908 {
00909 int c = in->get();
00910 if ( c <= 0 )
00911 {
00912 TiXmlDocument* document = GetDocument();
00913 if ( document )
00914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00915 return;
00916 }
00917 (*tag) += (char) c ;
00918
00919 if ( c == '>' )
00920 break;
00921 }
00922
00923 if ( tag->length() < 3 ) return;
00924
00925
00926
00927
00928 if ( tag->at( tag->length() - 1 ) == '>'
00929 && tag->at( tag->length() - 2 ) == '/' )
00930 {
00931
00932 return;
00933 }
00934 else if ( tag->at( tag->length() - 1 ) == '>' )
00935 {
00936
00937
00938
00939
00940
00941 for ( ;; )
00942 {
00943 StreamWhiteSpace( in, tag );
00944
00945
00946 if ( in->good() && in->peek() != '<' )
00947 {
00948
00949 TiXmlText text( "" );
00950 text.StreamIn( in, tag );
00951
00952
00953
00954 continue;
00955 }
00956
00957
00958
00959 if ( !in->good() ) return;
00960 assert( in->peek() == '<' );
00961 int tagIndex = (int) tag->length();
00962
00963 bool closingTag = false;
00964 bool firstCharFound = false;
00965
00966 for( ;; )
00967 {
00968 if ( !in->good() )
00969 return;
00970
00971 int c = in->peek();
00972 if ( c <= 0 )
00973 {
00974 TiXmlDocument* document = GetDocument();
00975 if ( document )
00976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00977 return;
00978 }
00979
00980 if ( c == '>' )
00981 break;
00982
00983 *tag += (char) c;
00984 in->get();
00985
00986
00987 if ( c == '[' && tag->size() >= 9 )
00988 {
00989 size_t len = tag->size();
00990 const char* start = tag->c_str() + len - 9;
00991 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00992 assert( !closingTag );
00993 break;
00994 }
00995 }
00996
00997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00998 {
00999 firstCharFound = true;
01000 if ( c == '/' )
01001 closingTag = true;
01002 }
01003 }
01004
01005
01006 if ( closingTag )
01007 {
01008 if ( !in->good() )
01009 return;
01010
01011 int c = in->get();
01012 if ( c <= 0 )
01013 {
01014 TiXmlDocument* document = GetDocument();
01015 if ( document )
01016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01017 return;
01018 }
01019 assert( c == '>' );
01020 *tag += (char) c;
01021
01022
01023 return;
01024 }
01025 else
01026 {
01027
01028 const char* tagloc = tag->c_str() + tagIndex;
01029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01030 if ( !node )
01031 return;
01032 node->StreamIn( in, tag );
01033 delete node;
01034 node = 0;
01035
01036
01037 }
01038 }
01039 }
01040 }
01041 #endif
01042
01043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01044 {
01045 p = SkipWhiteSpace( p, encoding );
01046 TiXmlDocument* document = GetDocument();
01047
01048 if ( !p || !*p )
01049 {
01050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01051 return 0;
01052 }
01053
01054 if ( data )
01055 {
01056 data->Stamp( p, encoding );
01057 location = data->Cursor();
01058 }
01059
01060 if ( *p != '<' )
01061 {
01062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01063 return 0;
01064 }
01065
01066 p = SkipWhiteSpace( p+1, encoding );
01067
01068
01069 const char* pErr = p;
01070
01071 p = ReadName( p, &value, encoding );
01072 if ( !p || !*p )
01073 {
01074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01075 return 0;
01076 }
01077
01078 TIXML_STRING endTag ("</");
01079 endTag += value;
01080
01081
01082
01083 while ( p && *p )
01084 {
01085 pErr = p;
01086 p = SkipWhiteSpace( p, encoding );
01087 if ( !p || !*p )
01088 {
01089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01090 return 0;
01091 }
01092 if ( *p == '/' )
01093 {
01094 ++p;
01095
01096 if ( *p != '>' )
01097 {
01098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01099 return 0;
01100 }
01101 return (p+1);
01102 }
01103 else if ( *p == '>' )
01104 {
01105
01106
01107
01108 ++p;
01109 p = ReadValue( p, data, encoding );
01110 if ( !p || !*p ) {
01111
01112
01113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01114 return 0;
01115 }
01116
01117
01118
01119
01120
01121
01122 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01123 {
01124 p += endTag.length();
01125 p = SkipWhiteSpace( p, encoding );
01126 if ( p && *p && *p == '>' ) {
01127 ++p;
01128 return p;
01129 }
01130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01131 return 0;
01132 }
01133 else
01134 {
01135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01136 return 0;
01137 }
01138 }
01139 else
01140 {
01141
01142 TiXmlAttribute* attrib = new TiXmlAttribute();
01143 if ( !attrib )
01144 {
01145 return 0;
01146 }
01147
01148 attrib->SetDocument( document );
01149 pErr = p;
01150 p = attrib->Parse( p, data, encoding );
01151
01152 if ( !p || !*p )
01153 {
01154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01155 delete attrib;
01156 return 0;
01157 }
01158
01159
01160 #ifdef TIXML_USE_STL
01161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01162 #else
01163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01164 #endif
01165 if ( node )
01166 {
01167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01168 delete attrib;
01169 return 0;
01170 }
01171
01172 attributeSet.Add( attrib );
01173 }
01174 }
01175 return p;
01176 }
01177
01178
01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01180 {
01181 TiXmlDocument* document = GetDocument();
01182
01183
01184 const char* pWithWhiteSpace = p;
01185 p = SkipWhiteSpace( p, encoding );
01186
01187 while ( p && *p )
01188 {
01189 if ( *p != '<' )
01190 {
01191
01192 TiXmlText* textNode = new TiXmlText( "" );
01193
01194 if ( !textNode )
01195 {
01196 return 0;
01197 }
01198
01199 if ( TiXmlBase::IsWhiteSpaceCondensed() )
01200 {
01201 p = textNode->Parse( p, data, encoding );
01202 }
01203 else
01204 {
01205
01206
01207 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01208 }
01209
01210 if ( !textNode->Blank() )
01211 LinkEndChild( textNode );
01212 else
01213 delete textNode;
01214 }
01215 else
01216 {
01217
01218
01219
01220 if ( StringEqual( p, "</", false, encoding ) )
01221 {
01222 return p;
01223 }
01224 else
01225 {
01226 TiXmlNode* node = Identify( p, encoding );
01227 if ( node )
01228 {
01229 p = node->Parse( p, data, encoding );
01230 LinkEndChild( node );
01231 }
01232 else
01233 {
01234 return 0;
01235 }
01236 }
01237 }
01238 pWithWhiteSpace = p;
01239 p = SkipWhiteSpace( p, encoding );
01240 }
01241
01242 if ( !p )
01243 {
01244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01245 }
01246 return p;
01247 }
01248
01249
01250 #ifdef TIXML_USE_STL
01251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01252 {
01253 while ( in->good() )
01254 {
01255 int c = in->get();
01256 if ( c <= 0 )
01257 {
01258 TiXmlDocument* document = GetDocument();
01259 if ( document )
01260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01261 return;
01262 }
01263 (*tag) += (char) c;
01264
01265 if ( c == '>' )
01266 {
01267
01268 return;
01269 }
01270 }
01271 }
01272 #endif
01273
01274
01275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01276 {
01277 TiXmlDocument* document = GetDocument();
01278 p = SkipWhiteSpace( p, encoding );
01279
01280 if ( data )
01281 {
01282 data->Stamp( p, encoding );
01283 location = data->Cursor();
01284 }
01285 if ( !p || !*p || *p != '<' )
01286 {
01287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01288 return 0;
01289 }
01290 ++p;
01291 value = "";
01292
01293 while ( p && *p && *p != '>' )
01294 {
01295 value += *p;
01296 ++p;
01297 }
01298
01299 if ( !p )
01300 {
01301 if ( document )
01302 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01303 }
01304 if ( p && *p == '>' )
01305 return p+1;
01306 return p;
01307 }
01308
01309 #ifdef TIXML_USE_STL
01310 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01311 {
01312 while ( in->good() )
01313 {
01314 int c = in->get();
01315 if ( c <= 0 )
01316 {
01317 TiXmlDocument* document = GetDocument();
01318 if ( document )
01319 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01320 return;
01321 }
01322
01323 (*tag) += (char) c;
01324
01325 if ( c == '>'
01326 && tag->at( tag->length() - 2 ) == '-'
01327 && tag->at( tag->length() - 3 ) == '-' )
01328 {
01329
01330 return;
01331 }
01332 }
01333 }
01334 #endif
01335
01336
01337 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01338 {
01339 TiXmlDocument* document = GetDocument();
01340 value = "";
01341
01342 p = SkipWhiteSpace( p, encoding );
01343
01344 if ( data )
01345 {
01346 data->Stamp( p, encoding );
01347 location = data->Cursor();
01348 }
01349 const char* startTag = "<!--";
01350 const char* endTag = "-->";
01351
01352 if ( !StringEqual( p, startTag, false, encoding ) )
01353 {
01354 if ( document )
01355 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01356 return 0;
01357 }
01358 p += strlen( startTag );
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378 value = "";
01379
01380 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01381 {
01382 value.append( p, 1 );
01383 ++p;
01384 }
01385 if ( p && *p )
01386 p += strlen( endTag );
01387
01388 return p;
01389 }
01390
01391
01392 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01393 {
01394 p = SkipWhiteSpace( p, encoding );
01395 if ( !p || !*p ) return 0;
01396
01397 if ( data )
01398 {
01399 data->Stamp( p, encoding );
01400 location = data->Cursor();
01401 }
01402
01403 const char* pErr = p;
01404 p = ReadName( p, &name, encoding );
01405 if ( !p || !*p )
01406 {
01407 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01408 return 0;
01409 }
01410 p = SkipWhiteSpace( p, encoding );
01411 if ( !p || !*p || *p != '=' )
01412 {
01413 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01414 return 0;
01415 }
01416
01417 ++p;
01418 p = SkipWhiteSpace( p, encoding );
01419 if ( !p || !*p )
01420 {
01421 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01422 return 0;
01423 }
01424
01425 const char* end;
01426 const char SINGLE_QUOTE = '\'';
01427 const char DOUBLE_QUOTE = '\"';
01428
01429 if ( *p == SINGLE_QUOTE )
01430 {
01431 ++p;
01432 end = "\'";
01433 p = ReadText( p, &value, false, end, false, encoding );
01434 }
01435 else if ( *p == DOUBLE_QUOTE )
01436 {
01437 ++p;
01438 end = "\"";
01439 p = ReadText( p, &value, false, end, false, encoding );
01440 }
01441 else
01442 {
01443
01444
01445
01446 value = "";
01447 while ( p && *p
01448 && !IsWhiteSpace( *p )
01449 && *p != '/' && *p != '>' )
01450 {
01451 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01452
01453
01454
01455 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01456 return 0;
01457 }
01458 value += *p;
01459 ++p;
01460 }
01461 }
01462 return p;
01463 }
01464
01465 #ifdef TIXML_USE_STL
01466 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01467 {
01468 while ( in->good() )
01469 {
01470 int c = in->peek();
01471 if ( !cdata && (c == '<' ) )
01472 {
01473 return;
01474 }
01475 if ( c <= 0 )
01476 {
01477 TiXmlDocument* document = GetDocument();
01478 if ( document )
01479 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01480 return;
01481 }
01482
01483 (*tag) += (char) c;
01484 in->get();
01485
01486 if ( cdata && c == '>' && tag->size() >= 3 ) {
01487 size_t len = tag->size();
01488 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01489
01490 return;
01491 }
01492 }
01493 }
01494 }
01495 #endif
01496
01497 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01498 {
01499 value = "";
01500 TiXmlDocument* document = GetDocument();
01501
01502 if ( data )
01503 {
01504 data->Stamp( p, encoding );
01505 location = data->Cursor();
01506 }
01507
01508 const char* const startTag = "<![CDATA[";
01509 const char* const endTag = "]]>";
01510
01511 if ( cdata || StringEqual( p, startTag, false, encoding ) )
01512 {
01513 cdata = true;
01514
01515 if ( !StringEqual( p, startTag, false, encoding ) )
01516 {
01517 if ( document )
01518 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01519 return 0;
01520 }
01521 p += strlen( startTag );
01522
01523
01524 while ( p && *p
01525 && !StringEqual( p, endTag, false, encoding )
01526 )
01527 {
01528 value += *p;
01529 ++p;
01530 }
01531
01532 TIXML_STRING dummy;
01533 p = ReadText( p, &dummy, false, endTag, false, encoding );
01534 return p;
01535 }
01536 else
01537 {
01538 bool ignoreWhite = true;
01539
01540 const char* end = "<";
01541 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01542 if ( p && *p )
01543 return p-1;
01544 return 0;
01545 }
01546 }
01547
01548 #ifdef TIXML_USE_STL
01549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01550 {
01551 while ( in->good() )
01552 {
01553 int c = in->get();
01554 if ( c <= 0 )
01555 {
01556 TiXmlDocument* document = GetDocument();
01557 if ( document )
01558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01559 return;
01560 }
01561 (*tag) += (char) c;
01562
01563 if ( c == '>' )
01564 {
01565
01566 return;
01567 }
01568 }
01569 }
01570 #endif
01571
01572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01573 {
01574 p = SkipWhiteSpace( p, _encoding );
01575
01576
01577 TiXmlDocument* document = GetDocument();
01578 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01579 {
01580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01581 return 0;
01582 }
01583 if ( data )
01584 {
01585 data->Stamp( p, _encoding );
01586 location = data->Cursor();
01587 }
01588 p += 5;
01589
01590 version = "";
01591 encoding = "";
01592 standalone = "";
01593
01594 while ( p && *p )
01595 {
01596 if ( *p == '>' )
01597 {
01598 ++p;
01599 return p;
01600 }
01601
01602 p = SkipWhiteSpace( p, _encoding );
01603 if ( StringEqual( p, "version", true, _encoding ) )
01604 {
01605 TiXmlAttribute attrib;
01606 p = attrib.Parse( p, data, _encoding );
01607 version = attrib.Value();
01608 }
01609 else if ( StringEqual( p, "encoding", true, _encoding ) )
01610 {
01611 TiXmlAttribute attrib;
01612 p = attrib.Parse( p, data, _encoding );
01613 encoding = attrib.Value();
01614 }
01615 else if ( StringEqual( p, "standalone", true, _encoding ) )
01616 {
01617 TiXmlAttribute attrib;
01618 p = attrib.Parse( p, data, _encoding );
01619 standalone = attrib.Value();
01620 }
01621 else
01622 {
01623
01624 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01625 ++p;
01626 }
01627 }
01628 return 0;
01629 }
01630
01631 bool TiXmlText::Blank() const
01632 {
01633 for ( unsigned i=0; i<value.length(); i++ )
01634 if ( !IsWhiteSpace( value[i] ) )
01635 return false;
01636 return true;
01637 }
01638