00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <ctype.h>
00026 #include <stddef.h>
00027
00028 #include "tinyxml.h"
00029
00030
00031 #if defined( DEBUG_PARSER )
00032 # if defined( DEBUG ) && defined( _MSC_VER )
00033 # include <windows.h>
00034 # define TIXML_LOG OutputDebugString
00035 # else
00036 # define TIXML_LOG printf
00037 # endif
00038 #endif
00039
00040
00041
00042
00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00044 {
00045 { "&", 5, '&' },
00046 { "<", 4, '<' },
00047 { ">", 4, '>' },
00048 { """, 6, '\"' },
00049 { "'", 6, '\'' }
00050 };
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
// The three bytes EF BB BF (the UTF-8 byte order mark). The parser compares
// input bytes against these in Stamp(), SkipWhiteSpace() and
// TiXmlDocument::Parse().
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
00065
00066 const int TiXmlBase::utf8ByteTable[256] =
00067 {
00068
00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00079 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00080 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00081 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00082 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00083 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00084 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
00085 };
00086
00087
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090 const unsigned long BYTE_MASK = 0xBF;
00091 const unsigned long BYTE_MARK = 0x80;
00092 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093
00094 if (input < 0x80)
00095 *length = 1;
00096 else if ( input < 0x800 )
00097 *length = 2;
00098 else if ( input < 0x10000 )
00099 *length = 3;
00100 else if ( input < 0x200000 )
00101 *length = 4;
00102 else
00103 { *length = 0; return; }
00104
00105 output += *length;
00106
00107
00108 switch (*length)
00109 {
00110 case 4:
00111 --output;
00112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00113 input >>= 6;
00114 case 3:
00115 --output;
00116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00117 input >>= 6;
00118 case 2:
00119 --output;
00120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00121 input >>= 6;
00122 case 1:
00123 --output;
00124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125 }
00126 }
00127
00128
00129 int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
00130 {
00131
00132
00133
00134
00135
00136
00137
00138 if ( anyByte < 127 )
00139 return isalpha( anyByte );
00140 else
00141 return 1;
00142
00143
00144
00145
00146
00147 }
00148
00149
00150 int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
00151 {
00152
00153
00154
00155
00156
00157
00158
00159 if ( anyByte < 127 )
00160 return isalnum( anyByte );
00161 else
00162 return 1;
00163
00164
00165
00166
00167
00168 }
00169
00170
00171 class TiXmlParsingData
00172 {
00173 friend class TiXmlDocument;
00174 public:
00175 void Stamp( const char* now, TiXmlEncoding encoding );
00176
00177 const TiXmlCursor& Cursor() { return cursor; }
00178
00179 private:
00180
00181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182 {
00183 assert( start );
00184 stamp = start;
00185 tabsize = _tabsize;
00186 cursor.row = row;
00187 cursor.col = col;
00188 }
00189
00190 TiXmlCursor cursor;
00191 const char* stamp;
00192 int tabsize;
00193 };
00194
00195
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198 assert( now );
00199
00200
00201 if ( tabsize < 1 )
00202 {
00203 return;
00204 }
00205
00206
00207 int row = cursor.row;
00208 int col = cursor.col;
00209 const char* p = stamp;
00210 assert( p );
00211
00212 while ( p < now )
00213 {
00214
00215 const unsigned char* pU = (const unsigned char*)p;
00216
00217
00218 switch (*pU) {
00219 case 0:
00220
00221
00222 return;
00223
00224 case '\r':
00225
00226 ++row;
00227 col = 0;
00228
00229 ++p;
00230
00231
00232 if (*p == '\n') {
00233 ++p;
00234 }
00235 break;
00236
00237 case '\n':
00238
00239 ++row;
00240 col = 0;
00241
00242
00243 ++p;
00244
00245
00246
00247
00248 if (*p == '\r') {
00249 ++p;
00250 }
00251 break;
00252
00253 case '\t':
00254
00255 ++p;
00256
00257
00258 col = (col / tabsize + 1) * tabsize;
00259 break;
00260
00261 case TIXML_UTF_LEAD_0:
00262 if ( encoding == TIXML_ENCODING_UTF8 )
00263 {
00264 if ( *(p+1) && *(p+2) )
00265 {
00266
00267
00268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269 p += 3;
00270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271 p += 3;
00272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273 p += 3;
00274 else
00275 { p +=3; ++col; }
00276 }
00277 }
00278 else
00279 {
00280 ++p;
00281 ++col;
00282 }
00283 break;
00284
00285 default:
00286 if ( encoding == TIXML_ENCODING_UTF8 )
00287 {
00288
00289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290 if ( step == 0 )
00291 step = 1;
00292 p += step;
00293
00294
00295 ++col;
00296 }
00297 else
00298 {
00299 ++p;
00300 ++col;
00301 }
00302 break;
00303 }
00304 }
00305 cursor.row = row;
00306 cursor.col = col;
00307 assert( cursor.row >= -1 );
00308 assert( cursor.col >= -1 );
00309 stamp = p;
00310 assert( stamp );
00311 }
00312
00313
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316 if ( !p || !*p )
00317 {
00318 return 0;
00319 }
00320 if ( encoding == TIXML_ENCODING_UTF8 )
00321 {
00322 while ( *p )
00323 {
00324 const unsigned char* pU = (const unsigned char*)p;
00325
00326
00327 if ( *(pU+0)==TIXML_UTF_LEAD_0
00328 && *(pU+1)==TIXML_UTF_LEAD_1
00329 && *(pU+2)==TIXML_UTF_LEAD_2 )
00330 {
00331 p += 3;
00332 continue;
00333 }
00334 else if(*(pU+0)==TIXML_UTF_LEAD_0
00335 && *(pU+1)==0xbfU
00336 && *(pU+2)==0xbeU )
00337 {
00338 p += 3;
00339 continue;
00340 }
00341 else if(*(pU+0)==TIXML_UTF_LEAD_0
00342 && *(pU+1)==0xbfU
00343 && *(pU+2)==0xbfU )
00344 {
00345 p += 3;
00346 continue;
00347 }
00348
00349 if ( IsWhiteSpace( *p ) )
00350 ++p;
00351 else
00352 break;
00353 }
00354 }
00355 else
00356 {
00357 while ( *p && IsWhiteSpace( *p ) )
00358 ++p;
00359 }
00360
00361 return p;
00362 }
00363
00364 #ifdef TIXML_USE_STL
00365 bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367 for( ;; )
00368 {
00369 if ( !in->good() ) return false;
00370
00371 int c = in->peek();
00372
00373 if ( !IsWhiteSpace( c ) || c <= 0 )
00374 return true;
00375
00376 *tag += (char) in->get();
00377 }
00378 }
00379
00380 bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382
00383 while ( in->good() )
00384 {
00385 int c = in->peek();
00386 if ( c == character )
00387 return true;
00388 if ( c <= 0 )
00389 return false;
00390
00391 in->get();
00392 *tag += (char) c;
00393 }
00394 return false;
00395 }
00396 #endif
00397
00398
00399
00400
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403
00404
00405
00406 *name = "";
00407 assert( p );
00408
00409
00410
00411
00412
00413
00414
00415
00416 if ( p && *p
00417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418 {
00419 const char* start = p;
00420 while( p && *p
00421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
00422 || *p == '_'
00423 || *p == '-'
00424 || *p == '.'
00425 || *p == ':' ) )
00426 {
00427
00428 ++p;
00429 }
00430 if ( p-start > 0 ) {
00431 name->assign( start, p-start );
00432 }
00433 return p;
00434 }
00435 return 0;
00436 }
00437
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440
00441 TIXML_STRING ent;
00442 int i;
00443 *length = 0;
00444
00445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446 {
00447 unsigned long ucs = 0;
00448 ptrdiff_t delta = 0;
00449 unsigned mult = 1;
00450
00451 if ( *(p+2) == 'x' )
00452 {
00453
00454 if ( !*(p+3) ) return 0;
00455
00456 const char* q = p+3;
00457 q = strchr( q, ';' );
00458
00459 if ( !q || !*q ) return 0;
00460
00461 delta = q-p;
00462 --q;
00463
00464 while ( *q != 'x' )
00465 {
00466 if ( *q >= '0' && *q <= '9' )
00467 ucs += mult * (*q - '0');
00468 else if ( *q >= 'a' && *q <= 'f' )
00469 ucs += mult * (*q - 'a' + 10);
00470 else if ( *q >= 'A' && *q <= 'F' )
00471 ucs += mult * (*q - 'A' + 10 );
00472 else
00473 return 0;
00474 mult *= 16;
00475 --q;
00476 }
00477 }
00478 else
00479 {
00480
00481 if ( !*(p+2) ) return 0;
00482
00483 const char* q = p+2;
00484 q = strchr( q, ';' );
00485
00486 if ( !q || !*q ) return 0;
00487
00488 delta = q-p;
00489 --q;
00490
00491 while ( *q != '#' )
00492 {
00493 if ( *q >= '0' && *q <= '9' )
00494 ucs += mult * (*q - '0');
00495 else
00496 return 0;
00497 mult *= 10;
00498 --q;
00499 }
00500 }
00501 if ( encoding == TIXML_ENCODING_UTF8 )
00502 {
00503
00504 ConvertUTF32ToUTF8( ucs, value, length );
00505 }
00506 else
00507 {
00508 *value = (char)ucs;
00509 *length = 1;
00510 }
00511 return p + delta + 1;
00512 }
00513
00514
00515 for( i=0; i<NUM_ENTITY; ++i )
00516 {
00517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518 {
00519 assert( strlen( entity[i].str ) == entity[i].strLength );
00520 *value = entity[i].chr;
00521 *length = 1;
00522 return ( p + entity[i].strLength );
00523 }
00524 }
00525
00526
00527 *value = *p;
00528
00529
00530 return p+1;
00531 }
00532
00533
00534 bool TiXmlBase::StringEqual( const char* p,
00535 const char* tag,
00536 bool ignoreCase,
00537 TiXmlEncoding encoding )
00538 {
00539 assert( p );
00540 assert( tag );
00541 if ( !p || !*p )
00542 {
00543 assert( 0 );
00544 return false;
00545 }
00546
00547 const char* q = p;
00548
00549 if ( ignoreCase )
00550 {
00551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552 {
00553 ++q;
00554 ++tag;
00555 }
00556
00557 if ( *tag == 0 )
00558 return true;
00559 }
00560 else
00561 {
00562 while ( *q && *tag && *q == *tag )
00563 {
00564 ++q;
00565 ++tag;
00566 }
00567
00568 if ( *tag == 0 )
00569 return true;
00570 }
00571 return false;
00572 }
00573
00574 const char* TiXmlBase::ReadText( const char* p,
00575 TIXML_STRING * text,
00576 bool trimWhiteSpace,
00577 const char* endTag,
00578 bool caseInsensitive,
00579 TiXmlEncoding encoding )
00580 {
00581 *text = "";
00582 if ( !trimWhiteSpace
00583 || !condenseWhiteSpace )
00584 {
00585
00586 while ( p && *p
00587 && !StringEqual( p, endTag, caseInsensitive, encoding )
00588 )
00589 {
00590 int len;
00591 char cArr[4] = { 0, 0, 0, 0 };
00592 p = GetChar( p, cArr, &len, encoding );
00593 text->append( cArr, len );
00594 }
00595 }
00596 else
00597 {
00598 bool whitespace = false;
00599
00600
00601 p = SkipWhiteSpace( p, encoding );
00602 while ( p && *p
00603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604 {
00605 if ( *p == '\r' || *p == '\n' )
00606 {
00607 whitespace = true;
00608 ++p;
00609 }
00610 else if ( IsWhiteSpace( *p ) )
00611 {
00612 whitespace = true;
00613 ++p;
00614 }
00615 else
00616 {
00617
00618
00619 if ( whitespace )
00620 {
00621 (*text) += ' ';
00622 whitespace = false;
00623 }
00624 int len;
00625 char cArr[4] = { 0, 0, 0, 0 };
00626 p = GetChar( p, cArr, &len, encoding );
00627 if ( len == 1 )
00628 (*text) += cArr[0];
00629 else
00630 text->append( cArr, len );
00631 }
00632 }
00633 }
00634 if ( p && *p )
00635 p += strlen( endTag );
00636 return p;
00637 }
00638
00639 #ifdef TIXML_USE_STL
00640
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643
00644
00645
00646
00647
00648
00649
00650 if ( !StreamTo( in, '<', tag ) )
00651 {
00652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653 return;
00654 }
00655
00656 while ( in->good() )
00657 {
00658 int tagIndex = (int) tag->length();
00659 while ( in->good() && in->peek() != '>' )
00660 {
00661 int c = in->get();
00662 if ( c <= 0 )
00663 {
00664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665 break;
00666 }
00667 (*tag) += (char) c;
00668 }
00669
00670 if ( in->good() )
00671 {
00672
00673
00674
00675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676
00677 if ( node )
00678 {
00679 node->StreamIn( in, tag );
00680 bool isElement = node->ToElement() != 0;
00681 delete node;
00682 node = 0;
00683
00684
00685
00686 if ( isElement )
00687 {
00688 return;
00689 }
00690 }
00691 else
00692 {
00693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694 return;
00695 }
00696 }
00697 }
00698
00699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701
00702 #endif
00703
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706 ClearError();
00707
00708
00709
00710
00711 if ( !p || !*p )
00712 {
00713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714 return 0;
00715 }
00716
00717
00718
00719
00720 location.Clear();
00721 if ( prevData )
00722 {
00723 location.row = prevData->cursor.row;
00724 location.col = prevData->cursor.col;
00725 }
00726 else
00727 {
00728 location.row = 0;
00729 location.col = 0;
00730 }
00731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732 location = data.Cursor();
00733
00734 if ( encoding == TIXML_ENCODING_UNKNOWN )
00735 {
00736
00737 const unsigned char* pU = (const unsigned char*)p;
00738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741 {
00742 encoding = TIXML_ENCODING_UTF8;
00743 useMicrosoftBOM = true;
00744 }
00745 }
00746
00747 p = SkipWhiteSpace( p, encoding );
00748 if ( !p )
00749 {
00750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751 return 0;
00752 }
00753
00754 while ( p && *p )
00755 {
00756 TiXmlNode* node = Identify( p, encoding );
00757 if ( node )
00758 {
00759 p = node->Parse( p, &data, encoding );
00760 LinkEndChild( node );
00761 }
00762 else
00763 {
00764 break;
00765 }
00766
00767
00768 if ( encoding == TIXML_ENCODING_UNKNOWN
00769 && node->ToDeclaration() )
00770 {
00771 TiXmlDeclaration* dec = node->ToDeclaration();
00772 const char* enc = dec->Encoding();
00773 assert( enc );
00774
00775 if ( *enc == 0 )
00776 encoding = TIXML_ENCODING_UTF8;
00777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778 encoding = TIXML_ENCODING_UTF8;
00779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780 encoding = TIXML_ENCODING_UTF8;
00781 else
00782 encoding = TIXML_ENCODING_LEGACY;
00783 }
00784
00785 p = SkipWhiteSpace( p, encoding );
00786 }
00787
00788
00789 if ( !firstChild ) {
00790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791 return 0;
00792 }
00793
00794
00795 return p;
00796 }
00797
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 {
00800
00801 if ( error )
00802 return;
00803
00804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805 error = true;
00806 errorId = err;
00807 errorDesc = errorString[ errorId ];
00808
00809 errorLocation.Clear();
00810 if ( pError && data )
00811 {
00812 data->Stamp( pError, encoding );
00813 errorLocation = data->Cursor();
00814 }
00815 }
00816
00817
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820 TiXmlNode* returnNode = 0;
00821
00822 p = SkipWhiteSpace( p, encoding );
00823 if( !p || !*p || *p != '<' )
00824 {
00825 return 0;
00826 }
00827
00828 p = SkipWhiteSpace( p, encoding );
00829
00830 if ( !p || !*p )
00831 {
00832 return 0;
00833 }
00834
00835
00836
00837
00838
00839
00840
00841
00842 const char* xmlHeader = { "<?xml" };
00843 const char* commentHeader = { "<!--" };
00844 const char* dtdHeader = { "<!" };
00845 const char* cdataHeader = { "<![CDATA[" };
00846
00847 if ( StringEqual( p, xmlHeader, true, encoding ) )
00848 {
00849 #ifdef DEBUG_PARSER
00850 TIXML_LOG( "XML parsing Declaration\n" );
00851 #endif
00852 returnNode = new TiXmlDeclaration();
00853 }
00854 else if ( StringEqual( p, commentHeader, false, encoding ) )
00855 {
00856 #ifdef DEBUG_PARSER
00857 TIXML_LOG( "XML parsing Comment\n" );
00858 #endif
00859 returnNode = new TiXmlComment();
00860 }
00861 else if ( StringEqual( p, cdataHeader, false, encoding ) )
00862 {
00863 #ifdef DEBUG_PARSER
00864 TIXML_LOG( "XML parsing CDATA\n" );
00865 #endif
00866 TiXmlText* text = new TiXmlText( "" );
00867 text->SetCDATA( true );
00868 returnNode = text;
00869 }
00870 else if ( StringEqual( p, dtdHeader, false, encoding ) )
00871 {
00872 #ifdef DEBUG_PARSER
00873 TIXML_LOG( "XML parsing Unknown(1)\n" );
00874 #endif
00875 returnNode = new TiXmlUnknown();
00876 }
00877 else if ( IsAlpha( *(p+1), encoding )
00878 || *(p+1) == '_' )
00879 {
00880 #ifdef DEBUG_PARSER
00881 TIXML_LOG( "XML parsing Element\n" );
00882 #endif
00883 returnNode = new TiXmlElement( "" );
00884 }
00885 else
00886 {
00887 #ifdef DEBUG_PARSER
00888 TIXML_LOG( "XML parsing Unknown(2)\n" );
00889 #endif
00890 returnNode = new TiXmlUnknown();
00891 }
00892
00893 if ( returnNode )
00894 {
00895
00896 returnNode->parent = this;
00897 }
00898 return returnNode;
00899 }
00900
00901 #ifdef TIXML_USE_STL
00902
00903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00904 {
00905
00906
00907 while( in->good() )
00908 {
00909 int c = in->get();
00910 if ( c <= 0 )
00911 {
00912 TiXmlDocument* document = GetDocument();
00913 if ( document )
00914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00915 return;
00916 }
00917 (*tag) += (char) c ;
00918
00919 if ( c == '>' )
00920 break;
00921 }
00922
00923 if ( tag->length() < 3 ) return;
00924
00925
00926
00927
00928 if ( tag->at( tag->length() - 1 ) == '>'
00929 && tag->at( tag->length() - 2 ) == '/' )
00930 {
00931
00932 return;
00933 }
00934 else if ( tag->at( tag->length() - 1 ) == '>' )
00935 {
00936
00937
00938
00939
00940
00941 for ( ;; )
00942 {
00943 StreamWhiteSpace( in, tag );
00944
00945
00946 if ( in->good() && in->peek() != '<' )
00947 {
00948
00949 TiXmlText text( "" );
00950 text.StreamIn( in, tag );
00951
00952
00953
00954 continue;
00955 }
00956
00957
00958
00959 if ( !in->good() ) return;
00960 assert( in->peek() == '<' );
00961 int tagIndex = (int) tag->length();
00962
00963 bool closingTag = false;
00964 bool firstCharFound = false;
00965
00966 for( ;; )
00967 {
00968 if ( !in->good() )
00969 return;
00970
00971 int c = in->peek();
00972 if ( c <= 0 )
00973 {
00974 TiXmlDocument* document = GetDocument();
00975 if ( document )
00976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00977 return;
00978 }
00979
00980 if ( c == '>' )
00981 break;
00982
00983 *tag += (char) c;
00984 in->get();
00985
00986
00987 if ( c == '[' && tag->size() >= 9 )
00988 {
00989 size_t len = tag->size();
00990 const char* start = tag->c_str() + len - 9;
00991 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00992 assert( !closingTag );
00993 break;
00994 }
00995 }
00996
00997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00998 {
00999 firstCharFound = true;
01000 if ( c == '/' )
01001 closingTag = true;
01002 }
01003 }
01004
01005
01006 if ( closingTag )
01007 {
01008 if ( !in->good() )
01009 return;
01010
01011 int c = in->get();
01012 if ( c <= 0 )
01013 {
01014 TiXmlDocument* document = GetDocument();
01015 if ( document )
01016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01017 return;
01018 }
01019 assert( c == '>' );
01020 *tag += (char) c;
01021
01022
01023 return;
01024 }
01025 else
01026 {
01027
01028 const char* tagloc = tag->c_str() + tagIndex;
01029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01030 if ( !node )
01031 return;
01032 node->StreamIn( in, tag );
01033 delete node;
01034 node = 0;
01035
01036
01037 }
01038 }
01039 }
01040 }
01041 #endif
01042
01043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01044 {
01045 p = SkipWhiteSpace( p, encoding );
01046 TiXmlDocument* document = GetDocument();
01047
01048 if ( !p || !*p )
01049 {
01050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01051 return 0;
01052 }
01053
01054 if ( data )
01055 {
01056 data->Stamp( p, encoding );
01057 location = data->Cursor();
01058 }
01059
01060 if ( *p != '<' )
01061 {
01062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01063 return 0;
01064 }
01065
01066 p = SkipWhiteSpace( p+1, encoding );
01067
01068
01069 const char* pErr = p;
01070
01071 p = ReadName( p, &value, encoding );
01072 if ( !p || !*p )
01073 {
01074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01075 return 0;
01076 }
01077
01078 TIXML_STRING endTag ("</");
01079 endTag += value;
01080
01081
01082
01083 while ( p && *p )
01084 {
01085 pErr = p;
01086 p = SkipWhiteSpace( p, encoding );
01087 if ( !p || !*p )
01088 {
01089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01090 return 0;
01091 }
01092 if ( *p == '/' )
01093 {
01094 ++p;
01095
01096 if ( *p != '>' )
01097 {
01098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01099 return 0;
01100 }
01101 return (p+1);
01102 }
01103 else if ( *p == '>' )
01104 {
01105
01106
01107
01108 ++p;
01109 p = ReadValue( p, data, encoding );
01110 if ( !p || !*p ) {
01111
01112
01113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01114 return 0;
01115 }
01116
01117
01118
01119
01120
01121
01122 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01123 {
01124 p += endTag.length();
01125 p = SkipWhiteSpace( p, encoding );
01126 if ( p && *p && *p == '>' ) {
01127 ++p;
01128 return p;
01129 }
01130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01131 return 0;
01132 }
01133 else
01134 {
01135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01136 return 0;
01137 }
01138 }
01139 else
01140 {
01141
01142 TiXmlAttribute* attrib = new TiXmlAttribute();
01143 if ( !attrib )
01144 {
01145 return 0;
01146 }
01147
01148 attrib->SetDocument( document );
01149 pErr = p;
01150 p = attrib->Parse( p, data, encoding );
01151
01152 if ( !p || !*p )
01153 {
01154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01155 delete attrib;
01156 return 0;
01157 }
01158
01159
01160 #ifdef TIXML_USE_STL
01161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01162 #else
01163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01164 #endif
01165 if ( node )
01166 {
01167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01168 delete attrib;
01169 return 0;
01170 }
01171
01172 attributeSet.Add( attrib );
01173 }
01174 }
01175 return p;
01176 }
01177
01178
01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01180 {
01181 TiXmlDocument* document = GetDocument();
01182
01183
01184 const char* pWithWhiteSpace = p;
01185 p = SkipWhiteSpace( p, encoding );
01186
01187 while ( p && *p )
01188 {
01189 if ( *p != '<' )
01190 {
01191
01192 TiXmlText* textNode = new TiXmlText( "" );
01193
01194 if ( !textNode )
01195 {
01196 return 0;
01197 }
01198
01199 if ( TiXmlBase::IsWhiteSpaceCondensed() )
01200 {
01201 p = textNode->Parse( p, data, encoding );
01202 }
01203 else
01204 {
01205
01206
01207 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01208 }
01209
01210 if ( !textNode->Blank() )
01211 LinkEndChild( textNode );
01212 else
01213 delete textNode;
01214 }
01215 else
01216 {
01217
01218
01219
01220 if ( StringEqual( p, "</", false, encoding ) )
01221 {
01222 return p;
01223 }
01224 else
01225 {
01226 TiXmlNode* node = Identify( p, encoding );
01227 if ( node )
01228 {
01229 p = node->Parse( p, data, encoding );
01230 LinkEndChild( node );
01231 }
01232 else
01233 {
01234 return 0;
01235 }
01236 }
01237 }
01238 pWithWhiteSpace = p;
01239 p = SkipWhiteSpace( p, encoding );
01240 }
01241
01242 if ( !p )
01243 {
01244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01245 }
01246 return p;
01247 }
01248
01249
01250 #ifdef TIXML_USE_STL
01251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01252 {
01253 while ( in->good() )
01254 {
01255 int c = in->get();
01256 if ( c <= 0 )
01257 {
01258 TiXmlDocument* document = GetDocument();
01259 if ( document )
01260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01261 return;
01262 }
01263 (*tag) += (char) c;
01264
01265 if ( c == '>' )
01266 {
01267
01268 return;
01269 }
01270 }
01271 }
01272 #endif
01273
01274
01275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01276 {
01277 TiXmlDocument* document = GetDocument();
01278 p = SkipWhiteSpace( p, encoding );
01279
01280 if ( data )
01281 {
01282 data->Stamp( p, encoding );
01283 location = data->Cursor();
01284 }
01285 if ( !p || !*p || *p != '<' )
01286 {
01287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01288 return 0;
01289 }
01290 ++p;
01291 value = "";
01292
01293 while ( p && *p && *p != '>' )
01294 {
01295 value += *p;
01296 ++p;
01297 }
01298
01299 if ( !p )
01300 {
01301 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01302 }
01303 else if ( *p == '>' )
01304 {
01305 return p+1;
01306 }
01307 return p;
01308 }
01309
01310 #ifdef TIXML_USE_STL
01311 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01312 {
01313 while ( in->good() )
01314 {
01315 int c = in->get();
01316 if ( c <= 0 )
01317 {
01318 TiXmlDocument* document = GetDocument();
01319 if ( document )
01320 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01321 return;
01322 }
01323
01324 (*tag) += (char) c;
01325
01326 if ( c == '>'
01327 && tag->at( tag->length() - 2 ) == '-'
01328 && tag->at( tag->length() - 3 ) == '-' )
01329 {
01330
01331 return;
01332 }
01333 }
01334 }
01335 #endif
01336
01337
01338 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01339 {
01340 TiXmlDocument* document = GetDocument();
01341 value = "";
01342
01343 p = SkipWhiteSpace( p, encoding );
01344
01345 if ( data )
01346 {
01347 data->Stamp( p, encoding );
01348 location = data->Cursor();
01349 }
01350 const char* startTag = "<!--";
01351 const char* endTag = "-->";
01352
01353 if ( !StringEqual( p, startTag, false, encoding ) )
01354 {
01355 if ( document )
01356 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01357 return 0;
01358 }
01359 p += strlen( startTag );
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379 value = "";
01380
01381 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01382 {
01383 value.append( p, 1 );
01384 ++p;
01385 }
01386 if ( p && *p )
01387 p += strlen( endTag );
01388
01389 return p;
01390 }
01391
01392
01393 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01394 {
01395 p = SkipWhiteSpace( p, encoding );
01396 if ( !p || !*p ) return 0;
01397
01398 if ( data )
01399 {
01400 data->Stamp( p, encoding );
01401 location = data->Cursor();
01402 }
01403
01404 const char* pErr = p;
01405 p = ReadName( p, &name, encoding );
01406 if ( !p || !*p )
01407 {
01408 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01409 return 0;
01410 }
01411 p = SkipWhiteSpace( p, encoding );
01412 if ( !p || !*p || *p != '=' )
01413 {
01414 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01415 return 0;
01416 }
01417
01418 ++p;
01419 p = SkipWhiteSpace( p, encoding );
01420 if ( !p || !*p )
01421 {
01422 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01423 return 0;
01424 }
01425
01426 const char* end;
01427 const char SINGLE_QUOTE = '\'';
01428 const char DOUBLE_QUOTE = '\"';
01429
01430 if ( *p == SINGLE_QUOTE )
01431 {
01432 ++p;
01433 end = "\'";
01434 p = ReadText( p, &value, false, end, false, encoding );
01435 }
01436 else if ( *p == DOUBLE_QUOTE )
01437 {
01438 ++p;
01439 end = "\"";
01440 p = ReadText( p, &value, false, end, false, encoding );
01441 }
01442 else
01443 {
01444
01445
01446
01447 value = "";
01448 while ( p && *p
01449 && !IsWhiteSpace( *p )
01450 && *p != '/' && *p != '>' )
01451 {
01452 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01453
01454
01455
01456 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01457 return 0;
01458 }
01459 value += *p;
01460 ++p;
01461 }
01462 }
01463 return p;
01464 }
01465
01466 #ifdef TIXML_USE_STL
01467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01468 {
01469 while ( in->good() )
01470 {
01471 int c = in->peek();
01472 if ( !cdata && (c == '<' ) )
01473 {
01474 return;
01475 }
01476 if ( c <= 0 )
01477 {
01478 TiXmlDocument* document = GetDocument();
01479 if ( document )
01480 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01481 return;
01482 }
01483
01484 (*tag) += (char) c;
01485 in->get();
01486
01487 if ( cdata && c == '>' && tag->size() >= 3 ) {
01488 size_t len = tag->size();
01489 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01490
01491 return;
01492 }
01493 }
01494 }
01495 }
01496 #endif
01497
01498 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01499 {
01500 value = "";
01501 TiXmlDocument* document = GetDocument();
01502
01503 if ( data )
01504 {
01505 data->Stamp( p, encoding );
01506 location = data->Cursor();
01507 }
01508
01509 const char* const startTag = "<![CDATA[";
01510 const char* const endTag = "]]>";
01511
01512 if ( cdata || StringEqual( p, startTag, false, encoding ) )
01513 {
01514 cdata = true;
01515
01516 if ( !StringEqual( p, startTag, false, encoding ) )
01517 {
01518 if ( document )
01519 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01520 return 0;
01521 }
01522 p += strlen( startTag );
01523
01524
01525 while ( p && *p
01526 && !StringEqual( p, endTag, false, encoding )
01527 )
01528 {
01529 value += *p;
01530 ++p;
01531 }
01532
01533 TIXML_STRING dummy;
01534 p = ReadText( p, &dummy, false, endTag, false, encoding );
01535 return p;
01536 }
01537 else
01538 {
01539 bool ignoreWhite = true;
01540
01541 const char* end = "<";
01542 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01543 if ( p )
01544 return p-1;
01545 return 0;
01546 }
01547 }
01548
01549 #ifdef TIXML_USE_STL
01550 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01551 {
01552 while ( in->good() )
01553 {
01554 int c = in->get();
01555 if ( c <= 0 )
01556 {
01557 TiXmlDocument* document = GetDocument();
01558 if ( document )
01559 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01560 return;
01561 }
01562 (*tag) += (char) c;
01563
01564 if ( c == '>' )
01565 {
01566
01567 return;
01568 }
01569 }
01570 }
01571 #endif
01572
01573 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01574 {
01575 p = SkipWhiteSpace( p, _encoding );
01576
01577
01578 TiXmlDocument* document = GetDocument();
01579 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01580 {
01581 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01582 return 0;
01583 }
01584 if ( data )
01585 {
01586 data->Stamp( p, _encoding );
01587 location = data->Cursor();
01588 }
01589 p += 5;
01590
01591 version = "";
01592 encoding = "";
01593 standalone = "";
01594
01595 while ( p && *p )
01596 {
01597 if ( *p == '>' )
01598 {
01599 ++p;
01600 return p;
01601 }
01602
01603 p = SkipWhiteSpace( p, _encoding );
01604 if ( StringEqual( p, "version", true, _encoding ) )
01605 {
01606 TiXmlAttribute attrib;
01607 p = attrib.Parse( p, data, _encoding );
01608 version = attrib.Value();
01609 }
01610 else if ( StringEqual( p, "encoding", true, _encoding ) )
01611 {
01612 TiXmlAttribute attrib;
01613 p = attrib.Parse( p, data, _encoding );
01614 encoding = attrib.Value();
01615 }
01616 else if ( StringEqual( p, "standalone", true, _encoding ) )
01617 {
01618 TiXmlAttribute attrib;
01619 p = attrib.Parse( p, data, _encoding );
01620 standalone = attrib.Value();
01621 }
01622 else
01623 {
01624
01625 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01626 ++p;
01627 }
01628 }
01629 return 0;
01630 }
01631
01632 bool TiXmlText::Blank() const
01633 {
01634 for ( unsigned i=0; i<value.length(); i++ )
01635 if ( !IsWhiteSpace( value[i] ) )
01636 return false;
01637 return true;
01638 }
01639