00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <ctype.h>
00026 #include <stddef.h>
00027
00028 #include "tinyxml.h"
00029
00030
00031 #if defined( DEBUG_PARSER )
00032 # if defined( DEBUG ) && defined( _MSC_VER )
00033 # include <windows.h>
00034 # define TIXML_LOG OutputDebugString
00035 # else
00036 # define TIXML_LOG printf
00037 # endif
00038 #endif
00039
00040
00041
00042
00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00044 {
00045 { "&", 5, '&' },
00046 { "<", 4, '<' },
00047 { ">", 4, '>' },
00048 { """, 6, '\"' },
00049 { "'", 6, '\'' }
00050 };
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
// Three-byte lead sequence (EF BB BF). TiXmlDocument::Parse treats a buffer
// that starts with these bytes as UTF-8, and SkipWhiteSpace / Stamp step
// over the sequence without counting it as text.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00065
00066 const int TiXmlBase::utf8ByteTable[256] =
00067 {
00068
00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00079 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00080 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00081 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00082 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00083 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00084 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
00085 };
00086
00087
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090 const unsigned long BYTE_MASK = 0xBF;
00091 const unsigned long BYTE_MARK = 0x80;
00092 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093
00094 if (input < 0x80)
00095 *length = 1;
00096 else if ( input < 0x800 )
00097 *length = 2;
00098 else if ( input < 0x10000 )
00099 *length = 3;
00100 else if ( input < 0x200000 )
00101 *length = 4;
00102 else
00103 { *length = 0; return; }
00104
00105 output += *length;
00106
00107
00108 switch (*length)
00109 {
00110 case 4:
00111 --output;
00112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00113 input >>= 6;
00114 case 3:
00115 --output;
00116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00117 input >>= 6;
00118 case 2:
00119 --output;
00120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00121 input >>= 6;
00122 case 1:
00123 --output;
00124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125 }
00126 }
00127
00128
00129 int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
00130 {
00131
00132
00133
00134
00135
00136
00137
00138 if ( anyByte < 127 )
00139 return isalpha( anyByte );
00140 else
00141 return 1;
00142
00143
00144
00145
00146
00147 }
00148
00149
00150 int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
00151 {
00152
00153
00154
00155
00156
00157
00158
00159 if ( anyByte < 127 )
00160 return isalnum( anyByte );
00161 else
00162 return 1;
00163
00164
00165
00166
00167
00168 }
00169
00170
00171 class TiXmlParsingData
00172 {
00173 friend class TiXmlDocument;
00174 public:
00175 void Stamp( const char* now, TiXmlEncoding encoding );
00176
00177 const TiXmlCursor& Cursor() { return cursor; }
00178
00179 private:
00180
00181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182 {
00183 assert( start );
00184 stamp = start;
00185 tabsize = _tabsize;
00186 cursor.row = row;
00187 cursor.col = col;
00188 }
00189
00190 TiXmlCursor cursor;
00191 const char* stamp;
00192 int tabsize;
00193 };
00194
00195
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198 assert( now );
00199
00200
00201 if ( tabsize < 1 )
00202 {
00203 return;
00204 }
00205
00206
00207 int row = cursor.row;
00208 int col = cursor.col;
00209 const char* p = stamp;
00210 assert( p );
00211
00212 while ( p < now )
00213 {
00214
00215 const unsigned char* pU = (const unsigned char*)p;
00216
00217
00218 switch (*pU) {
00219 case 0:
00220
00221
00222 return;
00223
00224 case '\r':
00225
00226 ++row;
00227 col = 0;
00228
00229 ++p;
00230
00231
00232 if (*p == '\n') {
00233 ++p;
00234 }
00235 break;
00236
00237 case '\n':
00238
00239 ++row;
00240 col = 0;
00241
00242
00243 ++p;
00244
00245
00246
00247
00248 if (*p == '\r') {
00249 ++p;
00250 }
00251 break;
00252
00253 case '\t':
00254
00255 ++p;
00256
00257
00258 col = (col / tabsize + 1) * tabsize;
00259 break;
00260
00261 case TIXML_UTF_LEAD_0:
00262 if ( encoding == TIXML_ENCODING_UTF8 )
00263 {
00264 if ( *(p+1) && *(p+2) )
00265 {
00266
00267
00268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269 p += 3;
00270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271 p += 3;
00272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273 p += 3;
00274 else
00275 { p +=3; ++col; }
00276 }
00277 }
00278 else
00279 {
00280 ++p;
00281 ++col;
00282 }
00283 break;
00284
00285 default:
00286 if ( encoding == TIXML_ENCODING_UTF8 )
00287 {
00288
00289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290 if ( step == 0 )
00291 step = 1;
00292 p += step;
00293
00294
00295 ++col;
00296 }
00297 else
00298 {
00299 ++p;
00300 ++col;
00301 }
00302 break;
00303 }
00304 }
00305 cursor.row = row;
00306 cursor.col = col;
00307 assert( cursor.row >= -1 );
00308 assert( cursor.col >= -1 );
00309 stamp = p;
00310 assert( stamp );
00311 }
00312
00313
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316 if ( !p || !*p )
00317 {
00318 return 0;
00319 }
00320 if ( encoding == TIXML_ENCODING_UTF8 )
00321 {
00322 while ( *p )
00323 {
00324 const unsigned char* pU = (const unsigned char*)p;
00325
00326
00327 if ( *(pU+0)==TIXML_UTF_LEAD_0
00328 && *(pU+1)==TIXML_UTF_LEAD_1
00329 && *(pU+2)==TIXML_UTF_LEAD_2 )
00330 {
00331 p += 3;
00332 continue;
00333 }
00334 else if(*(pU+0)==TIXML_UTF_LEAD_0
00335 && *(pU+1)==0xbfU
00336 && *(pU+2)==0xbeU )
00337 {
00338 p += 3;
00339 continue;
00340 }
00341 else if(*(pU+0)==TIXML_UTF_LEAD_0
00342 && *(pU+1)==0xbfU
00343 && *(pU+2)==0xbfU )
00344 {
00345 p += 3;
00346 continue;
00347 }
00348
00349 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00350 ++p;
00351 else
00352 break;
00353 }
00354 }
00355 else
00356 {
00357 while (*p && (IsWhiteSpace( *p ) || *p == '\n' || *p =='\r'))
00358 ++p;
00359 }
00360
00361 return p;
00362 }
00363
00364 #ifdef TIXML_USE_STL
00365 bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367 for( ;; )
00368 {
00369 if ( !in->good() ) return false;
00370
00371 int c = in->peek();
00372
00373 if ( !IsWhiteSpace( c ) || c <= 0 )
00374 return true;
00375
00376 *tag += (char) in->get();
00377 }
00378 }
00379
00380 bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382
00383 while ( in->good() )
00384 {
00385 int c = in->peek();
00386 if ( c == character )
00387 return true;
00388 if ( c <= 0 )
00389 return false;
00390
00391 in->get();
00392 *tag += (char) c;
00393 }
00394 return false;
00395 }
00396 #endif
00397
00398
00399
00400
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403
00404
00405
00406 *name = "";
00407 assert( p );
00408
00409
00410
00411
00412
00413
00414
00415
00416 if ( p && *p
00417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418 {
00419 const char* start = p;
00420 while( p && *p
00421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
00422 || *p == '_'
00423 || *p == '-'
00424 || *p == '.'
00425 || *p == ':' ) )
00426 {
00427
00428 ++p;
00429 }
00430 if ( p-start > 0 ) {
00431 name->assign( start, p-start );
00432 }
00433 return p;
00434 }
00435 return 0;
00436 }
00437
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440
00441 TIXML_STRING ent;
00442 int i;
00443 *length = 0;
00444
00445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446 {
00447 unsigned long ucs = 0;
00448 ptrdiff_t delta = 0;
00449 unsigned mult = 1;
00450
00451 if ( *(p+2) == 'x' )
00452 {
00453
00454 if ( !*(p+3) ) return 0;
00455
00456 const char* q = p+3;
00457 q = strchr( q, ';' );
00458
00459 if ( !q || !*q ) return 0;
00460
00461 delta = q-p;
00462 --q;
00463
00464 while ( *q != 'x' )
00465 {
00466 if ( *q >= '0' && *q <= '9' )
00467 ucs += mult * (*q - '0');
00468 else if ( *q >= 'a' && *q <= 'f' )
00469 ucs += mult * (*q - 'a' + 10);
00470 else if ( *q >= 'A' && *q <= 'F' )
00471 ucs += mult * (*q - 'A' + 10 );
00472 else
00473 return 0;
00474 mult *= 16;
00475 --q;
00476 }
00477 }
00478 else
00479 {
00480
00481 if ( !*(p+2) ) return 0;
00482
00483 const char* q = p+2;
00484 q = strchr( q, ';' );
00485
00486 if ( !q || !*q ) return 0;
00487
00488 delta = q-p;
00489 --q;
00490
00491 while ( *q != '#' )
00492 {
00493 if ( *q >= '0' && *q <= '9' )
00494 ucs += mult * (*q - '0');
00495 else
00496 return 0;
00497 mult *= 10;
00498 --q;
00499 }
00500 }
00501 if ( encoding == TIXML_ENCODING_UTF8 )
00502 {
00503
00504 ConvertUTF32ToUTF8( ucs, value, length );
00505 }
00506 else
00507 {
00508 *value = (char)ucs;
00509 *length = 1;
00510 }
00511 return p + delta + 1;
00512 }
00513
00514
00515 for( i=0; i<NUM_ENTITY; ++i )
00516 {
00517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518 {
00519 assert( strlen( entity[i].str ) == entity[i].strLength );
00520 *value = entity[i].chr;
00521 *length = 1;
00522 return ( p + entity[i].strLength );
00523 }
00524 }
00525
00526
00527 *value = *p;
00528
00529
00530 return p+1;
00531 }
00532
00533
00534 bool TiXmlBase::StringEqual( const char* p,
00535 const char* tag,
00536 bool ignoreCase,
00537 TiXmlEncoding encoding )
00538 {
00539 assert( p );
00540 assert( tag );
00541 if ( !p || !*p )
00542 {
00543 assert( 0 );
00544 return false;
00545 }
00546
00547 const char* q = p;
00548
00549 if ( ignoreCase )
00550 {
00551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552 {
00553 ++q;
00554 ++tag;
00555 }
00556
00557 if ( *tag == 0 )
00558 return true;
00559 }
00560 else
00561 {
00562 while ( *q && *tag && *q == *tag )
00563 {
00564 ++q;
00565 ++tag;
00566 }
00567
00568 if ( *tag == 0 )
00569 return true;
00570 }
00571 return false;
00572 }
00573
00574 const char* TiXmlBase::ReadText( const char* p,
00575 TIXML_STRING * text,
00576 bool trimWhiteSpace,
00577 const char* endTag,
00578 bool caseInsensitive,
00579 TiXmlEncoding encoding )
00580 {
00581 *text = "";
00582 if ( !trimWhiteSpace
00583 || !condenseWhiteSpace )
00584 {
00585
00586 while ( p && *p
00587 && !StringEqual( p, endTag, caseInsensitive, encoding )
00588 )
00589 {
00590 int len;
00591 char cArr[4] = { 0, 0, 0, 0 };
00592 p = GetChar( p, cArr, &len, encoding );
00593 text->append( cArr, len );
00594 }
00595 }
00596 else
00597 {
00598 bool whitespace = false;
00599
00600
00601 p = SkipWhiteSpace( p, encoding );
00602 while ( p && *p
00603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604 {
00605 if ( *p == '\r' || *p == '\n' )
00606 {
00607 whitespace = true;
00608 ++p;
00609 }
00610 else if ( IsWhiteSpace( *p ) )
00611 {
00612 whitespace = true;
00613 ++p;
00614 }
00615 else
00616 {
00617
00618
00619 if ( whitespace )
00620 {
00621 (*text) += ' ';
00622 whitespace = false;
00623 }
00624 int len;
00625 char cArr[4] = { 0, 0, 0, 0 };
00626 p = GetChar( p, cArr, &len, encoding );
00627 if ( len == 1 )
00628 (*text) += cArr[0];
00629 else
00630 text->append( cArr, len );
00631 }
00632 }
00633 }
00634 if ( p )
00635 p += strlen( endTag );
00636 return p;
00637 }
00638
00639 #ifdef TIXML_USE_STL
00640
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643
00644
00645
00646
00647
00648
00649
00650 if ( !StreamTo( in, '<', tag ) )
00651 {
00652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653 return;
00654 }
00655
00656 while ( in->good() )
00657 {
00658 int tagIndex = (int) tag->length();
00659 while ( in->good() && in->peek() != '>' )
00660 {
00661 int c = in->get();
00662 if ( c <= 0 )
00663 {
00664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665 break;
00666 }
00667 (*tag) += (char) c;
00668 }
00669
00670 if ( in->good() )
00671 {
00672
00673
00674
00675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676
00677 if ( node )
00678 {
00679 node->StreamIn( in, tag );
00680 bool isElement = node->ToElement() != 0;
00681 delete node;
00682 node = 0;
00683
00684
00685
00686 if ( isElement )
00687 {
00688 return;
00689 }
00690 }
00691 else
00692 {
00693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694 return;
00695 }
00696 }
00697 }
00698
00699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701
00702 #endif
00703
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706 ClearError();
00707
00708
00709
00710
00711 if ( !p || !*p )
00712 {
00713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714 return 0;
00715 }
00716
00717
00718
00719
00720 location.Clear();
00721 if ( prevData )
00722 {
00723 location.row = prevData->cursor.row;
00724 location.col = prevData->cursor.col;
00725 }
00726 else
00727 {
00728 location.row = 0;
00729 location.col = 0;
00730 }
00731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732 location = data.Cursor();
00733
00734 if ( encoding == TIXML_ENCODING_UNKNOWN )
00735 {
00736
00737 const unsigned char* pU = (const unsigned char*)p;
00738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741 {
00742 encoding = TIXML_ENCODING_UTF8;
00743 useMicrosoftBOM = true;
00744 }
00745 }
00746
00747 p = SkipWhiteSpace( p, encoding );
00748 if ( !p )
00749 {
00750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751 return 0;
00752 }
00753
00754 while ( p && *p )
00755 {
00756 TiXmlNode* node = Identify( p, encoding );
00757 if ( node )
00758 {
00759 p = node->Parse( p, &data, encoding );
00760 LinkEndChild( node );
00761 }
00762 else
00763 {
00764 break;
00765 }
00766
00767
00768 if ( encoding == TIXML_ENCODING_UNKNOWN
00769 && node->ToDeclaration() )
00770 {
00771 TiXmlDeclaration* dec = node->ToDeclaration();
00772 const char* enc = dec->Encoding();
00773 assert( enc );
00774
00775 if ( *enc == 0 )
00776 encoding = TIXML_ENCODING_UTF8;
00777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778 encoding = TIXML_ENCODING_UTF8;
00779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780 encoding = TIXML_ENCODING_UTF8;
00781 else
00782 encoding = TIXML_ENCODING_LEGACY;
00783 }
00784
00785 p = SkipWhiteSpace( p, encoding );
00786 }
00787
00788
00789 if ( !firstChild ) {
00790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791 return 0;
00792 }
00793
00794
00795 return p;
00796 }
00797
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 {
00800
00801 if ( error )
00802 return;
00803
00804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805 error = true;
00806 errorId = err;
00807 errorDesc = errorString[ errorId ];
00808
00809 errorLocation.Clear();
00810 if ( pError && data )
00811 {
00812 data->Stamp( pError, encoding );
00813 errorLocation = data->Cursor();
00814 }
00815 }
00816
00817
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820 TiXmlNode* returnNode = 0;
00821
00822 p = SkipWhiteSpace( p, encoding );
00823 if( !p || !*p || *p != '<' )
00824 {
00825 return 0;
00826 }
00827
00828 TiXmlDocument* doc = GetDocument();
00829 p = SkipWhiteSpace( p, encoding );
00830
00831 if ( !p || !*p )
00832 {
00833 return 0;
00834 }
00835
00836
00837
00838
00839
00840
00841
00842
00843 const char* xmlHeader = { "<?xml" };
00844 const char* commentHeader = { "<!--" };
00845 const char* dtdHeader = { "<!" };
00846 const char* cdataHeader = { "<![CDATA[" };
00847
00848 if ( StringEqual( p, xmlHeader, true, encoding ) )
00849 {
00850 #ifdef DEBUG_PARSER
00851 TIXML_LOG( "XML parsing Declaration\n" );
00852 #endif
00853 returnNode = new TiXmlDeclaration();
00854 }
00855 else if ( StringEqual( p, commentHeader, false, encoding ) )
00856 {
00857 #ifdef DEBUG_PARSER
00858 TIXML_LOG( "XML parsing Comment\n" );
00859 #endif
00860 returnNode = new TiXmlComment();
00861 }
00862 else if ( StringEqual( p, cdataHeader, false, encoding ) )
00863 {
00864 #ifdef DEBUG_PARSER
00865 TIXML_LOG( "XML parsing CDATA\n" );
00866 #endif
00867 TiXmlText* text = new TiXmlText( "" );
00868 text->SetCDATA( true );
00869 returnNode = text;
00870 }
00871 else if ( StringEqual( p, dtdHeader, false, encoding ) )
00872 {
00873 #ifdef DEBUG_PARSER
00874 TIXML_LOG( "XML parsing Unknown(1)\n" );
00875 #endif
00876 returnNode = new TiXmlUnknown();
00877 }
00878 else if ( IsAlpha( *(p+1), encoding )
00879 || *(p+1) == '_' )
00880 {
00881 #ifdef DEBUG_PARSER
00882 TIXML_LOG( "XML parsing Element\n" );
00883 #endif
00884 returnNode = new TiXmlElement( "" );
00885 }
00886 else
00887 {
00888 #ifdef DEBUG_PARSER
00889 TIXML_LOG( "XML parsing Unknown(2)\n" );
00890 #endif
00891 returnNode = new TiXmlUnknown();
00892 }
00893
00894 if ( returnNode )
00895 {
00896
00897 returnNode->parent = this;
00898 }
00899 else
00900 {
00901 if ( doc )
00902 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00903 }
00904 return returnNode;
00905 }
00906
00907 #ifdef TIXML_USE_STL
00908
00909 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00910 {
00911
00912
00913 while( in->good() )
00914 {
00915 int c = in->get();
00916 if ( c <= 0 )
00917 {
00918 TiXmlDocument* document = GetDocument();
00919 if ( document )
00920 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00921 return;
00922 }
00923 (*tag) += (char) c ;
00924
00925 if ( c == '>' )
00926 break;
00927 }
00928
00929 if ( tag->length() < 3 ) return;
00930
00931
00932
00933
00934 if ( tag->at( tag->length() - 1 ) == '>'
00935 && tag->at( tag->length() - 2 ) == '/' )
00936 {
00937
00938 return;
00939 }
00940 else if ( tag->at( tag->length() - 1 ) == '>' )
00941 {
00942
00943
00944
00945
00946
00947 for ( ;; )
00948 {
00949 StreamWhiteSpace( in, tag );
00950
00951
00952 if ( in->good() && in->peek() != '<' )
00953 {
00954
00955 TiXmlText text( "" );
00956 text.StreamIn( in, tag );
00957
00958
00959
00960 continue;
00961 }
00962
00963
00964
00965 if ( !in->good() ) return;
00966 assert( in->peek() == '<' );
00967 int tagIndex = (int) tag->length();
00968
00969 bool closingTag = false;
00970 bool firstCharFound = false;
00971
00972 for( ;; )
00973 {
00974 if ( !in->good() )
00975 return;
00976
00977 int c = in->peek();
00978 if ( c <= 0 )
00979 {
00980 TiXmlDocument* document = GetDocument();
00981 if ( document )
00982 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00983 return;
00984 }
00985
00986 if ( c == '>' )
00987 break;
00988
00989 *tag += (char) c;
00990 in->get();
00991
00992
00993 if ( c == '[' && tag->size() >= 9 )
00994 {
00995 size_t len = tag->size();
00996 const char* start = tag->c_str() + len - 9;
00997 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00998 assert( !closingTag );
00999 break;
01000 }
01001 }
01002
01003 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01004 {
01005 firstCharFound = true;
01006 if ( c == '/' )
01007 closingTag = true;
01008 }
01009 }
01010
01011
01012 if ( closingTag )
01013 {
01014 if ( !in->good() )
01015 return;
01016
01017 int c = in->get();
01018 if ( c <= 0 )
01019 {
01020 TiXmlDocument* document = GetDocument();
01021 if ( document )
01022 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01023 return;
01024 }
01025 assert( c == '>' );
01026 *tag += (char) c;
01027
01028
01029 return;
01030 }
01031 else
01032 {
01033
01034 const char* tagloc = tag->c_str() + tagIndex;
01035 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01036 if ( !node )
01037 return;
01038 node->StreamIn( in, tag );
01039 delete node;
01040 node = 0;
01041
01042
01043 }
01044 }
01045 }
01046 }
01047 #endif
01048
01049 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01050 {
01051 p = SkipWhiteSpace( p, encoding );
01052 TiXmlDocument* document = GetDocument();
01053
01054 if ( !p || !*p )
01055 {
01056 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01057 return 0;
01058 }
01059
01060 if ( data )
01061 {
01062 data->Stamp( p, encoding );
01063 location = data->Cursor();
01064 }
01065
01066 if ( *p != '<' )
01067 {
01068 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01069 return 0;
01070 }
01071
01072 p = SkipWhiteSpace( p+1, encoding );
01073
01074
01075 const char* pErr = p;
01076
01077 p = ReadName( p, &value, encoding );
01078 if ( !p || !*p )
01079 {
01080 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01081 return 0;
01082 }
01083
01084 TIXML_STRING endTag ("</");
01085 endTag += value;
01086 endTag += ">";
01087
01088
01089
01090 while ( p && *p )
01091 {
01092 pErr = p;
01093 p = SkipWhiteSpace( p, encoding );
01094 if ( !p || !*p )
01095 {
01096 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01097 return 0;
01098 }
01099 if ( *p == '/' )
01100 {
01101 ++p;
01102
01103 if ( *p != '>' )
01104 {
01105 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01106 return 0;
01107 }
01108 return (p+1);
01109 }
01110 else if ( *p == '>' )
01111 {
01112
01113
01114
01115 ++p;
01116 p = ReadValue( p, data, encoding );
01117 if ( !p || !*p ) {
01118
01119
01120 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01121 return 0;
01122 }
01123
01124
01125 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01126 {
01127 p += endTag.length();
01128 return p;
01129 }
01130 else
01131 {
01132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01133 return 0;
01134 }
01135 }
01136 else
01137 {
01138
01139 TiXmlAttribute* attrib = new TiXmlAttribute();
01140 if ( !attrib )
01141 {
01142 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01143 return 0;
01144 }
01145
01146 attrib->SetDocument( document );
01147 pErr = p;
01148 p = attrib->Parse( p, data, encoding );
01149
01150 if ( !p || !*p )
01151 {
01152 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01153 delete attrib;
01154 return 0;
01155 }
01156
01157
01158 #ifdef TIXML_USE_STL
01159 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01160 #else
01161 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01162 #endif
01163 if ( node )
01164 {
01165 node->SetValue( attrib->Value() );
01166 delete attrib;
01167 return 0;
01168 }
01169
01170 attributeSet.Add( attrib );
01171 }
01172 }
01173 return p;
01174 }
01175
01176
01177 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01178 {
01179 TiXmlDocument* document = GetDocument();
01180
01181
01182 const char* pWithWhiteSpace = p;
01183 p = SkipWhiteSpace( p, encoding );
01184
01185 while ( p && *p )
01186 {
01187 if ( *p != '<' )
01188 {
01189
01190 TiXmlText* textNode = new TiXmlText( "" );
01191
01192 if ( !textNode )
01193 {
01194 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01195 return 0;
01196 }
01197
01198 if ( TiXmlBase::IsWhiteSpaceCondensed() )
01199 {
01200 p = textNode->Parse( p, data, encoding );
01201 }
01202 else
01203 {
01204
01205
01206 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01207 }
01208
01209 if ( !textNode->Blank() )
01210 LinkEndChild( textNode );
01211 else
01212 delete textNode;
01213 }
01214 else
01215 {
01216
01217
01218
01219 if ( StringEqual( p, "</", false, encoding ) )
01220 {
01221 return p;
01222 }
01223 else
01224 {
01225 TiXmlNode* node = Identify( p, encoding );
01226 if ( node )
01227 {
01228 p = node->Parse( p, data, encoding );
01229 LinkEndChild( node );
01230 }
01231 else
01232 {
01233 return 0;
01234 }
01235 }
01236 }
01237 pWithWhiteSpace = p;
01238 p = SkipWhiteSpace( p, encoding );
01239 }
01240
01241 if ( !p )
01242 {
01243 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01244 }
01245 return p;
01246 }
01247
01248
01249 #ifdef TIXML_USE_STL
01250 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01251 {
01252 while ( in->good() )
01253 {
01254 int c = in->get();
01255 if ( c <= 0 )
01256 {
01257 TiXmlDocument* document = GetDocument();
01258 if ( document )
01259 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01260 return;
01261 }
01262 (*tag) += (char) c;
01263
01264 if ( c == '>' )
01265 {
01266
01267 return;
01268 }
01269 }
01270 }
01271 #endif
01272
01273
01274 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01275 {
01276 TiXmlDocument* document = GetDocument();
01277 p = SkipWhiteSpace( p, encoding );
01278
01279 if ( data )
01280 {
01281 data->Stamp( p, encoding );
01282 location = data->Cursor();
01283 }
01284 if ( !p || !*p || *p != '<' )
01285 {
01286 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01287 return 0;
01288 }
01289 ++p;
01290 value = "";
01291
01292 while ( p && *p && *p != '>' )
01293 {
01294 value += *p;
01295 ++p;
01296 }
01297
01298 if ( !p )
01299 {
01300 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01301 }
01302 if ( *p == '>' )
01303 return p+1;
01304 return p;
01305 }
01306
01307 #ifdef TIXML_USE_STL
01308 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01309 {
01310 while ( in->good() )
01311 {
01312 int c = in->get();
01313 if ( c <= 0 )
01314 {
01315 TiXmlDocument* document = GetDocument();
01316 if ( document )
01317 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01318 return;
01319 }
01320
01321 (*tag) += (char) c;
01322
01323 if ( c == '>'
01324 && tag->at( tag->length() - 2 ) == '-'
01325 && tag->at( tag->length() - 3 ) == '-' )
01326 {
01327
01328 return;
01329 }
01330 }
01331 }
01332 #endif
01333
01334
01335 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01336 {
01337 TiXmlDocument* document = GetDocument();
01338 value = "";
01339
01340 p = SkipWhiteSpace( p, encoding );
01341
01342 if ( data )
01343 {
01344 data->Stamp( p, encoding );
01345 location = data->Cursor();
01346 }
01347 const char* startTag = "<!--";
01348 const char* endTag = "-->";
01349
01350 if ( !StringEqual( p, startTag, false, encoding ) )
01351 {
01352 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01353 return 0;
01354 }
01355 p += strlen( startTag );
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375 value = "";
01376
01377 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01378 {
01379 value.append( p, 1 );
01380 ++p;
01381 }
01382 if ( p )
01383 p += strlen( endTag );
01384
01385 return p;
01386 }
01387
01388
01389 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01390 {
01391 p = SkipWhiteSpace( p, encoding );
01392 if ( !p || !*p ) return 0;
01393
01394
01395
01396
01397
01398 if ( data )
01399 {
01400 data->Stamp( p, encoding );
01401 location = data->Cursor();
01402 }
01403
01404 const char* pErr = p;
01405 p = ReadName( p, &name, encoding );
01406 if ( !p || !*p )
01407 {
01408 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01409 return 0;
01410 }
01411 p = SkipWhiteSpace( p, encoding );
01412 if ( !p || !*p || *p != '=' )
01413 {
01414 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01415 return 0;
01416 }
01417
01418 ++p;
01419 p = SkipWhiteSpace( p, encoding );
01420 if ( !p || !*p )
01421 {
01422 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01423 return 0;
01424 }
01425
01426 const char* end;
01427 const char SINGLE_QUOTE = '\'';
01428 const char DOUBLE_QUOTE = '\"';
01429
01430 if ( *p == SINGLE_QUOTE )
01431 {
01432 ++p;
01433 end = "\'";
01434 p = ReadText( p, &value, false, end, false, encoding );
01435 }
01436 else if ( *p == DOUBLE_QUOTE )
01437 {
01438 ++p;
01439 end = "\"";
01440 p = ReadText( p, &value, false, end, false, encoding );
01441 }
01442 else
01443 {
01444
01445
01446
01447 value = "";
01448 while ( p && *p
01449 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'
01450 && *p != '/' && *p != '>' )
01451 {
01452 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01453
01454
01455
01456 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01457 return 0;
01458 }
01459 value += *p;
01460 ++p;
01461 }
01462 }
01463 return p;
01464 }
01465
01466 #ifdef TIXML_USE_STL
01467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01468 {
01469 while ( in->good() )
01470 {
01471 int c = in->peek();
01472 if ( !cdata && (c == '<' ) )
01473 {
01474 return;
01475 }
01476 if ( c <= 0 )
01477 {
01478 TiXmlDocument* document = GetDocument();
01479 if ( document )
01480 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01481 return;
01482 }
01483
01484 (*tag) += (char) c;
01485 in->get();
01486
01487 if ( cdata && c == '>' && tag->size() >= 3 ) {
01488 size_t len = tag->size();
01489 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01490
01491 return;
01492 }
01493 }
01494 }
01495 }
01496 #endif
01497
01498 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01499 {
01500 value = "";
01501 TiXmlDocument* document = GetDocument();
01502
01503 if ( data )
01504 {
01505 data->Stamp( p, encoding );
01506 location = data->Cursor();
01507 }
01508
01509 const char* const startTag = "<![CDATA[";
01510 const char* const endTag = "]]>";
01511
01512 if ( cdata || StringEqual( p, startTag, false, encoding ) )
01513 {
01514 cdata = true;
01515
01516 if ( !StringEqual( p, startTag, false, encoding ) )
01517 {
01518 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01519 return 0;
01520 }
01521 p += strlen( startTag );
01522
01523
01524 while ( p && *p
01525 && !StringEqual( p, endTag, false, encoding )
01526 )
01527 {
01528 value += *p;
01529 ++p;
01530 }
01531
01532 TIXML_STRING dummy;
01533 p = ReadText( p, &dummy, false, endTag, false, encoding );
01534 return p;
01535 }
01536 else
01537 {
01538 bool ignoreWhite = true;
01539
01540 const char* end = "<";
01541 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01542 if ( p )
01543 return p-1;
01544 return 0;
01545 }
01546 }
01547
01548 #ifdef TIXML_USE_STL
01549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01550 {
01551 while ( in->good() )
01552 {
01553 int c = in->get();
01554 if ( c <= 0 )
01555 {
01556 TiXmlDocument* document = GetDocument();
01557 if ( document )
01558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01559 return;
01560 }
01561 (*tag) += (char) c;
01562
01563 if ( c == '>' )
01564 {
01565
01566 return;
01567 }
01568 }
01569 }
01570 #endif
01571
01572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01573 {
01574 p = SkipWhiteSpace( p, _encoding );
01575
01576
01577 TiXmlDocument* document = GetDocument();
01578 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01579 {
01580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01581 return 0;
01582 }
01583 if ( data )
01584 {
01585 data->Stamp( p, _encoding );
01586 location = data->Cursor();
01587 }
01588 p += 5;
01589
01590 version = "";
01591 encoding = "";
01592 standalone = "";
01593
01594 while ( p && *p )
01595 {
01596 if ( *p == '>' )
01597 {
01598 ++p;
01599 return p;
01600 }
01601
01602 p = SkipWhiteSpace( p, _encoding );
01603 if ( StringEqual( p, "version", true, _encoding ) )
01604 {
01605 TiXmlAttribute attrib;
01606 p = attrib.Parse( p, data, _encoding );
01607 version = attrib.Value();
01608 }
01609 else if ( StringEqual( p, "encoding", true, _encoding ) )
01610 {
01611 TiXmlAttribute attrib;
01612 p = attrib.Parse( p, data, _encoding );
01613 encoding = attrib.Value();
01614 }
01615 else if ( StringEqual( p, "standalone", true, _encoding ) )
01616 {
01617 TiXmlAttribute attrib;
01618 p = attrib.Parse( p, data, _encoding );
01619 standalone = attrib.Value();
01620 }
01621 else
01622 {
01623
01624 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01625 ++p;
01626 }
01627 }
01628 return 0;
01629 }
01630
01631 bool TiXmlText::Blank() const
01632 {
01633 for ( unsigned i=0; i<value.length(); i++ )
01634 if ( !IsWhiteSpace( value[i] ) )
01635 return false;
01636 return true;
01637 }
01638