00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 #include <ctype.h>
00026 #include <stddef.h>
00027 
00028 #include "tinyxml.h"
00029 
00030 
00031 #if defined( DEBUG_PARSER )
00032 #       if defined( DEBUG ) && defined( _MSC_VER )
00033 #               include <windows.h>
00034 #               define TIXML_LOG OutputDebugString
00035 #       else
00036 #               define TIXML_LOG printf
00037 #       endif
00038 #endif
00039 
00040 
00041 
00042 
00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
00044 {
00045         { "&",  5, '&' },
00046         { "<",   4, '<' },
00047         { ">",   4, '>' },
00048         { """, 6, '\"' },
00049         { "'", 6, '\'' }
00050 };
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
// The three bytes 0xEF 0xBB 0xBF that the parser treats as the UTF-8
// byte-order mark. They are integral constants so they can be used as
// `case` labels.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;   // first byte
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;   // second byte
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;   // third byte
00065 
00066 const int TiXmlBase::utf8ByteTable[256] = 
00067 {
00068         
00069                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00070                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00071                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00072                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00073                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00074                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00075                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00080                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      
00081                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      
00082                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      
00083                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      
00084                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       
00085 };
00086 
00087 
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090         const unsigned long BYTE_MASK = 0xBF;
00091         const unsigned long BYTE_MARK = 0x80;
00092         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093 
00094         if (input < 0x80) 
00095                 *length = 1;
00096         else if ( input < 0x800 )
00097                 *length = 2;
00098         else if ( input < 0x10000 )
00099                 *length = 3;
00100         else if ( input < 0x200000 )
00101                 *length = 4;
00102         else
00103                 { *length = 0; return; }        
00104 
00105         output += *length;
00106 
00107         
00108         switch (*length) 
00109         {
00110                 case 4:
00111                         --output; 
00112                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00113                         input >>= 6;
00114                 case 3:
00115                         --output; 
00116                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00117                         input >>= 6;
00118                 case 2:
00119                         --output; 
00120                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00121                         input >>= 6;
00122                 case 1:
00123                         --output; 
00124                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125         }
00126 }
00127 
00128 
00129  int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding  )
00130 {
00131         
00132         
00133         
00134         
00135 
00136 
00137 
00138                 if ( anyByte < 127 )
00139                         return isalpha( anyByte );
00140                 else
00141                         return 1;       
00142 
00143 
00144 
00145 
00146 
00147 }
00148 
00149 
00150  int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding  )
00151 {
00152         
00153         
00154         
00155         
00156 
00157 
00158 
00159                 if ( anyByte < 127 )
00160                         return isalnum( anyByte );
00161                 else
00162                         return 1;       
00163 
00164 
00165 
00166 
00167 
00168 }
00169 
00170 
00171 class TiXmlParsingData
00172 {
00173         friend class TiXmlDocument;
00174   public:
00175         void Stamp( const char* now, TiXmlEncoding encoding );
00176 
00177         const TiXmlCursor& Cursor()     { return cursor; }
00178 
00179   private:
00180         
00181         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182         {
00183                 assert( start );
00184                 stamp = start;
00185                 tabsize = _tabsize;
00186                 cursor.row = row;
00187                 cursor.col = col;
00188         }
00189 
00190         TiXmlCursor             cursor;
00191         const char*             stamp;
00192         int                             tabsize;
00193 };
00194 
00195 
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198         assert( now );
00199 
00200         
00201         if ( tabsize < 1 )
00202         {
00203                 return;
00204         }
00205 
00206         
00207         int row = cursor.row;
00208         int col = cursor.col;
00209         const char* p = stamp;
00210         assert( p );
00211 
00212         while ( p < now )
00213         {
00214                 
00215                 const unsigned char* pU = (const unsigned char*)p;
00216 
00217                 
00218                 switch (*pU) {
00219                         case 0:
00220                                 
00221                                 
00222                                 return;
00223 
00224                         case '\r':
00225                                 
00226                                 ++row;
00227                                 col = 0;                                
00228                                 
00229                                 ++p;
00230 
00231                                 
00232                                 if (*p == '\n') {
00233                                         ++p;
00234                                 }
00235                                 break;
00236 
00237                         case '\n':
00238                                 
00239                                 ++row;
00240                                 col = 0;
00241 
00242                                 
00243                                 ++p;
00244 
00245                                 
00246                                 
00247                                 
00248                                 if (*p == '\r') {
00249                                         ++p;
00250                                 }
00251                                 break;
00252 
00253                         case '\t':
00254                                 
00255                                 ++p;
00256 
00257                                 
00258                                 col = (col / tabsize + 1) * tabsize;
00259                                 break;
00260 
00261                         case TIXML_UTF_LEAD_0:
00262                                 if ( encoding == TIXML_ENCODING_UTF8 )
00263                                 {
00264                                         if ( *(p+1) && *(p+2) )
00265                                         {
00266                                                 
00267                                                 
00268                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269                                                         p += 3; 
00270                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271                                                         p += 3; 
00272                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273                                                         p += 3; 
00274                                                 else
00275                                                         { p +=3; ++col; }       
00276                                         }
00277                                 }
00278                                 else
00279                                 {
00280                                         ++p;
00281                                         ++col;
00282                                 }
00283                                 break;
00284 
00285                         default:
00286                                 if ( encoding == TIXML_ENCODING_UTF8 )
00287                                 {
00288                                         
00289                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290                                         if ( step == 0 )
00291                                                 step = 1;               
00292                                         p += step;
00293 
00294                                         
00295                                         ++col;
00296                                 }
00297                                 else
00298                                 {
00299                                         ++p;
00300                                         ++col;
00301                                 }
00302                                 break;
00303                 }
00304         }
00305         cursor.row = row;
00306         cursor.col = col;
00307         assert( cursor.row >= -1 );
00308         assert( cursor.col >= -1 );
00309         stamp = p;
00310         assert( stamp );
00311 }
00312 
00313 
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316         if ( !p || !*p )
00317         {
00318                 return 0;
00319         }
00320         if ( encoding == TIXML_ENCODING_UTF8 )
00321         {
00322                 while ( *p )
00323                 {
00324                         const unsigned char* pU = (const unsigned char*)p;
00325                         
00326                         
00327                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00328                                  && *(pU+1)==TIXML_UTF_LEAD_1 
00329                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00330                         {
00331                                 p += 3;
00332                                 continue;
00333                         }
00334                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00335                                  && *(pU+1)==0xbfU
00336                                  && *(pU+2)==0xbeU )
00337                         {
00338                                 p += 3;
00339                                 continue;
00340                         }
00341                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00342                                  && *(pU+1)==0xbfU
00343                                  && *(pU+2)==0xbfU )
00344                         {
00345                                 p += 3;
00346                                 continue;
00347                         }
00348 
00349                         if ( IsWhiteSpace( *p ) )               
00350                                 ++p;
00351                         else
00352                                 break;
00353                 }
00354         }
00355         else
00356         {
00357                 while ( *p && IsWhiteSpace( *p ) )
00358                         ++p;
00359         }
00360 
00361         return p;
00362 }
00363 
00364 #ifdef TIXML_USE_STL
00365  bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367         for( ;; )
00368         {
00369                 if ( !in->good() ) return false;
00370 
00371                 int c = in->peek();
00372                 
00373                 if ( !IsWhiteSpace( c ) || c <= 0 )
00374                         return true;
00375 
00376                 *tag += (char) in->get();
00377         }
00378 }
00379 
00380  bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382         
00383         while ( in->good() )
00384         {
00385                 int c = in->peek();
00386                 if ( c == character )
00387                         return true;
00388                 if ( c <= 0 )           
00389                         return false;
00390 
00391                 in->get();
00392                 *tag += (char) c;
00393         }
00394         return false;
00395 }
00396 #endif
00397 
00398 
00399 
00400 
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403         
00404         
00405         
00406         *name = "";
00407         assert( p );
00408 
00409         
00410         
00411         
00412         
00413         
00414         
00415         
00416         if (    p && *p 
00417                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418         {
00419                 const char* start = p;
00420                 while(          p && *p
00421                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding ) 
00422                                                  || *p == '_'
00423                                                  || *p == '-'
00424                                                  || *p == '.'
00425                                                  || *p == ':' ) )
00426                 {
00427                         
00428                         ++p;
00429                 }
00430                 if ( p-start > 0 ) {
00431                         name->assign( start, p-start );
00432                 }
00433                 return p;
00434         }
00435         return 0;
00436 }
00437 
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440         
00441     TIXML_STRING ent;
00442         int i;
00443         *length = 0;
00444 
00445         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446         {
00447                 unsigned long ucs = 0;
00448                 ptrdiff_t delta = 0;
00449                 unsigned mult = 1;
00450 
00451                 if ( *(p+2) == 'x' )
00452                 {
00453                         
00454                         if ( !*(p+3) ) return 0;
00455 
00456                         const char* q = p+3;
00457                         q = strchr( q, ';' );
00458 
00459                         if ( !q || !*q ) return 0;
00460 
00461                         delta = q-p;
00462                         --q;
00463 
00464                         while ( *q != 'x' )
00465                         {
00466                                 if ( *q >= '0' && *q <= '9' )
00467                                         ucs += mult * (*q - '0');
00468                                 else if ( *q >= 'a' && *q <= 'f' )
00469                                         ucs += mult * (*q - 'a' + 10);
00470                                 else if ( *q >= 'A' && *q <= 'F' )
00471                                         ucs += mult * (*q - 'A' + 10 );
00472                                 else 
00473                                         return 0;
00474                                 mult *= 16;
00475                                 --q;
00476                         }
00477                 }
00478                 else
00479                 {
00480                         
00481                         if ( !*(p+2) ) return 0;
00482 
00483                         const char* q = p+2;
00484                         q = strchr( q, ';' );
00485 
00486                         if ( !q || !*q ) return 0;
00487 
00488                         delta = q-p;
00489                         --q;
00490 
00491                         while ( *q != '#' )
00492                         {
00493                                 if ( *q >= '0' && *q <= '9' )
00494                                         ucs += mult * (*q - '0');
00495                                 else 
00496                                         return 0;
00497                                 mult *= 10;
00498                                 --q;
00499                         }
00500                 }
00501                 if ( encoding == TIXML_ENCODING_UTF8 )
00502                 {
00503                         
00504                         ConvertUTF32ToUTF8( ucs, value, length );
00505                 }
00506                 else
00507                 {
00508                         *value = (char)ucs;
00509                         *length = 1;
00510                 }
00511                 return p + delta + 1;
00512         }
00513 
00514         
00515         for( i=0; i<NUM_ENTITY; ++i )
00516         {
00517                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518                 {
00519                         assert( strlen( entity[i].str ) == entity[i].strLength );
00520                         *value = entity[i].chr;
00521                         *length = 1;
00522                         return ( p + entity[i].strLength );
00523                 }
00524         }
00525 
00526         
00527         *value = *p;    
00528         
00529                                         
00530         return p+1;
00531 }
00532 
00533 
00534 bool TiXmlBase::StringEqual( const char* p,
00535                                                          const char* tag,
00536                                                          bool ignoreCase,
00537                                                          TiXmlEncoding encoding )
00538 {
00539         assert( p );
00540         assert( tag );
00541         if ( !p || !*p )
00542         {
00543                 assert( 0 );
00544                 return false;
00545         }
00546 
00547         const char* q = p;
00548 
00549         if ( ignoreCase )
00550         {
00551                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552                 {
00553                         ++q;
00554                         ++tag;
00555                 }
00556 
00557                 if ( *tag == 0 )
00558                         return true;
00559         }
00560         else
00561         {
00562                 while ( *q && *tag && *q == *tag )
00563                 {
00564                         ++q;
00565                         ++tag;
00566                 }
00567 
00568                 if ( *tag == 0 )                
00569                         return true;
00570         }
00571         return false;
00572 }
00573 
00574 const char* TiXmlBase::ReadText(        const char* p, 
00575                                                                         TIXML_STRING * text, 
00576                                                                         bool trimWhiteSpace, 
00577                                                                         const char* endTag, 
00578                                                                         bool caseInsensitive,
00579                                                                         TiXmlEncoding encoding )
00580 {
00581     *text = "";
00582         if (    !trimWhiteSpace                 
00583                  || !condenseWhiteSpace )       
00584         {
00585                 
00586                 while (    p && *p
00587                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00588                           )
00589                 {
00590                         int len;
00591                         char cArr[4] = { 0, 0, 0, 0 };
00592                         p = GetChar( p, cArr, &len, encoding );
00593                         text->append( cArr, len );
00594                 }
00595         }
00596         else
00597         {
00598                 bool whitespace = false;
00599 
00600                 
00601                 p = SkipWhiteSpace( p, encoding );
00602                 while (    p && *p
00603                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604                 {
00605                         if ( *p == '\r' || *p == '\n' )
00606                         {
00607                                 whitespace = true;
00608                                 ++p;
00609                         }
00610                         else if ( IsWhiteSpace( *p ) )
00611                         {
00612                                 whitespace = true;
00613                                 ++p;
00614                         }
00615                         else
00616                         {
00617                                 
00618                                 
00619                                 if ( whitespace )
00620                                 {
00621                                         (*text) += ' ';
00622                                         whitespace = false;
00623                                 }
00624                                 int len;
00625                                 char cArr[4] = { 0, 0, 0, 0 };
00626                                 p = GetChar( p, cArr, &len, encoding );
00627                                 if ( len == 1 )
00628                                         (*text) += cArr[0];     
00629                                 else
00630                                         text->append( cArr, len );
00631                         }
00632                 }
00633         }
00634         if ( p && *p ) 
00635                 p += strlen( endTag );
00636         return p;
00637 }
00638 
00639 #ifdef TIXML_USE_STL
00640 
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643         
00644         
00645         
00646         
00647         
00648         
00649 
00650         if ( !StreamTo( in, '<', tag ) ) 
00651         {
00652                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653                 return;
00654         }
00655 
00656         while ( in->good() )
00657         {
00658                 int tagIndex = (int) tag->length();
00659                 while ( in->good() && in->peek() != '>' )
00660                 {
00661                         int c = in->get();
00662                         if ( c <= 0 )
00663                         {
00664                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665                                 break;
00666                         }
00667                         (*tag) += (char) c;
00668                 }
00669 
00670                 if ( in->good() )
00671                 {
00672                         
00673                         
00674                         
00675                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676 
00677                         if ( node )
00678                         {
00679                                 node->StreamIn( in, tag );
00680                                 bool isElement = node->ToElement() != 0;
00681                                 delete node;
00682                                 node = 0;
00683 
00684                                 
00685                                 
00686                                 if ( isElement )
00687                                 {
00688                                         return;
00689                                 }
00690                         }
00691                         else
00692                         {
00693                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694                                 return;
00695                         }
00696                 }
00697         }
00698         
00699         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701 
00702 #endif
00703 
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706         ClearError();
00707 
00708         
00709         
00710         
00711         if ( !p || !*p )
00712         {
00713                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714                 return 0;
00715         }
00716 
00717         
00718         
00719         
00720         location.Clear();
00721         if ( prevData )
00722         {
00723                 location.row = prevData->cursor.row;
00724                 location.col = prevData->cursor.col;
00725         }
00726         else
00727         {
00728                 location.row = 0;
00729                 location.col = 0;
00730         }
00731         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732         location = data.Cursor();
00733 
00734         if ( encoding == TIXML_ENCODING_UNKNOWN )
00735         {
00736                 
00737                 const unsigned char* pU = (const unsigned char*)p;
00738                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741                 {
00742                         encoding = TIXML_ENCODING_UTF8;
00743                         useMicrosoftBOM = true;
00744                 }
00745         }
00746 
00747     p = SkipWhiteSpace( p, encoding );
00748         if ( !p )
00749         {
00750                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751                 return 0;
00752         }
00753 
00754         while ( p && *p )
00755         {
00756                 TiXmlNode* node = Identify( p, encoding );
00757                 if ( node )
00758                 {
00759                         p = node->Parse( p, &data, encoding );
00760                         LinkEndChild( node );
00761                 }
00762                 else
00763                 {
00764                         break;
00765                 }
00766 
00767                 
00768                 if (    encoding == TIXML_ENCODING_UNKNOWN
00769                          && node->ToDeclaration() )
00770                 {
00771                         TiXmlDeclaration* dec = node->ToDeclaration();
00772                         const char* enc = dec->Encoding();
00773                         assert( enc );
00774 
00775                         if ( *enc == 0 )
00776                                 encoding = TIXML_ENCODING_UTF8;
00777                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778                                 encoding = TIXML_ENCODING_UTF8;
00779                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780                                 encoding = TIXML_ENCODING_UTF8; 
00781                         else 
00782                                 encoding = TIXML_ENCODING_LEGACY;
00783                 }
00784 
00785                 p = SkipWhiteSpace( p, encoding );
00786         }
00787 
00788         
00789         if ( !firstChild ) {
00790                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791                 return 0;
00792         }
00793 
00794         
00795         return p;
00796 }
00797 
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 {       
00800         
00801         if ( error )
00802                 return;
00803 
00804         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805         error   = true;
00806         errorId = err;
00807         errorDesc = errorString[ errorId ];
00808 
00809         errorLocation.Clear();
00810         if ( pError && data )
00811         {
00812                 data->Stamp( pError, encoding );
00813                 errorLocation = data->Cursor();
00814         }
00815 }
00816 
00817 
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820         TiXmlNode* returnNode = 0;
00821 
00822         p = SkipWhiteSpace( p, encoding );
00823         if( !p || !*p || *p != '<' )
00824         {
00825                 return 0;
00826         }
00827 
00828         p = SkipWhiteSpace( p, encoding );
00829 
00830         if ( !p || !*p )
00831         {
00832                 return 0;
00833         }
00834 
00835         
00836         
00837         
00838         
00839         
00840         
00841 
00842         const char* xmlHeader = { "<?xml" };
00843         const char* commentHeader = { "<!--" };
00844         const char* dtdHeader = { "<!" };
00845         const char* cdataHeader = { "<![CDATA[" };
00846 
00847         if ( StringEqual( p, xmlHeader, true, encoding ) )
00848         {
00849                 #ifdef DEBUG_PARSER
00850                         TIXML_LOG( "XML parsing Declaration\n" );
00851                 #endif
00852                 returnNode = new TiXmlDeclaration();
00853         }
00854         else if ( StringEqual( p, commentHeader, false, encoding ) )
00855         {
00856                 #ifdef DEBUG_PARSER
00857                         TIXML_LOG( "XML parsing Comment\n" );
00858                 #endif
00859                 returnNode = new TiXmlComment();
00860         }
00861         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00862         {
00863                 #ifdef DEBUG_PARSER
00864                         TIXML_LOG( "XML parsing CDATA\n" );
00865                 #endif
00866                 TiXmlText* text = new TiXmlText( "" );
00867                 text->SetCDATA( true );
00868                 returnNode = text;
00869         }
00870         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00871         {
00872                 #ifdef DEBUG_PARSER
00873                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00874                 #endif
00875                 returnNode = new TiXmlUnknown();
00876         }
00877         else if (    IsAlpha( *(p+1), encoding )
00878                           || *(p+1) == '_' )
00879         {
00880                 #ifdef DEBUG_PARSER
00881                         TIXML_LOG( "XML parsing Element\n" );
00882                 #endif
00883                 returnNode = new TiXmlElement( "" );
00884         }
00885         else
00886         {
00887                 #ifdef DEBUG_PARSER
00888                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00889                 #endif
00890                 returnNode = new TiXmlUnknown();
00891         }
00892 
00893         if ( returnNode )
00894         {
00895                 
00896                 returnNode->parent = this;
00897         }
00898         return returnNode;
00899 }
00900 
00901 #ifdef TIXML_USE_STL
00902 
00903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00904 {
00905         
00906         
00907         while( in->good() )
00908         {
00909                 int c = in->get();
00910                 if ( c <= 0 )
00911                 {
00912                         TiXmlDocument* document = GetDocument();
00913                         if ( document )
00914                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00915                         return;
00916                 }
00917                 (*tag) += (char) c ;
00918                 
00919                 if ( c == '>' )
00920                         break;
00921         }
00922 
00923         if ( tag->length() < 3 ) return;
00924 
00925         
00926         
00927 
00928         if (    tag->at( tag->length() - 1 ) == '>' 
00929                  && tag->at( tag->length() - 2 ) == '/' )
00930         {
00931                 
00932                 return;
00933         }
00934         else if ( tag->at( tag->length() - 1 ) == '>' )
00935         {
00936                 
00937                 
00938                 
00939                 
00940                 
00941                 for ( ;; )
00942                 {
00943                         StreamWhiteSpace( in, tag );
00944 
00945                         
00946                         if ( in->good() && in->peek() != '<' ) 
00947                         {
00948                                 
00949                                 TiXmlText text( "" );
00950                                 text.StreamIn( in, tag );
00951 
00952                                 
00953                                 
00954                                 continue;
00955                         }
00956 
00957                         
00958                         
00959                         if ( !in->good() ) return;
00960                         assert( in->peek() == '<' );
00961                         int tagIndex = (int) tag->length();
00962 
00963                         bool closingTag = false;
00964                         bool firstCharFound = false;
00965 
00966                         for( ;; )
00967                         {
00968                                 if ( !in->good() )
00969                                         return;
00970 
00971                                 int c = in->peek();
00972                                 if ( c <= 0 )
00973                                 {
00974                                         TiXmlDocument* document = GetDocument();
00975                                         if ( document )
00976                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00977                                         return;
00978                                 }
00979                                 
00980                                 if ( c == '>' )
00981                                         break;
00982 
00983                                 *tag += (char) c;
00984                                 in->get();
00985 
00986                                 
00987                                 if ( c == '[' && tag->size() >= 9 )
00988                                 {
00989                                         size_t len = tag->size();
00990                                         const char* start = tag->c_str() + len - 9;
00991                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00992                                                 assert( !closingTag );
00993                                                 break;
00994                                         }
00995                                 }
00996 
00997                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00998                                 {
00999                                         firstCharFound = true;
01000                                         if ( c == '/' )
01001                                                 closingTag = true;
01002                                 }
01003                         }
01004                         
01005                         
01006                         if ( closingTag )
01007                         {
01008                                 if ( !in->good() )
01009                                         return;
01010 
01011                                 int c = in->get();
01012                                 if ( c <= 0 )
01013                                 {
01014                                         TiXmlDocument* document = GetDocument();
01015                                         if ( document )
01016                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01017                                         return;
01018                                 }
01019                                 assert( c == '>' );
01020                                 *tag += (char) c;
01021 
01022                                 
01023                                 return;
01024                         }
01025                         else
01026                         {
01027                                 
01028                                 const char* tagloc = tag->c_str() + tagIndex;
01029                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01030                                 if ( !node )
01031                                         return;
01032                                 node->StreamIn( in, tag );
01033                                 delete node;
01034                                 node = 0;
01035 
01036                                 
01037                         }
01038                 }
01039         }
01040 }
01041 #endif
01042 
01043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01044 {
01045         p = SkipWhiteSpace( p, encoding );
01046         TiXmlDocument* document = GetDocument();
01047 
01048         if ( !p || !*p )
01049         {
01050                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01051                 return 0;
01052         }
01053 
01054         if ( data )
01055         {
01056                 data->Stamp( p, encoding );
01057                 location = data->Cursor();
01058         }
01059 
01060         if ( *p != '<' )
01061         {
01062                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01063                 return 0;
01064         }
01065 
01066         p = SkipWhiteSpace( p+1, encoding );
01067 
01068         
01069         const char* pErr = p;
01070 
01071     p = ReadName( p, &value, encoding );
01072         if ( !p || !*p )
01073         {
01074                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01075                 return 0;
01076         }
01077 
01078     TIXML_STRING endTag ("</");
01079         endTag += value;
01080 
01081         
01082         
01083         while ( p && *p )
01084         {
01085                 pErr = p;
01086                 p = SkipWhiteSpace( p, encoding );
01087                 if ( !p || !*p )
01088                 {
01089                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01090                         return 0;
01091                 }
01092                 if ( *p == '/' )
01093                 {
01094                         ++p;
01095                         
01096                         if ( *p  != '>' )
01097                         {
01098                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );             
01099                                 return 0;
01100                         }
01101                         return (p+1);
01102                 }
01103                 else if ( *p == '>' )
01104                 {
01105                         
01106                         
01107                         
01108                         ++p;
01109                         p = ReadValue( p, data, encoding );             
01110                         if ( !p || !*p ) {
01111                                 
01112                                 
01113                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01114                                 return 0;
01115                         }
01116 
01117                         
01118                         
01119                         
01120                         
01121                         
01122                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01123                         {
01124                                 p += endTag.length();
01125                                 p = SkipWhiteSpace( p, encoding );
01126                                 if ( p && *p && *p == '>' ) {
01127                                         ++p;
01128                                         return p;
01129                                 }
01130                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01131                                 return 0;
01132                         }
01133                         else
01134                         {
01135                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01136                                 return 0;
01137                         }
01138                 }
01139                 else
01140                 {
01141                         
01142                         TiXmlAttribute* attrib = new TiXmlAttribute();
01143                         if ( !attrib )
01144                         {
01145                                 return 0;
01146                         }
01147 
01148                         attrib->SetDocument( document );
01149                         pErr = p;
01150                         p = attrib->Parse( p, data, encoding );
01151 
01152                         if ( !p || !*p )
01153                         {
01154                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01155                                 delete attrib;
01156                                 return 0;
01157                         }
01158 
01159                         
01160                         #ifdef TIXML_USE_STL
01161                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01162                         #else
01163                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01164                         #endif
01165                         if ( node )
01166                         {
01167                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01168                                 delete attrib;
01169                                 return 0;
01170                         }
01171 
01172                         attributeSet.Add( attrib );
01173                 }
01174         }
01175         return p;
01176 }
01177 
01178 
01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01180 {
01181         TiXmlDocument* document = GetDocument();
01182 
01183         
01184         const char* pWithWhiteSpace = p;
01185         p = SkipWhiteSpace( p, encoding );
01186 
01187         while ( p && *p )
01188         {
01189                 if ( *p != '<' )
01190                 {
01191                         
01192                         TiXmlText* textNode = new TiXmlText( "" );
01193 
01194                         if ( !textNode )
01195                         {
01196                             return 0;
01197                         }
01198 
01199                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01200                         {
01201                                 p = textNode->Parse( p, data, encoding );
01202                         }
01203                         else
01204                         {
01205                                 
01206                                 
01207                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01208                         }
01209 
01210                         if ( !textNode->Blank() )
01211                                 LinkEndChild( textNode );
01212                         else
01213                                 delete textNode;
01214                 } 
01215                 else 
01216                 {
01217                         
01218                         
01219                         
01220                         if ( StringEqual( p, "</", false, encoding ) )
01221                         {
01222                                 return p;
01223                         }
01224                         else
01225                         {
01226                                 TiXmlNode* node = Identify( p, encoding );
01227                                 if ( node )
01228                                 {
01229                                         p = node->Parse( p, data, encoding );
01230                                         LinkEndChild( node );
01231                                 }                               
01232                                 else
01233                                 {
01234                                         return 0;
01235                                 }
01236                         }
01237                 }
01238                 pWithWhiteSpace = p;
01239                 p = SkipWhiteSpace( p, encoding );
01240         }
01241 
01242         if ( !p )
01243         {
01244                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01245         }       
01246         return p;
01247 }
01248 
01249 
01250 #ifdef TIXML_USE_STL
01251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01252 {
01253         while ( in->good() )
01254         {
01255                 int c = in->get();      
01256                 if ( c <= 0 )
01257                 {
01258                         TiXmlDocument* document = GetDocument();
01259                         if ( document )
01260                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01261                         return;
01262                 }
01263                 (*tag) += (char) c;
01264 
01265                 if ( c == '>' )
01266                 {
01267                         
01268                         return;         
01269                 }
01270         }
01271 }
01272 #endif
01273 
01274 
01275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01276 {
01277         TiXmlDocument* document = GetDocument();
01278         p = SkipWhiteSpace( p, encoding );
01279 
01280         if ( data )
01281         {
01282                 data->Stamp( p, encoding );
01283                 location = data->Cursor();
01284         }
01285         if ( !p || !*p || *p != '<' )
01286         {
01287                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01288                 return 0;
01289         }
01290         ++p;
01291     value = "";
01292 
01293         while ( p && *p && *p != '>' )
01294         {
01295                 value += *p;
01296                 ++p;
01297         }
01298 
01299         if ( !p )
01300         {
01301                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01302         }
01303         else if ( *p == '>' )
01304         {
01305                 return p+1;
01306         }
01307         return p;
01308 }
01309 
01310 #ifdef TIXML_USE_STL
01311 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01312 {
01313         while ( in->good() )
01314         {
01315                 int c = in->get();      
01316                 if ( c <= 0 )
01317                 {
01318                         TiXmlDocument* document = GetDocument();
01319                         if ( document )
01320                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01321                         return;
01322                 }
01323 
01324                 (*tag) += (char) c;
01325 
01326                 if ( c == '>' 
01327                          && tag->at( tag->length() - 2 ) == '-'
01328                          && tag->at( tag->length() - 3 ) == '-' )
01329                 {
01330                         
01331                         return;         
01332                 }
01333         }
01334 }
01335 #endif
01336 
01337 
01338 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01339 {
01340         TiXmlDocument* document = GetDocument();
01341         value = "";
01342 
01343         p = SkipWhiteSpace( p, encoding );
01344 
01345         if ( data )
01346         {
01347                 data->Stamp( p, encoding );
01348                 location = data->Cursor();
01349         }
01350         const char* startTag = "<!--";
01351         const char* endTag   = "-->";
01352 
01353         if ( !StringEqual( p, startTag, false, encoding ) )
01354         {
01355                 if ( document )
01356                         document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01357                 return 0;
01358         }
01359         p += strlen( startTag );
01360 
01361         
01362         
01363         
01364         
01365         
01366         
01367 
01368 
01369 
01370 
01371 
01372 
01373 
01374 
01375 
01376 
01377 
01378 
01379     value = "";
01380         
01381         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01382         {
01383                 value.append( p, 1 );
01384                 ++p;
01385         }
01386         if ( p && *p ) 
01387                 p += strlen( endTag );
01388 
01389         return p;
01390 }
01391 
01392 
01393 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01394 {
01395         p = SkipWhiteSpace( p, encoding );
01396         if ( !p || !*p ) return 0;
01397 
01398         if ( data )
01399         {
01400                 data->Stamp( p, encoding );
01401                 location = data->Cursor();
01402         }
01403         
01404         const char* pErr = p;
01405         p = ReadName( p, &name, encoding );
01406         if ( !p || !*p )
01407         {
01408                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01409                 return 0;
01410         }
01411         p = SkipWhiteSpace( p, encoding );
01412         if ( !p || !*p || *p != '=' )
01413         {
01414                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01415                 return 0;
01416         }
01417 
01418         ++p;    
01419         p = SkipWhiteSpace( p, encoding );
01420         if ( !p || !*p )
01421         {
01422                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01423                 return 0;
01424         }
01425         
01426         const char* end;
01427         const char SINGLE_QUOTE = '\'';
01428         const char DOUBLE_QUOTE = '\"';
01429 
01430         if ( *p == SINGLE_QUOTE )
01431         {
01432                 ++p;
01433                 end = "\'";             
01434                 p = ReadText( p, &value, false, end, false, encoding );
01435         }
01436         else if ( *p == DOUBLE_QUOTE )
01437         {
01438                 ++p;
01439                 end = "\"";             
01440                 p = ReadText( p, &value, false, end, false, encoding );
01441         }
01442         else
01443         {
01444                 
01445                 
01446                 
01447                 value = "";
01448                 while (    p && *p                                                                                      
01449                                 && !IsWhiteSpace( *p )                                                          
01450                                 && *p != '/' && *p != '>' )                                                     
01451                 {
01452                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01453                                 
01454                                 
01455                                 
01456                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01457                                 return 0;
01458                         }
01459                         value += *p;
01460                         ++p;
01461                 }
01462         }
01463         return p;
01464 }
01465 
01466 #ifdef TIXML_USE_STL
01467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01468 {
01469         while ( in->good() )
01470         {
01471                 int c = in->peek();     
01472                 if ( !cdata && (c == '<' ) ) 
01473                 {
01474                         return;
01475                 }
01476                 if ( c <= 0 )
01477                 {
01478                         TiXmlDocument* document = GetDocument();
01479                         if ( document )
01480                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01481                         return;
01482                 }
01483 
01484                 (*tag) += (char) c;
01485                 in->get();      
01486 
01487                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01488                         size_t len = tag->size();
01489                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01490                                 
01491                                 return;
01492                         }
01493                 }    
01494         }
01495 }
01496 #endif
01497 
01498 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01499 {
01500         value = "";
01501         TiXmlDocument* document = GetDocument();
01502 
01503         if ( data )
01504         {
01505                 data->Stamp( p, encoding );
01506                 location = data->Cursor();
01507         }
01508 
01509         const char* const startTag = "<![CDATA[";
01510         const char* const endTag   = "]]>";
01511 
01512         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01513         {
01514                 cdata = true;
01515 
01516                 if ( !StringEqual( p, startTag, false, encoding ) )
01517                 {
01518                         if ( document )
01519                                 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01520                         return 0;
01521                 }
01522                 p += strlen( startTag );
01523 
01524                 
01525                 while (    p && *p
01526                                 && !StringEqual( p, endTag, false, encoding )
01527                           )
01528                 {
01529                         value += *p;
01530                         ++p;
01531                 }
01532 
01533                 TIXML_STRING dummy; 
01534                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01535                 return p;
01536         }
01537         else
01538         {
01539                 bool ignoreWhite = true;
01540 
01541                 const char* end = "<";
01542                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01543                 if ( p )
01544                         return p-1;     
01545                 return 0;
01546         }
01547 }
01548 
01549 #ifdef TIXML_USE_STL
01550 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01551 {
01552         while ( in->good() )
01553         {
01554                 int c = in->get();
01555                 if ( c <= 0 )
01556                 {
01557                         TiXmlDocument* document = GetDocument();
01558                         if ( document )
01559                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01560                         return;
01561                 }
01562                 (*tag) += (char) c;
01563 
01564                 if ( c == '>' )
01565                 {
01566                         
01567                         return;
01568                 }
01569         }
01570 }
01571 #endif
01572 
01573 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01574 {
01575         p = SkipWhiteSpace( p, _encoding );
01576         
01577         
01578         TiXmlDocument* document = GetDocument();
01579         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01580         {
01581                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01582                 return 0;
01583         }
01584         if ( data )
01585         {
01586                 data->Stamp( p, _encoding );
01587                 location = data->Cursor();
01588         }
01589         p += 5;
01590 
01591         version = "";
01592         encoding = "";
01593         standalone = "";
01594 
01595         while ( p && *p )
01596         {
01597                 if ( *p == '>' )
01598                 {
01599                         ++p;
01600                         return p;
01601                 }
01602 
01603                 p = SkipWhiteSpace( p, _encoding );
01604                 if ( StringEqual( p, "version", true, _encoding ) )
01605                 {
01606                         TiXmlAttribute attrib;
01607                         p = attrib.Parse( p, data, _encoding );         
01608                         version = attrib.Value();
01609                 }
01610                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01611                 {
01612                         TiXmlAttribute attrib;
01613                         p = attrib.Parse( p, data, _encoding );         
01614                         encoding = attrib.Value();
01615                 }
01616                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01617                 {
01618                         TiXmlAttribute attrib;
01619                         p = attrib.Parse( p, data, _encoding );         
01620                         standalone = attrib.Value();
01621                 }
01622                 else
01623                 {
01624                         
01625                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01626                                 ++p;
01627                 }
01628         }
01629         return 0;
01630 }
01631 
01632 bool TiXmlText::Blank() const
01633 {
01634         for ( unsigned i=0; i<value.length(); i++ )
01635                 if ( !IsWhiteSpace( value[i] ) )
01636                         return false;
01637         return true;
01638 }
01639