tinyxmlparser.cpp
Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied 
00006 warranty. In no event will the authors be held liable for any 
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any 
00010 purpose, including commercial applications, and to alter it and 
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must 
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and 
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source 
00022 distribution.
00023 */
00024 
00025 #include <ctype.h>
00026 #include <stddef.h>
00027 
00028 #include "tinyxml.h"
00029 
// Uncomment the next line to enable parser tracing through TIXML_LOG.
//#define DEBUG_PARSER
#ifdef DEBUG_PARSER
	// Debug builds under MSVC send the trace to OutputDebugString;
	// every other configuration uses printf.
#	if defined( _MSC_VER ) && defined( DEBUG )
#		include <windows.h>
#		define TIXML_LOG OutputDebugString
#	else
#		define TIXML_LOG printf
#	endif
#endif
00039 
00040 // Note tha "PutString" hardcodes the same list. This
00041 // is less flexible than it appears. Changing the entries
00042 // or order will break putstring.       
00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
00044 {
00045         { "&amp;",  5, '&' },
00046         { "&lt;",   4, '<' },
00047         { "&gt;",   4, '>' },
00048         { "&quot;", 6, '\"' },
00049         { "&apos;", 6, '\'' }
00050 };
00051 
00052 // Bunch of unicode info at:
00053 //              http://www.unicode.org/faq/utf_bom.html
00054 // That page is the basis of the table below, which gives the number of bytes
00055 // in a sequence from its lead byte. Invalid lead bytes are given a length of 1 --
00056 // although the result will be junk, pass it through as much as possible.
00057 // Beware of the non-characters in UTF-8:       
00058 //                              ef bb bf (Microsoft "lead bytes")
00059 //                              ef bf be
00060 //                              ef bf bf 
00061 
// The three bytes EF BB BF: the Microsoft UTF-8 "lead bytes".
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00065 
00066 const int TiXmlBase::utf8ByteTable[256] = 
00067 {
00068         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00069                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00070                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00071                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00072                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00073                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00074                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00075                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90 
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0 
00080                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0 
00081                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00082                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00083                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00084                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00085 };
00086 
00087 
00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00089 {
00090         const unsigned long BYTE_MASK = 0xBF;
00091         const unsigned long BYTE_MARK = 0x80;
00092         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00093 
00094         if (input < 0x80) 
00095                 *length = 1;
00096         else if ( input < 0x800 )
00097                 *length = 2;
00098         else if ( input < 0x10000 )
00099                 *length = 3;
00100         else if ( input < 0x200000 )
00101                 *length = 4;
00102         else
00103                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00104 
00105         output += *length;
00106 
00107         // Scary scary fall throughs.
00108         switch (*length) 
00109         {
00110                 case 4:
00111                         --output; 
00112                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00113                         input >>= 6;
00114                 case 3:
00115                         --output; 
00116                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00117                         input >>= 6;
00118                 case 2:
00119                         --output; 
00120                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00121                         input >>= 6;
00122                 case 1:
00123                         --output; 
00124                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00125         }
00126 }
00127 
00128 
00129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00130 {
00131         // This will only work for low-ascii, everything else is assumed to be a valid
00132         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00133         // to figure out alhabetical vs. not across encoding. So take a very 
00134         // conservative approach.
00135 
00136 //      if ( encoding == TIXML_ENCODING_UTF8 )
00137 //      {
00138                 if ( anyByte < 127 )
00139                         return isalpha( anyByte );
00140                 else
00141                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00142 //      }
00143 //      else
00144 //      {
00145 //              return isalpha( anyByte );
00146 //      }
00147 }
00148 
00149 
00150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00151 {
00152         // This will only work for low-ascii, everything else is assumed to be a valid
00153         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00154         // to figure out alhabetical vs. not across encoding. So take a very 
00155         // conservative approach.
00156 
00157 //      if ( encoding == TIXML_ENCODING_UTF8 )
00158 //      {
00159                 if ( anyByte < 127 )
00160                         return isalnum( anyByte );
00161                 else
00162                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00163 //      }
00164 //      else
00165 //      {
00166 //              return isalnum( anyByte );
00167 //      }
00168 }
00169 
00170 
00171 class TiXmlParsingData
00172 {
00173         friend class TiXmlDocument;
00174   public:
00175         void Stamp( const char* now, TiXmlEncoding encoding );
00176 
00177         const TiXmlCursor& Cursor()     { return cursor; }
00178 
00179   private:
00180         // Only used by the document!
00181         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00182         {
00183                 assert( start );
00184                 stamp = start;
00185                 tabsize = _tabsize;
00186                 cursor.row = row;
00187                 cursor.col = col;
00188         }
00189 
00190         TiXmlCursor             cursor;
00191         const char*             stamp;
00192         int                             tabsize;
00193 };
00194 
00195 
00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00197 {
00198         assert( now );
00199 
00200         // Do nothing if the tabsize is 0.
00201         if ( tabsize < 1 )
00202         {
00203                 return;
00204         }
00205 
00206         // Get the current row, column.
00207         int row = cursor.row;
00208         int col = cursor.col;
00209         const char* p = stamp;
00210         assert( p );
00211 
00212         while ( p < now )
00213         {
00214                 // Treat p as unsigned, so we have a happy compiler.
00215                 const unsigned char* pU = (const unsigned char*)p;
00216 
00217                 // Code contributed by Fletcher Dunn: (modified by lee)
00218                 switch (*pU) {
00219                         case 0:
00220                                 // We *should* never get here, but in case we do, don't
00221                                 // advance past the terminating null character, ever
00222                                 return;
00223 
00224                         case '\r':
00225                                 // bump down to the next line
00226                                 ++row;
00227                                 col = 0;                                
00228                                 // Eat the character
00229                                 ++p;
00230 
00231                                 // Check for \r\n sequence, and treat this as a single character
00232                                 if (*p == '\n') {
00233                                         ++p;
00234                                 }
00235                                 break;
00236 
00237                         case '\n':
00238                                 // bump down to the next line
00239                                 ++row;
00240                                 col = 0;
00241 
00242                                 // Eat the character
00243                                 ++p;
00244 
00245                                 // Check for \n\r sequence, and treat this as a single
00246                                 // character.  (Yes, this bizarre thing does occur still
00247                                 // on some arcane platforms...)
00248                                 if (*p == '\r') {
00249                                         ++p;
00250                                 }
00251                                 break;
00252 
00253                         case '\t':
00254                                 // Eat the character
00255                                 ++p;
00256 
00257                                 // Skip to next tab stop
00258                                 col = (col / tabsize + 1) * tabsize;
00259                                 break;
00260 
00261                         case TIXML_UTF_LEAD_0:
00262                                 if ( encoding == TIXML_ENCODING_UTF8 )
00263                                 {
00264                                         if ( *(p+1) && *(p+2) )
00265                                         {
00266                                                 // In these cases, don't advance the column. These are
00267                                                 // 0-width spaces.
00268                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00269                                                         p += 3; 
00270                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00271                                                         p += 3; 
00272                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00273                                                         p += 3; 
00274                                                 else
00275                                                         { p +=3; ++col; }       // A normal character.
00276                                         }
00277                                 }
00278                                 else
00279                                 {
00280                                         ++p;
00281                                         ++col;
00282                                 }
00283                                 break;
00284 
00285                         default:
00286                                 if ( encoding == TIXML_ENCODING_UTF8 )
00287                                 {
00288                                         // Eat the 1 to 4 byte utf8 character.
00289                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00290                                         if ( step == 0 )
00291                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00292                                         p += step;
00293 
00294                                         // Just advance one column, of course.
00295                                         ++col;
00296                                 }
00297                                 else
00298                                 {
00299                                         ++p;
00300                                         ++col;
00301                                 }
00302                                 break;
00303                 }
00304         }
00305         cursor.row = row;
00306         cursor.col = col;
00307         assert( cursor.row >= -1 );
00308         assert( cursor.col >= -1 );
00309         stamp = p;
00310         assert( stamp );
00311 }
00312 
00313 
00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00315 {
00316         if ( !p || !*p )
00317         {
00318                 return 0;
00319         }
00320         if ( encoding == TIXML_ENCODING_UTF8 )
00321         {
00322                 while ( *p )
00323                 {
00324                         const unsigned char* pU = (const unsigned char*)p;
00325                         
00326                         // Skip the stupid Microsoft UTF-8 Byte order marks
00327                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00328                                  && *(pU+1)==TIXML_UTF_LEAD_1 
00329                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00330                         {
00331                                 p += 3;
00332                                 continue;
00333                         }
00334                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00335                                  && *(pU+1)==0xbfU
00336                                  && *(pU+2)==0xbeU )
00337                         {
00338                                 p += 3;
00339                                 continue;
00340                         }
00341                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00342                                  && *(pU+1)==0xbfU
00343                                  && *(pU+2)==0xbfU )
00344                         {
00345                                 p += 3;
00346                                 continue;
00347                         }
00348 
00349                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00350                                 ++p;
00351                         else
00352                                 break;
00353                 }
00354         }
00355         else
00356         {
00357                 while (*p && (IsWhiteSpace( *p ) || *p == '\n' || *p =='\r'))
00358                         ++p;
00359         }
00360 
00361         return p;
00362 }
00363 
00364 #ifdef TIXML_USE_STL
00365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00366 {
00367         for( ;; )
00368         {
00369                 if ( !in->good() ) return false;
00370 
00371                 int c = in->peek();
00372                 // At this scope, we can't get to a document. So fail silently.
00373                 if ( !IsWhiteSpace( c ) || c <= 0 )
00374                         return true;
00375 
00376                 *tag += (char) in->get();
00377         }
00378 }
00379 
00380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00381 {
00382         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00383         while ( in->good() )
00384         {
00385                 int c = in->peek();
00386                 if ( c == character )
00387                         return true;
00388                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00389                         return false;
00390 
00391                 in->get();
00392                 *tag += (char) c;
00393         }
00394         return false;
00395 }
00396 #endif
00397 
00398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00399 // "assign" optimization removes over 10% of the execution time.
00400 //
00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00402 {
00403         // Oddly, not supported on some comilers,
00404         //name->clear();
00405         // So use this:
00406         *name = "";
00407         assert( p );
00408 
00409         // Names start with letters or underscores.
00410         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00411         // algorithm is generous.
00412         //
00413         // After that, they can be letters, underscores, numbers,
00414         // hyphens, or colons. (Colons are valid ony for namespaces,
00415         // but tinyxml can't tell namespaces from names.)
00416         if (    p && *p 
00417                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00418         {
00419                 const char* start = p;
00420                 while(          p && *p
00421                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding ) 
00422                                                  || *p == '_'
00423                                                  || *p == '-'
00424                                                  || *p == '.'
00425                                                  || *p == ':' ) )
00426                 {
00427                         //(*name) += *p; // expensive
00428                         ++p;
00429                 }
00430                 if ( p-start > 0 ) {
00431                         name->assign( start, p-start );
00432                 }
00433                 return p;
00434         }
00435         return 0;
00436 }
00437 
00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00439 {
00440         // Presume an entity, and pull it out.
00441     TIXML_STRING ent;
00442         int i;
00443         *length = 0;
00444 
00445         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00446         {
00447                 unsigned long ucs = 0;
00448                 ptrdiff_t delta = 0;
00449                 unsigned mult = 1;
00450 
00451                 if ( *(p+2) == 'x' )
00452                 {
00453                         // Hexadecimal.
00454                         if ( !*(p+3) ) return 0;
00455 
00456                         const char* q = p+3;
00457                         q = strchr( q, ';' );
00458 
00459                         if ( !q || !*q ) return 0;
00460 
00461                         delta = q-p;
00462                         --q;
00463 
00464                         while ( *q != 'x' )
00465                         {
00466                                 if ( *q >= '0' && *q <= '9' )
00467                                         ucs += mult * (*q - '0');
00468                                 else if ( *q >= 'a' && *q <= 'f' )
00469                                         ucs += mult * (*q - 'a' + 10);
00470                                 else if ( *q >= 'A' && *q <= 'F' )
00471                                         ucs += mult * (*q - 'A' + 10 );
00472                                 else 
00473                                         return 0;
00474                                 mult *= 16;
00475                                 --q;
00476                         }
00477                 }
00478                 else
00479                 {
00480                         // Decimal.
00481                         if ( !*(p+2) ) return 0;
00482 
00483                         const char* q = p+2;
00484                         q = strchr( q, ';' );
00485 
00486                         if ( !q || !*q ) return 0;
00487 
00488                         delta = q-p;
00489                         --q;
00490 
00491                         while ( *q != '#' )
00492                         {
00493                                 if ( *q >= '0' && *q <= '9' )
00494                                         ucs += mult * (*q - '0');
00495                                 else 
00496                                         return 0;
00497                                 mult *= 10;
00498                                 --q;
00499                         }
00500                 }
00501                 if ( encoding == TIXML_ENCODING_UTF8 )
00502                 {
00503                         // convert the UCS to UTF-8
00504                         ConvertUTF32ToUTF8( ucs, value, length );
00505                 }
00506                 else
00507                 {
00508                         *value = (char)ucs;
00509                         *length = 1;
00510                 }
00511                 return p + delta + 1;
00512         }
00513 
00514         // Now try to match it.
00515         for( i=0; i<NUM_ENTITY; ++i )
00516         {
00517                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00518                 {
00519                         assert( strlen( entity[i].str ) == entity[i].strLength );
00520                         *value = entity[i].chr;
00521                         *length = 1;
00522                         return ( p + entity[i].strLength );
00523                 }
00524         }
00525 
00526         // So it wasn't an entity, its unrecognized, or something like that.
00527         *value = *p;    // Don't put back the last one, since we return it!
00528         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00529                                         // Just writes strange XML.
00530         return p+1;
00531 }
00532 
00533 
00534 bool TiXmlBase::StringEqual( const char* p,
00535                                                          const char* tag,
00536                                                          bool ignoreCase,
00537                                                          TiXmlEncoding encoding )
00538 {
00539         assert( p );
00540         assert( tag );
00541         if ( !p || !*p )
00542         {
00543                 assert( 0 );
00544                 return false;
00545         }
00546 
00547         const char* q = p;
00548 
00549         if ( ignoreCase )
00550         {
00551                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00552                 {
00553                         ++q;
00554                         ++tag;
00555                 }
00556 
00557                 if ( *tag == 0 )
00558                         return true;
00559         }
00560         else
00561         {
00562                 while ( *q && *tag && *q == *tag )
00563                 {
00564                         ++q;
00565                         ++tag;
00566                 }
00567 
00568                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00569                         return true;
00570         }
00571         return false;
00572 }
00573 
00574 const char* TiXmlBase::ReadText(        const char* p, 
00575                                                                         TIXML_STRING * text, 
00576                                                                         bool trimWhiteSpace, 
00577                                                                         const char* endTag, 
00578                                                                         bool caseInsensitive,
00579                                                                         TiXmlEncoding encoding )
00580 {
00581     *text = "";
00582         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00583                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00584         {
00585                 // Keep all the white space.
00586                 while (    p && *p
00587                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00588                           )
00589                 {
00590                         int len;
00591                         char cArr[4] = { 0, 0, 0, 0 };
00592                         p = GetChar( p, cArr, &len, encoding );
00593                         text->append( cArr, len );
00594                 }
00595         }
00596         else
00597         {
00598                 bool whitespace = false;
00599 
00600                 // Remove leading white space:
00601                 p = SkipWhiteSpace( p, encoding );
00602                 while (    p && *p
00603                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00604                 {
00605                         if ( *p == '\r' || *p == '\n' )
00606                         {
00607                                 whitespace = true;
00608                                 ++p;
00609                         }
00610                         else if ( IsWhiteSpace( *p ) )
00611                         {
00612                                 whitespace = true;
00613                                 ++p;
00614                         }
00615                         else
00616                         {
00617                                 // If we've found whitespace, add it before the
00618                                 // new character. Any whitespace just becomes a space.
00619                                 if ( whitespace )
00620                                 {
00621                                         (*text) += ' ';
00622                                         whitespace = false;
00623                                 }
00624                                 int len;
00625                                 char cArr[4] = { 0, 0, 0, 0 };
00626                                 p = GetChar( p, cArr, &len, encoding );
00627                                 if ( len == 1 )
00628                                         (*text) += cArr[0];     // more efficient
00629                                 else
00630                                         text->append( cArr, len );
00631                         }
00632                 }
00633         }
00634         if ( p ) 
00635                 p += strlen( endTag );
00636         return p;
00637 }
00638 
00639 #ifdef TIXML_USE_STL
00640 
00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00642 {
00643         // The basic issue with a document is that we don't know what we're
00644         // streaming. Read something presumed to be a tag (and hope), then
00645         // identify it, and call the appropriate stream method on the tag.
00646         //
00647         // This "pre-streaming" will never read the closing ">" so the
00648         // sub-tag can orient itself.
00649 
00650         if ( !StreamTo( in, '<', tag ) ) 
00651         {
00652                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00653                 return;
00654         }
00655 
00656         while ( in->good() )
00657         {
00658                 int tagIndex = (int) tag->length();
00659                 while ( in->good() && in->peek() != '>' )
00660                 {
00661                         int c = in->get();
00662                         if ( c <= 0 )
00663                         {
00664                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00665                                 break;
00666                         }
00667                         (*tag) += (char) c;
00668                 }
00669 
00670                 if ( in->good() )
00671                 {
00672                         // We now have something we presume to be a node of 
00673                         // some sort. Identify it, and call the node to
00674                         // continue streaming.
00675                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00676 
00677                         if ( node )
00678                         {
00679                                 node->StreamIn( in, tag );
00680                                 bool isElement = node->ToElement() != 0;
00681                                 delete node;
00682                                 node = 0;
00683 
00684                                 // If this is the root element, we're done. Parsing will be
00685                                 // done by the >> operator.
00686                                 if ( isElement )
00687                                 {
00688                                         return;
00689                                 }
00690                         }
00691                         else
00692                         {
00693                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00694                                 return;
00695                         }
00696                 }
00697         }
00698         // We should have returned sooner.
00699         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00700 }
00701 
00702 #endif
00703 
00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00705 {
00706         ClearError();
00707 
00708         // Parse away, at the document level. Since a document
00709         // contains nothing but other tags, most of what happens
00710         // here is skipping white space.
00711         if ( !p || !*p )
00712         {
00713                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00714                 return 0;
00715         }
00716 
00717         // Note that, for a document, this needs to come
00718         // before the while space skip, so that parsing
00719         // starts from the pointer we are given.
00720         location.Clear();
00721         if ( prevData )
00722         {
00723                 location.row = prevData->cursor.row;
00724                 location.col = prevData->cursor.col;
00725         }
00726         else
00727         {
00728                 location.row = 0;
00729                 location.col = 0;
00730         }
00731         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00732         location = data.Cursor();
00733 
00734         if ( encoding == TIXML_ENCODING_UNKNOWN )
00735         {
00736                 // Check for the Microsoft UTF-8 lead bytes.
00737                 const unsigned char* pU = (const unsigned char*)p;
00738                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00739                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00740                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00741                 {
00742                         encoding = TIXML_ENCODING_UTF8;
00743                         useMicrosoftBOM = true;
00744                 }
00745         }
00746 
00747     p = SkipWhiteSpace( p, encoding );
00748         if ( !p )
00749         {
00750                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00751                 return 0;
00752         }
00753 
00754         while ( p && *p )
00755         {
00756                 TiXmlNode* node = Identify( p, encoding );
00757                 if ( node )
00758                 {
00759                         p = node->Parse( p, &data, encoding );
00760                         LinkEndChild( node );
00761                 }
00762                 else
00763                 {
00764                         break;
00765                 }
00766 
00767                 // Did we get encoding info?
00768                 if (    encoding == TIXML_ENCODING_UNKNOWN
00769                          && node->ToDeclaration() )
00770                 {
00771                         TiXmlDeclaration* dec = node->ToDeclaration();
00772                         const char* enc = dec->Encoding();
00773                         assert( enc );
00774 
00775                         if ( *enc == 0 )
00776                                 encoding = TIXML_ENCODING_UTF8;
00777                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00778                                 encoding = TIXML_ENCODING_UTF8;
00779                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00780                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00781                         else 
00782                                 encoding = TIXML_ENCODING_LEGACY;
00783                 }
00784 
00785                 p = SkipWhiteSpace( p, encoding );
00786         }
00787 
00788         // Was this empty?
00789         if ( !firstChild ) {
00790                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00791                 return 0;
00792         }
00793 
00794         // All is well.
00795         return p;
00796 }
00797 
00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00799 {       
00800         // The first error in a chain is more accurate - don't set again!
00801         if ( error )
00802                 return;
00803 
00804         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00805         error   = true;
00806         errorId = err;
00807         errorDesc = errorString[ errorId ];
00808 
00809         errorLocation.Clear();
00810         if ( pError && data )
00811         {
00812                 data->Stamp( pError, encoding );
00813                 errorLocation = data->Cursor();
00814         }
00815 }
00816 
00817 
00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00819 {
00820         TiXmlNode* returnNode = 0;
00821 
00822         p = SkipWhiteSpace( p, encoding );
00823         if( !p || !*p || *p != '<' )
00824         {
00825                 return 0;
00826         }
00827 
00828         TiXmlDocument* doc = GetDocument();
00829         p = SkipWhiteSpace( p, encoding );
00830 
00831         if ( !p || !*p )
00832         {
00833                 return 0;
00834         }
00835 
00836         // What is this thing? 
00837         // - Elements start with a letter or underscore, but xml is reserved.
00838         // - Comments: <!--
00839         // - Decleration: <?xml
00840         // - Everthing else is unknown to tinyxml.
00841         //
00842 
00843         const char* xmlHeader = { "<?xml" };
00844         const char* commentHeader = { "<!--" };
00845         const char* dtdHeader = { "<!" };
00846         const char* cdataHeader = { "<![CDATA[" };
00847 
00848         if ( StringEqual( p, xmlHeader, true, encoding ) )
00849         {
00850                 #ifdef DEBUG_PARSER
00851                         TIXML_LOG( "XML parsing Declaration\n" );
00852                 #endif
00853                 returnNode = new TiXmlDeclaration();
00854         }
00855         else if ( StringEqual( p, commentHeader, false, encoding ) )
00856         {
00857                 #ifdef DEBUG_PARSER
00858                         TIXML_LOG( "XML parsing Comment\n" );
00859                 #endif
00860                 returnNode = new TiXmlComment();
00861         }
00862         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00863         {
00864                 #ifdef DEBUG_PARSER
00865                         TIXML_LOG( "XML parsing CDATA\n" );
00866                 #endif
00867                 TiXmlText* text = new TiXmlText( "" );
00868                 text->SetCDATA( true );
00869                 returnNode = text;
00870         }
00871         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00872         {
00873                 #ifdef DEBUG_PARSER
00874                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00875                 #endif
00876                 returnNode = new TiXmlUnknown();
00877         }
00878         else if (    IsAlpha( *(p+1), encoding )
00879                           || *(p+1) == '_' )
00880         {
00881                 #ifdef DEBUG_PARSER
00882                         TIXML_LOG( "XML parsing Element\n" );
00883                 #endif
00884                 returnNode = new TiXmlElement( "" );
00885         }
00886         else
00887         {
00888                 #ifdef DEBUG_PARSER
00889                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00890                 #endif
00891                 returnNode = new TiXmlUnknown();
00892         }
00893 
00894         if ( returnNode )
00895         {
00896                 // Set the parent, so it can report errors
00897                 returnNode->parent = this;
00898         }
00899         else
00900         {
00901                 if ( doc )
00902                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00903         }
00904         return returnNode;
00905 }
00906 
00907 #ifdef TIXML_USE_STL
00908 
00909 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00910 {
00911         // We're called with some amount of pre-parsing. That is, some of "this"
00912         // element is in "tag". Go ahead and stream to the closing ">"
00913         while( in->good() )
00914         {
00915                 int c = in->get();
00916                 if ( c <= 0 )
00917                 {
00918                         TiXmlDocument* document = GetDocument();
00919                         if ( document )
00920                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00921                         return;
00922                 }
00923                 (*tag) += (char) c ;
00924                 
00925                 if ( c == '>' )
00926                         break;
00927         }
00928 
00929         if ( tag->length() < 3 ) return;
00930 
00931         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00932         // If not, identify and stream.
00933 
00934         if (    tag->at( tag->length() - 1 ) == '>' 
00935                  && tag->at( tag->length() - 2 ) == '/' )
00936         {
00937                 // All good!
00938                 return;
00939         }
00940         else if ( tag->at( tag->length() - 1 ) == '>' )
00941         {
00942                 // There is more. Could be:
00943                 //              text
00944                 //              cdata text (which looks like another node)
00945                 //              closing tag
00946                 //              another node.
00947                 for ( ;; )
00948                 {
00949                         StreamWhiteSpace( in, tag );
00950 
00951                         // Do we have text?
00952                         if ( in->good() && in->peek() != '<' ) 
00953                         {
00954                                 // Yep, text.
00955                                 TiXmlText text( "" );
00956                                 text.StreamIn( in, tag );
00957 
00958                                 // What follows text is a closing tag or another node.
00959                                 // Go around again and figure it out.
00960                                 continue;
00961                         }
00962 
00963                         // We now have either a closing tag...or another node.
00964                         // We should be at a "<", regardless.
00965                         if ( !in->good() ) return;
00966                         assert( in->peek() == '<' );
00967                         int tagIndex = (int) tag->length();
00968 
00969                         bool closingTag = false;
00970                         bool firstCharFound = false;
00971 
00972                         for( ;; )
00973                         {
00974                                 if ( !in->good() )
00975                                         return;
00976 
00977                                 int c = in->peek();
00978                                 if ( c <= 0 )
00979                                 {
00980                                         TiXmlDocument* document = GetDocument();
00981                                         if ( document )
00982                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00983                                         return;
00984                                 }
00985                                 
00986                                 if ( c == '>' )
00987                                         break;
00988 
00989                                 *tag += (char) c;
00990                                 in->get();
00991 
00992                                 // Early out if we find the CDATA id.
00993                                 if ( c == '[' && tag->size() >= 9 )
00994                                 {
00995                                         size_t len = tag->size();
00996                                         const char* start = tag->c_str() + len - 9;
00997                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00998                                                 assert( !closingTag );
00999                                                 break;
01000                                         }
01001                                 }
01002 
01003                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01004                                 {
01005                                         firstCharFound = true;
01006                                         if ( c == '/' )
01007                                                 closingTag = true;
01008                                 }
01009                         }
01010                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01011                         // If it was not, the streaming will be done by the tag.
01012                         if ( closingTag )
01013                         {
01014                                 if ( !in->good() )
01015                                         return;
01016 
01017                                 int c = in->get();
01018                                 if ( c <= 0 )
01019                                 {
01020                                         TiXmlDocument* document = GetDocument();
01021                                         if ( document )
01022                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01023                                         return;
01024                                 }
01025                                 assert( c == '>' );
01026                                 *tag += (char) c;
01027 
01028                                 // We are done, once we've found our closing tag.
01029                                 return;
01030                         }
01031                         else
01032                         {
01033                                 // If not a closing tag, id it, and stream.
01034                                 const char* tagloc = tag->c_str() + tagIndex;
01035                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01036                                 if ( !node )
01037                                         return;
01038                                 node->StreamIn( in, tag );
01039                                 delete node;
01040                                 node = 0;
01041 
01042                                 // No return: go around from the beginning: text, closing tag, or node.
01043                         }
01044                 }
01045         }
01046 }
01047 #endif
01048 
01049 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01050 {
01051         p = SkipWhiteSpace( p, encoding );
01052         TiXmlDocument* document = GetDocument();
01053 
01054         if ( !p || !*p )
01055         {
01056                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01057                 return 0;
01058         }
01059 
01060         if ( data )
01061         {
01062                 data->Stamp( p, encoding );
01063                 location = data->Cursor();
01064         }
01065 
01066         if ( *p != '<' )
01067         {
01068                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01069                 return 0;
01070         }
01071 
01072         p = SkipWhiteSpace( p+1, encoding );
01073 
01074         // Read the name.
01075         const char* pErr = p;
01076 
01077     p = ReadName( p, &value, encoding );
01078         if ( !p || !*p )
01079         {
01080                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01081                 return 0;
01082         }
01083 
01084     TIXML_STRING endTag ("</");
01085         endTag += value;
01086         endTag += ">";
01087 
01088         // Check for and read attributes. Also look for an empty
01089         // tag or an end tag.
01090         while ( p && *p )
01091         {
01092                 pErr = p;
01093                 p = SkipWhiteSpace( p, encoding );
01094                 if ( !p || !*p )
01095                 {
01096                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01097                         return 0;
01098                 }
01099                 if ( *p == '/' )
01100                 {
01101                         ++p;
01102                         // Empty tag.
01103                         if ( *p  != '>' )
01104                         {
01105                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );             
01106                                 return 0;
01107                         }
01108                         return (p+1);
01109                 }
01110                 else if ( *p == '>' )
01111                 {
01112                         // Done with attributes (if there were any.)
01113                         // Read the value -- which can include other
01114                         // elements -- read the end tag, and return.
01115                         ++p;
01116                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01117                         if ( !p || !*p ) {
01118                                 // We were looking for the end tag, but found nothing.
01119                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01120                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01121                                 return 0;
01122                         }
01123 
01124                         // We should find the end tag now
01125                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01126                         {
01127                                 p += endTag.length();
01128                                 return p;
01129                         }
01130                         else
01131                         {
01132                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01133                                 return 0;
01134                         }
01135                 }
01136                 else
01137                 {
01138                         // Try to read an attribute:
01139                         TiXmlAttribute* attrib = new TiXmlAttribute();
01140                         if ( !attrib )
01141                         {
01142                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01143                                 return 0;
01144                         }
01145 
01146                         attrib->SetDocument( document );
01147                         pErr = p;
01148                         p = attrib->Parse( p, data, encoding );
01149 
01150                         if ( !p || !*p )
01151                         {
01152                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01153                                 delete attrib;
01154                                 return 0;
01155                         }
01156 
01157                         // Handle the strange case of double attributes:
01158                         #ifdef TIXML_USE_STL
01159                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01160                         #else
01161                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01162                         #endif
01163                         if ( node )
01164                         {
01165                                 node->SetValue( attrib->Value() );
01166                                 delete attrib;
01167                                 return 0;
01168                         }
01169 
01170                         attributeSet.Add( attrib );
01171                 }
01172         }
01173         return p;
01174 }
01175 
01176 
01177 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01178 {
01179         TiXmlDocument* document = GetDocument();
01180 
01181         // Read in text and elements in any order.
01182         const char* pWithWhiteSpace = p;
01183         p = SkipWhiteSpace( p, encoding );
01184 
01185         while ( p && *p )
01186         {
01187                 if ( *p != '<' )
01188                 {
01189                         // Take what we have, make a text element.
01190                         TiXmlText* textNode = new TiXmlText( "" );
01191 
01192                         if ( !textNode )
01193                         {
01194                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01195                                     return 0;
01196                         }
01197 
01198                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01199                         {
01200                                 p = textNode->Parse( p, data, encoding );
01201                         }
01202                         else
01203                         {
01204                                 // Special case: we want to keep the white space
01205                                 // so that leading spaces aren't removed.
01206                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01207                         }
01208 
01209                         if ( !textNode->Blank() )
01210                                 LinkEndChild( textNode );
01211                         else
01212                                 delete textNode;
01213                 } 
01214                 else 
01215                 {
01216                         // We hit a '<'
01217                         // Have we hit a new element or an end tag? This could also be
01218                         // a TiXmlText in the "CDATA" style.
01219                         if ( StringEqual( p, "</", false, encoding ) )
01220                         {
01221                                 return p;
01222                         }
01223                         else
01224                         {
01225                                 TiXmlNode* node = Identify( p, encoding );
01226                                 if ( node )
01227                                 {
01228                                         p = node->Parse( p, data, encoding );
01229                                         LinkEndChild( node );
01230                                 }                               
01231                                 else
01232                                 {
01233                                         return 0;
01234                                 }
01235                         }
01236                 }
01237                 pWithWhiteSpace = p;
01238                 p = SkipWhiteSpace( p, encoding );
01239         }
01240 
01241         if ( !p )
01242         {
01243                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01244         }       
01245         return p;
01246 }
01247 
01248 
01249 #ifdef TIXML_USE_STL
01250 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01251 {
01252         while ( in->good() )
01253         {
01254                 int c = in->get();      
01255                 if ( c <= 0 )
01256                 {
01257                         TiXmlDocument* document = GetDocument();
01258                         if ( document )
01259                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01260                         return;
01261                 }
01262                 (*tag) += (char) c;
01263 
01264                 if ( c == '>' )
01265                 {
01266                         // All is well.
01267                         return;         
01268                 }
01269         }
01270 }
01271 #endif
01272 
01273 
01274 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01275 {
01276         TiXmlDocument* document = GetDocument();
01277         p = SkipWhiteSpace( p, encoding );
01278 
01279         if ( data )
01280         {
01281                 data->Stamp( p, encoding );
01282                 location = data->Cursor();
01283         }
01284         if ( !p || !*p || *p != '<' )
01285         {
01286                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01287                 return 0;
01288         }
01289         ++p;
01290     value = "";
01291 
01292         while ( p && *p && *p != '>' )
01293         {
01294                 value += *p;
01295                 ++p;
01296         }
01297 
01298         if ( !p )
01299         {
01300                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01301         }
01302         if ( *p == '>' )
01303                 return p+1;
01304         return p;
01305 }
01306 
01307 #ifdef TIXML_USE_STL
01308 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01309 {
01310         while ( in->good() )
01311         {
01312                 int c = in->get();      
01313                 if ( c <= 0 )
01314                 {
01315                         TiXmlDocument* document = GetDocument();
01316                         if ( document )
01317                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01318                         return;
01319                 }
01320 
01321                 (*tag) += (char) c;
01322 
01323                 if ( c == '>' 
01324                          && tag->at( tag->length() - 2 ) == '-'
01325                          && tag->at( tag->length() - 3 ) == '-' )
01326                 {
01327                         // All is well.
01328                         return;         
01329                 }
01330         }
01331 }
01332 #endif
01333 
01334 
01335 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01336 {
01337         TiXmlDocument* document = GetDocument();
01338         value = "";
01339 
01340         p = SkipWhiteSpace( p, encoding );
01341 
01342         if ( data )
01343         {
01344                 data->Stamp( p, encoding );
01345                 location = data->Cursor();
01346         }
01347         const char* startTag = "<!--";
01348         const char* endTag   = "-->";
01349 
01350         if ( !StringEqual( p, startTag, false, encoding ) )
01351         {
01352                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01353                 return 0;
01354         }
01355         p += strlen( startTag );
01356 
01357         // [ 1475201 ] TinyXML parses entities in comments
01358         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01359         // p = ReadText( p, &value, false, endTag, false, encoding );
01360         //
01361         // from the XML spec:
01362         /*
01363          [Definition: Comments may appear anywhere in a document outside other markup; in addition, 
01364                       they may appear within the document type declaration at places allowed by the grammar. 
01365                                   They are not part of the document's character data; an XML processor MAY, but need not, 
01366                                   make it possible for an application to retrieve the text of comments. For compatibility, 
01367                                   the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 
01368                                   references MUST NOT be recognized within comments.
01369 
01370                                   An example of a comment:
01371 
01372                                   <!-- declarations for <head> & <body> -->
01373         */
01374 
01375     value = "";
01376         // Keep all the white space.
01377         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01378         {
01379                 value.append( p, 1 );
01380                 ++p;
01381         }
01382         if ( p ) 
01383                 p += strlen( endTag );
01384 
01385         return p;
01386 }
01387 
01388 
01389 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01390 {
01391         p = SkipWhiteSpace( p, encoding );
01392         if ( !p || !*p ) return 0;
01393 
01394 //      int tabsize = 4;
01395 //      if ( document )
01396 //              tabsize = document->TabSize();
01397 
01398         if ( data )
01399         {
01400                 data->Stamp( p, encoding );
01401                 location = data->Cursor();
01402         }
01403         // Read the name, the '=' and the value.
01404         const char* pErr = p;
01405         p = ReadName( p, &name, encoding );
01406         if ( !p || !*p )
01407         {
01408                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01409                 return 0;
01410         }
01411         p = SkipWhiteSpace( p, encoding );
01412         if ( !p || !*p || *p != '=' )
01413         {
01414                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01415                 return 0;
01416         }
01417 
01418         ++p;    // skip '='
01419         p = SkipWhiteSpace( p, encoding );
01420         if ( !p || !*p )
01421         {
01422                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01423                 return 0;
01424         }
01425         
01426         const char* end;
01427         const char SINGLE_QUOTE = '\'';
01428         const char DOUBLE_QUOTE = '\"';
01429 
01430         if ( *p == SINGLE_QUOTE )
01431         {
01432                 ++p;
01433                 end = "\'";             // single quote in string
01434                 p = ReadText( p, &value, false, end, false, encoding );
01435         }
01436         else if ( *p == DOUBLE_QUOTE )
01437         {
01438                 ++p;
01439                 end = "\"";             // double quote in string
01440                 p = ReadText( p, &value, false, end, false, encoding );
01441         }
01442         else
01443         {
01444                 // All attribute values should be in single or double quotes.
01445                 // But this is such a common error that the parser will try
01446                 // its best, even without them.
01447                 value = "";
01448                 while (    p && *p                                                                                      // existence
01449                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01450                                 && *p != '/' && *p != '>' )                                                     // tag end
01451                 {
01452                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01453                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01454                                 // We did not have an opening quote but seem to have a 
01455                                 // closing one. Give up and throw an error.
01456                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01457                                 return 0;
01458                         }
01459                         value += *p;
01460                         ++p;
01461                 }
01462         }
01463         return p;
01464 }
01465 
01466 #ifdef TIXML_USE_STL
01467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01468 {
01469         while ( in->good() )
01470         {
01471                 int c = in->peek();     
01472                 if ( !cdata && (c == '<' ) ) 
01473                 {
01474                         return;
01475                 }
01476                 if ( c <= 0 )
01477                 {
01478                         TiXmlDocument* document = GetDocument();
01479                         if ( document )
01480                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01481                         return;
01482                 }
01483 
01484                 (*tag) += (char) c;
01485                 in->get();      // "commits" the peek made above
01486 
01487                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01488                         size_t len = tag->size();
01489                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01490                                 // terminator of cdata.
01491                                 return;
01492                         }
01493                 }    
01494         }
01495 }
01496 #endif
01497 
01498 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01499 {
01500         value = "";
01501         TiXmlDocument* document = GetDocument();
01502 
01503         if ( data )
01504         {
01505                 data->Stamp( p, encoding );
01506                 location = data->Cursor();
01507         }
01508 
01509         const char* const startTag = "<![CDATA[";
01510         const char* const endTag   = "]]>";
01511 
01512         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01513         {
01514                 cdata = true;
01515 
01516                 if ( !StringEqual( p, startTag, false, encoding ) )
01517                 {
01518                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01519                         return 0;
01520                 }
01521                 p += strlen( startTag );
01522 
01523                 // Keep all the white space, ignore the encoding, etc.
01524                 while (    p && *p
01525                                 && !StringEqual( p, endTag, false, encoding )
01526                           )
01527                 {
01528                         value += *p;
01529                         ++p;
01530                 }
01531 
01532                 TIXML_STRING dummy; 
01533                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01534                 return p;
01535         }
01536         else
01537         {
01538                 bool ignoreWhite = true;
01539 
01540                 const char* end = "<";
01541                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01542                 if ( p )
01543                         return p-1;     // don't truncate the '<'
01544                 return 0;
01545         }
01546 }
01547 
01548 #ifdef TIXML_USE_STL
01549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01550 {
01551         while ( in->good() )
01552         {
01553                 int c = in->get();
01554                 if ( c <= 0 )
01555                 {
01556                         TiXmlDocument* document = GetDocument();
01557                         if ( document )
01558                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01559                         return;
01560                 }
01561                 (*tag) += (char) c;
01562 
01563                 if ( c == '>' )
01564                 {
01565                         // All is well.
01566                         return;
01567                 }
01568         }
01569 }
01570 #endif
01571 
01572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01573 {
01574         p = SkipWhiteSpace( p, _encoding );
01575         // Find the beginning, find the end, and look for
01576         // the stuff in-between.
01577         TiXmlDocument* document = GetDocument();
01578         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01579         {
01580                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01581                 return 0;
01582         }
01583         if ( data )
01584         {
01585                 data->Stamp( p, _encoding );
01586                 location = data->Cursor();
01587         }
01588         p += 5;
01589 
01590         version = "";
01591         encoding = "";
01592         standalone = "";
01593 
01594         while ( p && *p )
01595         {
01596                 if ( *p == '>' )
01597                 {
01598                         ++p;
01599                         return p;
01600                 }
01601 
01602                 p = SkipWhiteSpace( p, _encoding );
01603                 if ( StringEqual( p, "version", true, _encoding ) )
01604                 {
01605                         TiXmlAttribute attrib;
01606                         p = attrib.Parse( p, data, _encoding );         
01607                         version = attrib.Value();
01608                 }
01609                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01610                 {
01611                         TiXmlAttribute attrib;
01612                         p = attrib.Parse( p, data, _encoding );         
01613                         encoding = attrib.Value();
01614                 }
01615                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01616                 {
01617                         TiXmlAttribute attrib;
01618                         p = attrib.Parse( p, data, _encoding );         
01619                         standalone = attrib.Value();
01620                 }
01621                 else
01622                 {
01623                         // Read over whatever it is.
01624                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01625                                 ++p;
01626                 }
01627         }
01628         return 0;
01629 }
01630 
01631 bool TiXmlText::Blank() const
01632 {
01633         for ( unsigned i=0; i<value.length(); i++ )
01634                 if ( !IsWhiteSpace( value[i] ) )
01635                         return false;
01636         return true;
01637 }
01638 


robbie_architecture
Author(s): Viktor Seib
autogenerated on Mon Oct 6 2014 02:53:09