appl: tinyxmlparser.cpp Source File

Go to the documentation of this file.
00001 /*
00002    www.sourceforge.net/projects/tinyxml
00003    Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005    This software is provided 'as-is', without any express or implied
00006    warranty. In no event will the authors be held liable for any
00007    damages arising from the use of this software.
00008 
00009    Permission is granted to anyone to use this software for any
00010    purpose, including commercial applications, and to alter it and
00011    redistribute it freely, subject to the following restrictions:
00012 
00013    1. The origin of this software must not be misrepresented; you must
00014    not claim that you wrote the original software. If you use this
00015    software in a product, an acknowledgment in the product documentation
00016    would be appreciated but is not required.
00017 
00018    2. Altered source versions must be plainly marked as such, and
00019    must not be misrepresented as being the original software.
00020 
00021    3. This notice may not be removed or altered from any source
00022    distribution.
00023    */
00024 
00025 #include <ctype.h>
00026 #include <stddef.h>
00027 
00028 #include "tinyxml.h"
00029 
// Parser tracing. Uncomment the define below to have the parser report
// through TIXML_LOG which kind of node it is about to build.
//#define DEBUG_PARSER
#ifdef DEBUG_PARSER
#       if defined( _MSC_VER ) && defined( DEBUG )
                // MSVC debug builds: send the trace to the debugger output.
#               include <windows.h>
#               define TIXML_LOG OutputDebugString
#       else
                // Everywhere else: plain printf.
#               define TIXML_LOG printf
#       endif
#endif
00039 
00040 // Note tha "PutString" hardcodes the same list. This
00041 // is less flexible than it appears. Changing the entries
00042 // or order will break putstring.
00043 TiXmlBase::Entity TiXmlBase::entity[NUM_ENTITY] = { { "&amp;", 5, '&' }, {
00044     "&lt;", 4, '<' }, { "&gt;", 4, '>' }, { "&quot;", 6, '\"' }, {
00045         "&apos;", 6, '\'' } };
00046 
// Bunch of unicode info at:
//              http://www.unicode.org/faq/utf_bom.html
// Including the basis of the byte-count table below, which determines the
// number of bytes in a sequence from its lead byte. 1 is used for invalid
// sequences -- although the result will be junk, pass it through as much
// as possible.
// Beware of the non-characters in UTF-8:
//                              ef bb bf (Microsoft "lead bytes")
//                              ef bf be
//                              ef bf bf

// The three bytes of the Microsoft UTF-8 lead sequence, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
00060 
00061 const int TiXmlBase::utf8ByteTable[256] = {
00062     //  0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00063     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
00064     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
00065     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
00066     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
00067     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
00068     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
00069     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
00070     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70     End of ASCII range
00071     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
00072     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
00073     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
00074     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
00075     1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
00076     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
00077     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
00078     4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00079 };
00080 
00081 void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, char* output,
00082         int* length) {
00083     const unsigned long BYTE_MASK = 0xBF;
00084     const unsigned long BYTE_MARK = 0x80;
00085     const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0,
00086         0xF8, 0xFC };
00087 
00088     if (input < 0x80)
00089         *length = 1;
00090     else if (input < 0x800)
00091         *length = 2;
00092     else if (input < 0x10000)
00093         *length = 3;
00094     else if (input < 0x200000)
00095         *length = 4;
00096     else {
00097         *length = 0;
00098         return;
00099     } // This code won't covert this correctly anyway.
00100 
00101     output += *length;
00102 
00103     // Scary scary fall throughs.
00104     switch (*length) {
00105         case 4:
00106             --output;
00107             *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00108             input >>= 6;
00109         case 3:
00110             --output;
00111             *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00112             input >>= 6;
00113         case 2:
00114             --output;
00115             *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00116             input >>= 6;
00117         case 1:
00118             --output;
00119             *output = (char) (input | FIRST_BYTE_MARK[*length]);
00120     }
00121 }
00122 
00123 /*static*/int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding /*encoding*/) {
00124     // This will only work for low-ascii, everything else is assumed to be a valid
00125     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00126     // to figure out alhabetical vs. not across encoding. So take a very
00127     // conservative approach.
00128 
00129     //  if ( encoding == TIXML_ENCODING_UTF8 )
00130     //  {
00131     if (anyByte < 127)
00132         return isalpha(anyByte);
00133     else
00134         return 1; // What else to do? The unicode set is huge...get the english ones right.
00135     //  }
00136     //  else
00137     //  {
00138     //          return isalpha( anyByte );
00139     //  }
00140 }
00141 
00142 /*static*/int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding /*encoding*/) {
00143     // This will only work for low-ascii, everything else is assumed to be a valid
00144     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00145     // to figure out alhabetical vs. not across encoding. So take a very
00146     // conservative approach.
00147 
00148     //  if ( encoding == TIXML_ENCODING_UTF8 )
00149     //  {
00150     if (anyByte < 127)
00151         return isalnum(anyByte);
00152     else
00153         return 1; // What else to do? The unicode set is huge...get the english ones right.
00154     //  }
00155     //  else
00156     //  {
00157     //          return isalnum( anyByte );
00158     //  }
00159 }
00160 
00161 class TiXmlParsingData {
00162     friend class TiXmlDocument;
00163     public:
00164     void Stamp(const char* now, TiXmlEncoding encoding);
00165 
00166     const TiXmlCursor& Cursor() {
00167         return cursor;
00168     }
00169 
00170     private:
00171     // Only used by the document!
00172     TiXmlParsingData(const char* start, int _tabsize, int row, int col) {
00173         assert( start );
00174         stamp = start;
00175         tabsize = _tabsize;
00176         cursor.row = row;
00177         cursor.col = col;
00178     }
00179 
00180     TiXmlCursor cursor;
00181     const char* stamp;
00182     int tabsize;
00183 };
00184 
00185 void TiXmlParsingData::Stamp(const char* now, TiXmlEncoding encoding) {
00186     assert( now );
00187 
00188     // Do nothing if the tabsize is 0.
00189     if (tabsize < 1) {
00190         return;
00191     }
00192 
00193     // Get the current row, column.
00194     int row = cursor.row;
00195     int col = cursor.col;
00196     const char* p = stamp;
00197     assert( p );
00198 
00199     while (p < now) {
00200         // Treat p as unsigned, so we have a happy compiler.
00201         const unsigned char* pU = (const unsigned char*) p;
00202 
00203         // Code contributed by Fletcher Dunn: (modified by lee)
00204         switch (*pU) {
00205             case 0:
00206                 // We *should* never get here, but in case we do, don't
00207                 // advance past the terminating null character, ever
00208                 return;
00209 
00210             case '\r':
00211                 // bump down to the next line
00212                 ++row;
00213                 col = 0;
00214                 // Eat the character
00215                 ++p;
00216 
00217                 // Check for \r\n sequence, and treat this as a single character
00218                 if (*p == '\n') {
00219                     ++p;
00220                 }
00221                 break;
00222 
00223             case '\n':
00224                 // bump down to the next line
00225                 ++row;
00226                 col = 0;
00227 
00228                 // Eat the character
00229                 ++p;
00230 
00231                 // Check for \n\r sequence, and treat this as a single
00232                 // character.  (Yes, this bizarre thing does occur still
00233                 // on some arcane platforms...)
00234                 if (*p == '\r') {
00235                     ++p;
00236                 }
00237                 break;
00238 
00239             case '\t':
00240                 // Eat the character
00241                 ++p;
00242 
00243                 // Skip to next tab stop
00244                 col = (col / tabsize + 1) * tabsize;
00245                 break;
00246 
00247             case TIXML_UTF_LEAD_0:
00248                 if (encoding == TIXML_ENCODING_UTF8) {
00249                     if (*(p + 1) && *(p + 2)) {
00250                         // In these cases, don't advance the column. These are
00251                         // 0-width spaces.
00252                         if (*(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2)
00253                                 == TIXML_UTF_LEAD_2)
00254                             p += 3;
00255                         else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbeU)
00256                             p += 3;
00257                         else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbfU)
00258                             p += 3;
00259                         else {
00260                             p += 3;
00261                             ++col;
00262                         } // A normal character.
00263                     }
00264                 } else {
00265                     ++p;
00266                     ++col;
00267                 }
00268                 break;
00269 
00270             default:
00271                 if (encoding == TIXML_ENCODING_UTF8) {
00272                     // Eat the 1 to 4 byte utf8 character.
00273                     int step =
00274                         TiXmlBase::utf8ByteTable[*((const unsigned char*) p)];
00275                     if (step == 0)
00276                         step = 1; // Error case from bad encoding, but handle gracefully.
00277                     p += step;
00278 
00279                     // Just advance one column, of course.
00280                     ++col;
00281                 } else {
00282                     ++p;
00283                     ++col;
00284                 }
00285                 break;
00286         }
00287     }
00288     cursor.row = row;
00289     cursor.col = col;
00290     assert( cursor.row >= -1 );
00291     assert( cursor.col >= -1 );
00292     stamp = p;
00293     assert( stamp );
00294 }
00295 
00296 const char* TiXmlBase::SkipWhiteSpace(const char* p, TiXmlEncoding encoding) {
00297     if (!p || !*p) {
00298         return 0;
00299     }
00300     if (encoding == TIXML_ENCODING_UTF8) {
00301         while (*p) {
00302             const unsigned char* pU = (const unsigned char*) p;
00303 
00304             // Skip the stupid Microsoft UTF-8 Byte order marks
00305             if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == TIXML_UTF_LEAD_1
00306                     && *(pU + 2) == TIXML_UTF_LEAD_2) {
00307                 p += 3;
00308                 continue;
00309             } else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU
00310                     && *(pU + 2) == 0xbeU) {
00311                 p += 3;
00312                 continue;
00313             } else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU
00314                     && *(pU + 2) == 0xbfU) {
00315                 p += 3;
00316                 continue;
00317             }
00318 
00319             if (IsWhiteSpace(*p) || *p == '\n' || *p == '\r') // Still using old rules for white space.
00320                 ++p;
00321             else
00322                 break;
00323         }
00324     } else {
00325         while (*p && IsWhiteSpace(*p) || *p == '\n' || *p == '\r')
00326             ++p;
00327     }
00328 
00329     return p;
00330 }
00331 
00332 #ifdef TIXML_USE_STL
00333 /*static*/bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00334 {
00335     for(;; )
00336     {
00337         if ( !in->good() ) return false;
00338 
00339         int c = in->peek();
00340         // At this scope, we can't get to a document. So fail silently.
00341         if ( !IsWhiteSpace( c ) || c <= 0 )
00342             return true;
00343 
00344         *tag += (char) in->get();
00345     }
00346 }
00347 
00348 /*static*/bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00349 {
00350     //assert( character > 0 && character < 128 );       // else it won't work in utf-8
00351     while ( in->good() )
00352     {
00353         int c = in->peek();
00354         if ( c == character )
00355             return true;
00356         if ( c <= 0 ) // Silent failure: can't get document at this scope
00357             return false;
00358 
00359         in->get();
00360         *tag += (char) c;
00361     }
00362     return false;
00363 }
00364 #endif
00365 
00366 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00367 // "assign" optimization removes over 10% of the execution time.
00368 //
00369 const char* TiXmlBase::ReadName(const char* p, TIXML_STRING * name,
00370         TiXmlEncoding encoding) {
00371     // Oddly, not supported on some comilers,
00372     //name->clear();
00373     // So use this:
00374     *name = "";
00375     assert( p );
00376 
00377     // Names start with letters or underscores.
00378     // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00379     // algorithm is generous.
00380     //
00381     // After that, they can be letters, underscores, numbers,
00382     // hyphens, or colons. (Colons are valid ony for namespaces,
00383     // but tinyxml can't tell namespaces from names.)
00384     if (p && *p && (IsAlpha((unsigned char) *p, encoding) || *p == '_')) {
00385         const char* start = p;
00386         while (p && *p && (IsAlphaNum((unsigned char) *p, encoding) || *p
00387                     == '_' || *p == '-' || *p == '.' || *p == ':')) {
00388             //(*name) += *p; // expensive
00389             ++p;
00390         }
00391         if (p - start > 0) {
00392             name->assign(start, p - start);
00393         }
00394         return p;
00395     }
00396     return 0;
00397 }
00398 
00399 const char* TiXmlBase::GetEntity(const char* p, char* value, int* length,
00400         TiXmlEncoding encoding) {
00401     // Presume an entity, and pull it out.
00402     TIXML_STRING ent;
00403     int i;
00404     *length = 0;
00405 
00406     if (*(p + 1) && *(p + 1) == '#' && *(p + 2)) {
00407         unsigned long ucs = 0;
00408         ptrdiff_t delta = 0;
00409         unsigned mult = 1;
00410 
00411         if (*(p + 2) == 'x') {
00412             // Hexadecimal.
00413             if (!*(p + 3))
00414                 return 0;
00415 
00416             const char* q = p + 3;
00417             q = strchr(q, ';');
00418 
00419             if (!q || !*q)
00420                 return 0;
00421 
00422             delta = q - p;
00423             --q;
00424 
00425             while (*q != 'x') {
00426                 if (*q >= '0' && *q <= '9')
00427                     ucs += mult * (*q - '0');
00428                 else if (*q >= 'a' && *q <= 'f')
00429                     ucs += mult * (*q - 'a' + 10);
00430                 else if (*q >= 'A' && *q <= 'F')
00431                     ucs += mult * (*q - 'A' + 10);
00432                 else
00433                     return 0;
00434                 mult *= 16;
00435                 --q;
00436             }
00437         } else {
00438             // Decimal.
00439             if (!*(p + 2))
00440                 return 0;
00441 
00442             const char* q = p + 2;
00443             q = strchr(q, ';');
00444 
00445             if (!q || !*q)
00446                 return 0;
00447 
00448             delta = q - p;
00449             --q;
00450 
00451             while (*q != '#') {
00452                 if (*q >= '0' && *q <= '9')
00453                     ucs += mult * (*q - '0');
00454                 else
00455                     return 0;
00456                 mult *= 10;
00457                 --q;
00458             }
00459         }
00460         if (encoding == TIXML_ENCODING_UTF8) {
00461             // convert the UCS to UTF-8
00462             ConvertUTF32ToUTF8(ucs, value, length);
00463         } else {
00464             *value = (char) ucs;
00465             *length = 1;
00466         }
00467         return p + delta + 1;
00468     }
00469 
00470     // Now try to match it.
00471     for (i = 0; i < NUM_ENTITY; ++i) {
00472         if (strncmp(entity[i].str, p, entity[i].strLength) == 0) {
00473             assert( strlen( entity[i].str ) == entity[i].strLength );
00474             *value = entity[i].chr;
00475             *length = 1;
00476             return (p + entity[i].strLength);
00477         }
00478     }
00479 
00480     // So it wasn't an entity, its unrecognized, or something like that.
00481     *value = *p; // Don't put back the last one, since we return it!
00482     //*length = 1;      // Leave unrecognized entities - this doesn't really work.
00483     // Just writes strange XML.
00484     return p + 1;
00485 }
00486 
00487 bool TiXmlBase::StringEqual(const char* p, const char* tag, bool ignoreCase,
00488         TiXmlEncoding encoding) {
00489     assert( p );
00490     assert( tag );
00491     if (!p || !*p) {
00492         assert( 0 );
00493         return false;
00494     }
00495 
00496     const char* q = p;
00497 
00498     if (ignoreCase) {
00499         while (*q && *tag && ToLower(*q, encoding) == ToLower(*tag, encoding)) {
00500             ++q;
00501             ++tag;
00502         }
00503 
00504         if (*tag == 0)
00505             return true;
00506     } else {
00507         while (*q && *tag && *q == *tag) {
00508             ++q;
00509             ++tag;
00510         }
00511 
00512         if (*tag == 0) // Have we found the end of the tag, and everything equal?
00513             return true;
00514     }
00515     return false;
00516 }
00517 
00518 const char* TiXmlBase::ReadText(const char* p, TIXML_STRING * text,
00519         bool trimWhiteSpace, const char* endTag, bool caseInsensitive,
00520         TiXmlEncoding encoding) {
00521     *text = "";
00522     if (!trimWhiteSpace // certain tags always keep whitespace
00523             || !condenseWhiteSpace) // if true, whitespace is always kept
00524     {
00525         // Keep all the white space.
00526         while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding)) {
00527             int len;
00528             char cArr[4] = { 0, 0, 0, 0 };
00529             p = GetChar(p, cArr, &len, encoding);
00530             text->append(cArr, len);
00531         }
00532     } else {
00533         bool whitespace = false;
00534 
00535         // Remove leading white space:
00536         p = SkipWhiteSpace(p, encoding);
00537         while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding)) {
00538             if (*p == '\r' || *p == '\n') {
00539                 whitespace = true;
00540                 ++p;
00541             } else if (IsWhiteSpace(*p)) {
00542                 whitespace = true;
00543                 ++p;
00544             } else {
00545                 // If we've found whitespace, add it before the
00546                 // new character. Any whitespace just becomes a space.
00547                 if (whitespace) {
00548                     (*text) += ' ';
00549                     whitespace = false;
00550                 }
00551                 int len;
00552                 char cArr[4] = { 0, 0, 0, 0 };
00553                 p = GetChar(p, cArr, &len, encoding);
00554                 if (len == 1)
00555                     (*text) += cArr[0]; // more efficient
00556                 else
00557                     text->append(cArr, len);
00558             }
00559         }
00560     }
00561     if (p)
00562         p += strlen(endTag);
00563     return p;
00564 }
00565 
00566 #ifdef TIXML_USE_STL
00567 
00568 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00569 {
00570     // The basic issue with a document is that we don't know what we're
00571     // streaming. Read something presumed to be a tag (and hope), then
00572     // identify it, and call the appropriate stream method on the tag.
00573     //
00574     // This "pre-streaming" will never read the closing ">" so the
00575     // sub-tag can orient itself.
00576 
00577     if ( !StreamTo( in, '<', tag ) )
00578     {
00579         SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00580         return;
00581     }
00582 
00583     while ( in->good() )
00584     {
00585         int tagIndex = (int) tag->length();
00586         while ( in->good() && in->peek() != '>' )
00587         {
00588             int c = in->get();
00589             if ( c <= 0 )
00590             {
00591                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00592                 break;
00593             }
00594             (*tag) += (char) c;
00595         }
00596 
00597         if ( in->good() )
00598         {
00599             // We now have something we presume to be a node of
00600             // some sort. Identify it, and call the node to
00601             // continue streaming.
00602             TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00603 
00604             if ( node )
00605             {
00606                 node->StreamIn( in, tag );
00607                 bool isElement = node->ToElement() != 0;
00608                 delete node;
00609                 node = 0;
00610 
00611                 // If this is the root element, we're done. Parsing will be
00612                 // done by the >> operator.
00613                 if ( isElement )
00614                 {
00615                     return;
00616                 }
00617             }
00618             else
00619             {
00620                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00621                 return;
00622             }
00623         }
00624     }
00625     // We should have returned sooner.
00626     SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00627 }
00628 
00629 #endif
00630 
00631 const char* TiXmlDocument::Parse(const char* p, TiXmlParsingData* prevData,
00632         TiXmlEncoding encoding) {
00633     ClearError();
00634 
00635     // Parse away, at the document level. Since a document
00636     // contains nothing but other tags, most of what happens
00637     // here is skipping white space.
00638     if (!p || !*p) {
00639         SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
00640         return 0;
00641     }
00642 
00643     // Note that, for a document, this needs to come
00644     // before the while space skip, so that parsing
00645     // starts from the pointer we are given.
00646     location.Clear();
00647     if (prevData) {
00648         location.row = prevData->cursor.row;
00649         location.col = prevData->cursor.col;
00650     } else {
00651         location.row = 0;
00652         location.col = 0;
00653     }
00654     TiXmlParsingData data(p, TabSize(), location.row, location.col);
00655     location = data.Cursor();
00656 
00657     if (encoding == TIXML_ENCODING_UNKNOWN) {
00658         // Check for the Microsoft UTF-8 lead bytes.
00659         const unsigned char* pU = (const unsigned char*) p;
00660         if (*(pU + 0) && *(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1)
00661                 && *(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2) && *(pU + 2)
00662                 == TIXML_UTF_LEAD_2) {
00663             encoding = TIXML_ENCODING_UTF8;
00664             useMicrosoftBOM = true;
00665         }
00666     }
00667 
00668     p = SkipWhiteSpace(p, encoding);
00669     if (!p) {
00670         SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
00671         return 0;
00672     }
00673 
00674     while (p && *p) {
00675         TiXmlNode* node = Identify(p, encoding);
00676         if (node) {
00677             p = node->Parse(p, &data, encoding);
00678             LinkEndChild(node);
00679         } else {
00680             break;
00681         }
00682 
00683         // Did we get encoding info?
00684         if (encoding == TIXML_ENCODING_UNKNOWN && node->ToDeclaration()) {
00685             TiXmlDeclaration* dec = node->ToDeclaration();
00686             const char* enc = dec->Encoding();
00687             assert( enc );
00688 
00689             if (*enc == 0)
00690                 encoding = TIXML_ENCODING_UTF8;
00691             else if (StringEqual(enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN))
00692                 encoding = TIXML_ENCODING_UTF8;
00693             else if (StringEqual(enc, "UTF8", true, TIXML_ENCODING_UNKNOWN))
00694                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00695             else
00696                 encoding = TIXML_ENCODING_LEGACY;
00697         }
00698 
00699         p = SkipWhiteSpace(p, encoding);
00700     }
00701 
00702     // Was this empty?
00703     if (!firstChild) {
00704         SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding);
00705         return 0;
00706     }
00707 
00708     // All is well.
00709     return p;
00710 }
00711 
00712 void TiXmlDocument::SetError(int err, const char* pError,
00713         TiXmlParsingData* data, TiXmlEncoding encoding) {
00714     // The first error in a chain is more accurate - don't set again!
00715     if (error)
00716         return;
00717 
00718     assert( err> 0 && err < TIXML_ERROR_STRING_COUNT );
00719     error = true;
00720     errorId = err;
00721     errorDesc = errorString[errorId];
00722 
00723     errorLocation.Clear();
00724     if (pError && data) {
00725         data->Stamp(pError, encoding);
00726         errorLocation = data->Cursor();
00727     }
00728 }
00729 
00730 TiXmlNode* TiXmlNode::Identify(const char* p, TiXmlEncoding encoding) {
00731     TiXmlNode* returnNode = 0;
00732 
00733     p = SkipWhiteSpace(p, encoding);
00734     if (!p || !*p || *p != '<') {
00735         return 0;
00736     }
00737 
00738     TiXmlDocument* doc = GetDocument();
00739     p = SkipWhiteSpace(p, encoding);
00740 
00741     if (!p || !*p) {
00742         return 0;
00743     }
00744 
00745     // What is this thing?
00746     // - Elements start with a letter or underscore, but xml is reserved.
00747     // - Comments: <!--
00748     // - Decleration: <?xml
00749     // - Everthing else is unknown to tinyxml.
00750     //
00751 
00752     const char* xmlHeader = { "<?xml" };
00753     const char* commentHeader = { "<!--" };
00754     const char* dtdHeader = { "<!" };
00755     const char* cdataHeader = { "<![CDATA[" };
00756 
00757     if (StringEqual(p, xmlHeader, true, encoding)) {
00758 #ifdef DEBUG_PARSER
00759         TIXML_LOG( "XML parsing Declaration\n" );
00760 #endif
00761         returnNode = new TiXmlDeclaration();
00762     } else if (StringEqual(p, commentHeader, false, encoding)) {
00763 #ifdef DEBUG_PARSER
00764         TIXML_LOG( "XML parsing Comment\n" );
00765 #endif
00766         returnNode = new TiXmlComment();
00767     } else if (StringEqual(p, cdataHeader, false, encoding)) {
00768 #ifdef DEBUG_PARSER
00769         TIXML_LOG( "XML parsing CDATA\n" );
00770 #endif
00771         TiXmlText* text = new TiXmlText("");
00772         text->SetCDATA(true);
00773         returnNode = text;
00774     } else if (StringEqual(p, dtdHeader, false, encoding)) {
00775 #ifdef DEBUG_PARSER
00776         TIXML_LOG( "XML parsing Unknown(1)\n" );
00777 #endif
00778         returnNode = new TiXmlUnknown();
00779     } else if (IsAlpha(*(p + 1), encoding) || *(p + 1) == '_') {
00780 #ifdef DEBUG_PARSER
00781         TIXML_LOG( "XML parsing Element\n" );
00782 #endif
00783         returnNode = new TiXmlElement("");
00784     } else {
00785 #ifdef DEBUG_PARSER
00786         TIXML_LOG( "XML parsing Unknown(2)\n" );
00787 #endif
00788         returnNode = new TiXmlUnknown();
00789     }
00790 
00791     if (returnNode) {
00792         // Set the parent, so it can report errors
00793         returnNode->parent = this;
00794     } else {
00795         if (doc)
00796             doc->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0,
00797                     TIXML_ENCODING_UNKNOWN);
00798     }
00799     return returnNode;
00800 }
00801 
00802 #ifdef TIXML_USE_STL
00803 
00804 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00805 {
00806     // We're called with some amount of pre-parsing. That is, some of "this"
00807     // element is in "tag". Go ahead and stream to the closing ">"
00808     while( in->good() )
00809     {
00810         int c = in->get();
00811         if ( c <= 0 )
00812         {
00813             TiXmlDocument* document = GetDocument();
00814             if ( document )
00815                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00816             return;
00817         }
00818         (*tag) += (char) c;
00819 
00820         if ( c == '>' )
00821             break;
00822     }
00823 
00824     if ( tag->length() < 3 ) return;
00825 
00826     // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00827     // If not, identify and stream.
00828 
00829     if ( tag->at( tag->length() - 1 ) == '>'
00830             && tag->at( tag->length() - 2 ) == '/' )
00831     {
00832         // All good!
00833         return;
00834     }
00835     else if ( tag->at( tag->length() - 1 ) == '>' )
00836     {
00837         // There is more. Could be:
00838         //              text
00839         //              cdata text (which looks like another node)
00840         //              closing tag
00841         //              another node.
00842         for (;; )
00843         {
00844             StreamWhiteSpace( in, tag );
00845 
00846             // Do we have text?
00847             if ( in->good() && in->peek() != '<' )
00848             {
00849                 // Yep, text.
00850                 TiXmlText text( "" );
00851                 text.StreamIn( in, tag );
00852 
00853                 // What follows text is a closing tag or another node.
00854                 // Go around again and figure it out.
00855                 continue;
00856             }
00857 
00858             // We now have either a closing tag...or another node.
00859             // We should be at a "<", regardless.
00860             if ( !in->good() ) return;
00861             assert( in->peek() == '<' );
00862             int tagIndex = (int) tag->length();
00863 
00864             bool closingTag = false;
00865             bool firstCharFound = false;
00866 
00867             for(;; )
00868             {
00869                 if ( !in->good() )
00870                     return;
00871 
00872                 int c = in->peek();
00873                 if ( c <= 0 )
00874                 {
00875                     TiXmlDocument* document = GetDocument();
00876                     if ( document )
00877                         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00878                     return;
00879                 }
00880 
00881                 if ( c == '>' )
00882                     break;
00883 
00884                 *tag += (char) c;
00885                 in->get();
00886 
00887                 // Early out if we find the CDATA id.
00888                 if ( c == '[' && tag->size() >= 9 )
00889                 {
00890                     size_t len = tag->size();
00891                     const char* start = tag->c_str() + len - 9;
00892                     if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00893                         assert( !closingTag );
00894                         break;
00895                     }
00896                 }
00897 
00898                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00899                 {
00900                     firstCharFound = true;
00901                     if ( c == '/' )
00902                         closingTag = true;
00903                 }
00904             }
00905             // If it was a closing tag, then read in the closing '>' to clean up the input stream.
00906             // If it was not, the streaming will be done by the tag.
00907             if ( closingTag )
00908             {
00909                 if ( !in->good() )
00910                     return;
00911 
00912                 int c = in->get();
00913                 if ( c <= 0 )
00914                 {
00915                     TiXmlDocument* document = GetDocument();
00916                     if ( document )
00917                         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00918                     return;
00919                 }
00920                 assert( c == '>' );
00921                 *tag += (char) c;
00922 
00923                 // We are done, once we've found our closing tag.
00924                 return;
00925             }
00926             else
00927             {
00928                 // If not a closing tag, id it, and stream.
00929                 const char* tagloc = tag->c_str() + tagIndex;
00930                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
00931                 if ( !node )
00932                     return;
00933                 node->StreamIn( in, tag );
00934                 delete node;
00935                 node = 0;
00936 
00937                 // No return: go around from the beginning: text, closing tag, or node.
00938             }
00939         }
00940     }
00941 }
00942 #endif
00943 
00944 const char* TiXmlElement::Parse(const char* p, TiXmlParsingData* data,
00945         TiXmlEncoding encoding) {
00946     p = SkipWhiteSpace(p, encoding);
00947     TiXmlDocument* document = GetDocument();
00948 
00949     if (!p || !*p) {
00950         if (document)
00951             document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding);
00952         return 0;
00953     }
00954 
00955     if (data) {
00956         data->Stamp(p, encoding);
00957         location = data->Cursor();
00958     }
00959 
00960     if (*p != '<') {
00961         if (document)
00962             document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding);
00963         return 0;
00964     }
00965 
00966     p = SkipWhiteSpace(p + 1, encoding);
00967 
00968     // Read the name.
00969     const char* pErr = p;
00970 
00971     p = ReadName(p, &value, encoding);
00972     if (!p || !*p) {
00973         if (document)
00974             document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr,
00975                     data, encoding);
00976         return 0;
00977     }
00978     TIXML_STRING endTag("</");
00979     endTag += value;
00980     endTag += ">";
00981 
00982     // Check for and read attributes. Also look for an empty
00983     // tag or an end tag.
00984     while (p && *p) {
00985         pErr = p;
00986         p = SkipWhiteSpace(p, encoding);
00987         if (!p || !*p) {
00988             if (document)
00989                 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data,
00990                         encoding);
00991             return 0;
00992         }
00993         if (*p == '/') {
00994             ++p;
00995             // Empty tag.
00996             if (*p != '>') {
00997                 if (document)
00998                     document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data,
00999                             encoding);
01000                 return 0;
01001             }
01002             return (p + 1);
01003         } else if (*p == '>') {
01004             // Done with attributes (if there were any.)
01005             // Read the value -- which can include other
01006             // elements -- read the end tag, and return.
01007             ++p;
01008             p = ReadValue(p, data, encoding); // Note this is an Element method, and will set the error if one happens.
01009             if (!p || !*p) {
01010                 // We were looking for the end tag, but found nothing.
01011                 // Fix for [ 1663758 ] Failure to report error on bad XML
01012                 if (document)
01013                     document->SetError(TIXML_ERROR_READING_END_TAG, p, data,
01014                             encoding);
01015                 return 0;
01016             }
01017 
01018             // We should find the end tag now
01019             if (StringEqual(p, endTag.c_str(), false, encoding)) {
01020                 p += endTag.length();
01021                 return p;
01022             } else {
01023                 if (document)
01024                     document->SetError(TIXML_ERROR_READING_END_TAG, p, data,
01025                             encoding);
01026                 return 0;
01027             }
01028         } else {
01029             // Try to read an attribute:
01030             TiXmlAttribute* attrib = new TiXmlAttribute();
01031             if (!attrib) {
01032                 if (document)
01033                     document->SetError(TIXML_ERROR_OUT_OF_MEMORY, pErr, data,
01034                             encoding);
01035                 return 0;
01036             }
01037 
01038             attrib->SetDocument(document);
01039             pErr = p;
01040             p = attrib->Parse(p, data, encoding);
01041 
01042             if (!p || !*p) {
01043                 if (document)
01044                     document->SetError(TIXML_ERROR_PARSING_ELEMENT, pErr, data,
01045                             encoding);
01046                 delete attrib;
01047                 return 0;
01048             }
01049 
01050             // Handle the strange case of double attributes:
01051 #ifdef TIXML_USE_STL
01052             TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01053 #else
01054             TiXmlAttribute* node = attributeSet.Find(attrib->Name());
01055 #endif
01056             if (node) {
01057                 node->SetValue(attrib->Value());
01058                 delete attrib;
01059                 return 0;
01060             }
01061 
01062             attributeSet.Add(attrib);
01063         }
01064     }
01065     return p;
01066 }
01067 
01068 const char* TiXmlElement::ReadValue(const char* p, TiXmlParsingData* data,
01069         TiXmlEncoding encoding) {
01070     TiXmlDocument* document = GetDocument();
01071 
01072     // Read in text and elements in any order.
01073     const char* pWithWhiteSpace = p;
01074     p = SkipWhiteSpace(p, encoding);
01075 
01076     while (p && *p) {
01077         if (*p != '<') {
01078             // Take what we have, make a text element.
01079             TiXmlText* textNode = new TiXmlText("");
01080 
01081             if (!textNode) {
01082                 if (document)
01083                     document->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0,
01084                             encoding);
01085                 return 0;
01086             }
01087 
01088             if (TiXmlBase::IsWhiteSpaceCondensed()) {
01089                 p = textNode->Parse(p, data, encoding);
01090             } else {
01091                 // Special case: we want to keep the white space
01092                 // so that leading spaces aren't removed.
01093                 p = textNode->Parse(pWithWhiteSpace, data, encoding);
01094             }
01095 
01096             if (!textNode->Blank())
01097                 LinkEndChild(textNode);
01098             else
01099                 delete textNode;
01100         } else {
01101             // We hit a '<'
01102             // Have we hit a new element or an end tag? This could also be
01103             // a TiXmlText in the "CDATA" style.
01104             if (StringEqual(p, "</", false, encoding)) {
01105                 return p;
01106             } else {
01107                 TiXmlNode* node = Identify(p, encoding);
01108                 if (node) {
01109                     p = node->Parse(p, data, encoding);
01110                     LinkEndChild(node);
01111                 } else {
01112                     return 0;
01113                 }
01114             }
01115         }
01116         pWithWhiteSpace = p;
01117         p = SkipWhiteSpace(p, encoding);
01118     }
01119 
01120     if (!p) {
01121         if (document)
01122             document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0,
01123                     encoding);
01124     }
01125     return p;
01126 }
01127 
01128 #ifdef TIXML_USE_STL
01129 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01130 {
01131     while ( in->good() )
01132     {
01133         int c = in->get();
01134         if ( c <= 0 )
01135         {
01136             TiXmlDocument* document = GetDocument();
01137             if ( document )
01138                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01139             return;
01140         }
01141         (*tag) += (char) c;
01142 
01143         if ( c == '>' )
01144         {
01145             // All is well.
01146             return;
01147         }
01148     }
01149 }
01150 #endif
01151 
01152 const char* TiXmlUnknown::Parse(const char* p, TiXmlParsingData* data,
01153         TiXmlEncoding encoding) {
01154     TiXmlDocument* document = GetDocument();
01155     p = SkipWhiteSpace(p, encoding);
01156 
01157     if (data) {
01158         data->Stamp(p, encoding);
01159         location = data->Cursor();
01160     }
01161     if (!p || !*p || *p != '<') {
01162         if (document)
01163             document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding);
01164         return 0;
01165     }
01166     ++p;
01167     value = "";
01168 
01169     while (p && *p && *p != '>') {
01170         value += *p;
01171         ++p;
01172     }
01173 
01174     if (!p) {
01175         if (document)
01176             document->SetError(TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding);
01177     }
01178     if (*p == '>')
01179         return p + 1;
01180     return p;
01181 }
01182 
01183 #ifdef TIXML_USE_STL
01184 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01185 {
01186     while ( in->good() )
01187     {
01188         int c = in->get();
01189         if ( c <= 0 )
01190         {
01191             TiXmlDocument* document = GetDocument();
01192             if ( document )
01193                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01194             return;
01195         }
01196 
01197         (*tag) += (char) c;
01198 
01199         if ( c == '>'
01200                 && tag->at( tag->length() - 2 ) == '-'
01201                 && tag->at( tag->length() - 3 ) == '-' )
01202         {
01203             // All is well.
01204             return;
01205         }
01206     }
01207 }
01208 #endif
01209 
01210 const char* TiXmlComment::Parse(const char* p, TiXmlParsingData* data,
01211         TiXmlEncoding encoding) {
01212     TiXmlDocument* document = GetDocument();
01213     value = "";
01214 
01215     p = SkipWhiteSpace(p, encoding);
01216 
01217     if (data) {
01218         data->Stamp(p, encoding);
01219         location = data->Cursor();
01220     }
01221     const char* startTag = "<!--";
01222     const char* endTag = "-->";
01223 
01224     if (!StringEqual(p, startTag, false, encoding)) {
01225         document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding);
01226         return 0;
01227     }
01228     p += strlen(startTag);
01229 
01230     // [ 1475201 ] TinyXML parses entities in comments
01231     // Oops - ReadText doesn't work, because we don't want to parse the entities.
01232     // p = ReadText( p, &value, false, endTag, false, encoding );
01233     //
01234     // from the XML spec:
01235     /*
01236        [Definition: Comments may appear anywhere in a document outside other markup; in addition,
01237        they may appear within the document type declaration at places allowed by the grammar.
01238        They are not part of the document's character data; an XML processor MAY, but need not,
01239        make it possible for an application to retrieve the text of comments. For compatibility,
01240        the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
01241        references MUST NOT be recognized within comments.
01242 
01243        An example of a comment:
01244 
01245        <!-- declarations for <head> & <body> -->
01246        */
01247 
01248     value = "";
01249     // Keep all the white space.
01250     while (p && *p && !StringEqual(p, endTag, false, encoding)) {
01251         value.append(p, 1);
01252         ++p;
01253     }
01254     if (p)
01255         p += strlen(endTag);
01256 
01257     return p;
01258 }
01259 
01260 const char* TiXmlAttribute::Parse(const char* p, TiXmlParsingData* data,
01261         TiXmlEncoding encoding) {
01262     p = SkipWhiteSpace(p, encoding);
01263     if (!p || !*p)
01264         return 0;
01265 
01266     //  int tabsize = 4;
01267     //  if ( document )
01268     //          tabsize = document->TabSize();
01269 
01270     if (data) {
01271         data->Stamp(p, encoding);
01272         location = data->Cursor();
01273     }
01274     // Read the name, the '=' and the value.
01275     const char* pErr = p;
01276     p = ReadName(p, &name, encoding);
01277     if (!p || !*p) {
01278         if (document)
01279             document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data,
01280                     encoding);
01281         return 0;
01282     }
01283     p = SkipWhiteSpace(p, encoding);
01284     if (!p || !*p || *p != '=') {
01285         if (document)
01286             document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01287                     encoding);
01288         return 0;
01289     }
01290 
01291     ++p; // skip '='
01292     p = SkipWhiteSpace(p, encoding);
01293     if (!p || !*p) {
01294         if (document)
01295             document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01296                     encoding);
01297         return 0;
01298     }
01299 
01300     const char* end;
01301     const char SINGLE_QUOTE = '\'';
01302     const char DOUBLE_QUOTE = '\"';
01303 
01304     if (*p == SINGLE_QUOTE) {
01305         ++p;
01306         end = "\'"; // single quote in string
01307         p = ReadText(p, &value, false, end, false, encoding);
01308     } else if (*p == DOUBLE_QUOTE) {
01309         ++p;
01310         end = "\""; // double quote in string
01311         p = ReadText(p, &value, false, end, false, encoding);
01312     } else {
01313         // All attribute values should be in single or double quotes.
01314         // But this is such a common error that the parser will try
01315         // its best, even without them.
01316         value = "";
01317         while (p && *p // existence
01318                 && !IsWhiteSpace(*p) && *p != '\n' && *p != '\r' // whitespace
01319                 && *p != '/' && *p != '>') // tag end
01320         {
01321             if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE) {
01322                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01323                 // We did not have an opening quote but seem to have a
01324                 // closing one. Give up and throw an error.
01325                 if (document)
01326                     document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01327                             encoding);
01328                 return 0;
01329             }
01330             value += *p;
01331             ++p;
01332         }
01333     }
01334     return p;
01335 }
01336 
01337 #ifdef TIXML_USE_STL
01338 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01339 {
01340     while ( in->good() )
01341     {
01342         int c = in->peek();
01343         if ( !cdata && (c == '<' ) )
01344         {
01345             return;
01346         }
01347         if ( c <= 0 )
01348         {
01349             TiXmlDocument* document = GetDocument();
01350             if ( document )
01351                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01352             return;
01353         }
01354 
01355         (*tag) += (char) c;
01356         in->get(); // "commits" the peek made above
01357 
01358         if ( cdata && c == '>' && tag->size() >= 3 ) {
01359             size_t len = tag->size();
01360             if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01361                 // terminator of cdata.
01362                 return;
01363             }
01364         }
01365     }
01366 }
01367 #endif
01368 
01369 const char* TiXmlText::Parse(const char* p, TiXmlParsingData* data,
01370         TiXmlEncoding encoding) {
01371     value = "";
01372     TiXmlDocument* document = GetDocument();
01373 
01374     if (data) {
01375         data->Stamp(p, encoding);
01376         location = data->Cursor();
01377     }
01378 
01379     const char* const startTag = "<![CDATA[";
01380     const char* const endTag = "]]>";
01381 
01382     if (cdata || StringEqual(p, startTag, false, encoding)) {
01383         cdata = true;
01384 
01385         if (!StringEqual(p, startTag, false, encoding)) {
01386             document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding);
01387             return 0;
01388         }
01389         p += strlen(startTag);
01390 
01391         // Keep all the white space, ignore the encoding, etc.
01392         while (p && *p && !StringEqual(p, endTag, false, encoding)) {
01393             value += *p;
01394             ++p;
01395         }
01396         TIXML_STRING dummy;
01397         p = ReadText(p, &dummy, false, endTag, false, encoding);
01398         return p;
01399     } else {
01400         bool ignoreWhite = true;
01401 
01402         const char* end = "<";
01403         p = ReadText(p, &value, ignoreWhite, end, false, encoding);
01404         if (p)
01405             return p - 1; // don't truncate the '<'
01406         return 0;
01407     }
01408 }
01409 
01410 #ifdef TIXML_USE_STL
01411 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01412 {
01413     while ( in->good() )
01414     {
01415         int c = in->get();
01416         if ( c <= 0 )
01417         {
01418             TiXmlDocument* document = GetDocument();
01419             if ( document )
01420                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01421             return;
01422         }
01423         (*tag) += (char) c;
01424 
01425         if ( c == '>' )
01426         {
01427             // All is well.
01428             return;
01429         }
01430     }
01431 }
01432 #endif
01433 
01434 const char* TiXmlDeclaration::Parse(const char* p, TiXmlParsingData* data,
01435         TiXmlEncoding _encoding) {
01436     p = SkipWhiteSpace(p, _encoding);
01437     // Find the beginning, find the end, and look for
01438     // the stuff in-between.
01439     TiXmlDocument* document = GetDocument();
01440     if (!p || !*p || !StringEqual(p, "<?xml", true, _encoding)) {
01441         if (document)
01442             document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding);
01443         return 0;
01444     }
01445     if (data) {
01446         data->Stamp(p, _encoding);
01447         location = data->Cursor();
01448     }
01449     p += 5;
01450 
01451     version = "";
01452     encoding = "";
01453     standalone = "";
01454 
01455     while (p && *p) {
01456         if (*p == '>') {
01457             ++p;
01458             return p;
01459         }
01460 
01461         p = SkipWhiteSpace(p, _encoding);
01462         if (StringEqual(p, "version", true, _encoding)) {
01463             TiXmlAttribute attrib;
01464             p = attrib.Parse(p, data, _encoding);
01465             version = attrib.Value();
01466         } else if (StringEqual(p, "encoding", true, _encoding)) {
01467             TiXmlAttribute attrib;
01468             p = attrib.Parse(p, data, _encoding);
01469             encoding = attrib.Value();
01470         } else if (StringEqual(p, "standalone", true, _encoding)) {
01471             TiXmlAttribute attrib;
01472             p = attrib.Parse(p, data, _encoding);
01473             standalone = attrib.Value();
01474         } else {
01475             // Read over whatever it is.
01476             while (p && *p && *p != '>' && !IsWhiteSpace(*p))
01477                 ++p;
01478         }
01479     }
01480     return 0;
01481 }
01482 
01483 bool TiXmlText::Blank() const {
01484     for (unsigned i = 0; i < value.length(); i++)
01485         if (!IsWhiteSpace(value[i]))
01486             return false;
01487     return true;
01488 }
01489