tinyxmlparser.cpp
Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied 
00006 warranty. In no event will the authors be held liable for any 
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any 
00010 purpose, including commercial applications, and to alter it and 
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must 
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and 
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source 
00022 distribution.
00023 */
00024 
00025 #include <ctype.h>
00026 #include <stddef.h>
00027 
00028 #include "tinyxml.h"
00029 
00030 //#define DEBUG_PARSER
00031 #if defined( DEBUG_PARSER )
00032 #       if defined( DEBUG ) && defined( _MSC_VER )
00033 #               include <windows.h>
00034 #               define TIXML_LOG OutputDebugString
00035 #       else
00036 #               define TIXML_LOG printf
00037 #       endif
00038 #endif
00039 
00040 namespace rospack_tinyxml {
00041 
00042 // Note tha "PutString" hardcodes the same list. This
00043 // is less flexible than it appears. Changing the entries
00044 // or order will break putstring.       
00045 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
00046 {
00047         { "&amp;",  5, '&' },
00048         { "&lt;",   4, '<' },
00049         { "&gt;",   4, '>' },
00050         { "&quot;", 6, '\"' },
00051         { "&apos;", 6, '\'' }
00052 };
00053 
00054 // Bunch of unicode info at:
00055 //              http://www.unicode.org/faq/utf_bom.html
00056 // Including the basic of this table, which determines the #bytes in the
00057 // sequence from the lead byte. 1 placed for invalid sequences --
00058 // although the result will be junk, pass it through as much as possible.
00059 // Beware of the non-characters in UTF-8:       
00060 //                              ef bb bf (Microsoft "lead bytes")
00061 //                              ef bf be
00062 //                              ef bf bf 
00063 
// The three bytes EF BB BF (the Microsoft "lead bytes" / UTF-8 byte order
// mark), in the order they appear in the stream.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
00067 
00068 const int TiXmlBase::utf8ByteTable[256] = 
00069 {
00070         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00071                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00072                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00073                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00074                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00075                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00080                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90 
00081                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0 
00082                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0 
00083                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00084                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00085                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00086                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00087 };
00088 
00089 
00090 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00091 {
00092         const unsigned long BYTE_MASK = 0xBF;
00093         const unsigned long BYTE_MARK = 0x80;
00094         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00095 
00096         if (input < 0x80) 
00097                 *length = 1;
00098         else if ( input < 0x800 )
00099                 *length = 2;
00100         else if ( input < 0x10000 )
00101                 *length = 3;
00102         else if ( input < 0x200000 )
00103                 *length = 4;
00104         else
00105                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00106 
00107         output += *length;
00108 
00109         // Scary scary fall throughs.
00110         switch (*length) 
00111         {
00112                 case 4:
00113                         --output; 
00114                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00115                         input >>= 6;
00116                 case 3:
00117                         --output; 
00118                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00119                         input >>= 6;
00120                 case 2:
00121                         --output; 
00122                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00123                         input >>= 6;
00124                 case 1:
00125                         --output; 
00126                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00127         }
00128 }
00129 
00130 
00131 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00132 {
00133         // This will only work for low-ascii, everything else is assumed to be a valid
00134         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00135         // to figure out alhabetical vs. not across encoding. So take a very 
00136         // conservative approach.
00137 
00138 //      if ( encoding == TIXML_ENCODING_UTF8 )
00139 //      {
00140                 if ( anyByte < 127 )
00141                         return isalpha( anyByte );
00142                 else
00143                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00144 //      }
00145 //      else
00146 //      {
00147 //              return isalpha( anyByte );
00148 //      }
00149 }
00150 
00151 
00152 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00153 {
00154         // This will only work for low-ascii, everything else is assumed to be a valid
00155         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00156         // to figure out alhabetical vs. not across encoding. So take a very 
00157         // conservative approach.
00158 
00159 //      if ( encoding == TIXML_ENCODING_UTF8 )
00160 //      {
00161                 if ( anyByte < 127 )
00162                         return isalnum( anyByte );
00163                 else
00164                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00165 //      }
00166 //      else
00167 //      {
00168 //              return isalnum( anyByte );
00169 //      }
00170 }
00171 
00172 
00173 class TiXmlParsingData
00174 {
00175         friend class TiXmlDocument;
00176   public:
00177         void Stamp( const char* now, TiXmlEncoding encoding );
00178 
00179         const TiXmlCursor& Cursor()     { return cursor; }
00180 
00181   private:
00182         // Only used by the document!
00183         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00184         {
00185                 assert( start );
00186                 stamp = start;
00187                 tabsize = _tabsize;
00188                 cursor.row = row;
00189                 cursor.col = col;
00190         }
00191 
00192         TiXmlCursor             cursor;
00193         const char*             stamp;
00194         int                             tabsize;
00195 };
00196 
00197 
00198 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00199 {
00200         assert( now );
00201 
00202         // Do nothing if the tabsize is 0.
00203         if ( tabsize < 1 )
00204         {
00205                 return;
00206         }
00207 
00208         // Get the current row, column.
00209         int row = cursor.row;
00210         int col = cursor.col;
00211         const char* p = stamp;
00212         assert( p );
00213 
00214         while ( p < now )
00215         {
00216                 // Treat p as unsigned, so we have a happy compiler.
00217                 const unsigned char* pU = (const unsigned char*)p;
00218 
00219                 // Code contributed by Fletcher Dunn: (modified by lee)
00220                 switch (*pU) {
00221                         case 0:
00222                                 // We *should* never get here, but in case we do, don't
00223                                 // advance past the terminating null character, ever
00224                                 return;
00225 
00226                         case '\r':
00227                                 // bump down to the next line
00228                                 ++row;
00229                                 col = 0;                                
00230                                 // Eat the character
00231                                 ++p;
00232 
00233                                 // Check for \r\n sequence, and treat this as a single character
00234                                 if (*p == '\n') {
00235                                         ++p;
00236                                 }
00237                                 break;
00238 
00239                         case '\n':
00240                                 // bump down to the next line
00241                                 ++row;
00242                                 col = 0;
00243 
00244                                 // Eat the character
00245                                 ++p;
00246 
00247                                 // Check for \n\r sequence, and treat this as a single
00248                                 // character.  (Yes, this bizarre thing does occur still
00249                                 // on some arcane platforms...)
00250                                 if (*p == '\r') {
00251                                         ++p;
00252                                 }
00253                                 break;
00254 
00255                         case '\t':
00256                                 // Eat the character
00257                                 ++p;
00258 
00259                                 // Skip to next tab stop
00260                                 col = (col / tabsize + 1) * tabsize;
00261                                 break;
00262 
00263                         case TIXML_UTF_LEAD_0:
00264                                 if ( encoding == TIXML_ENCODING_UTF8 )
00265                                 {
00266                                         if ( *(p+1) && *(p+2) )
00267                                         {
00268                                                 // In these cases, don't advance the column. These are
00269                                                 // 0-width spaces.
00270                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00271                                                         p += 3; 
00272                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00273                                                         p += 3; 
00274                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00275                                                         p += 3; 
00276                                                 else
00277                                                         { p +=3; ++col; }       // A normal character.
00278                                         }
00279                                 }
00280                                 else
00281                                 {
00282                                         ++p;
00283                                         ++col;
00284                                 }
00285                                 break;
00286 
00287                         default:
00288                                 if ( encoding == TIXML_ENCODING_UTF8 )
00289                                 {
00290                                         // Eat the 1 to 4 byte utf8 character.
00291                                         int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
00292                                         if ( step == 0 )
00293                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00294                                         p += step;
00295 
00296                                         // Just advance one column, of course.
00297                                         ++col;
00298                                 }
00299                                 else
00300                                 {
00301                                         ++p;
00302                                         ++col;
00303                                 }
00304                                 break;
00305                 }
00306         }
00307         cursor.row = row;
00308         cursor.col = col;
00309         assert( cursor.row >= -1 );
00310         assert( cursor.col >= -1 );
00311         stamp = p;
00312         assert( stamp );
00313 }
00314 
00315 
00316 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00317 {
00318         if ( !p || !*p )
00319         {
00320                 return 0;
00321         }
00322         if ( encoding == TIXML_ENCODING_UTF8 )
00323         {
00324                 while ( *p )
00325                 {
00326                         const unsigned char* pU = (const unsigned char*)p;
00327                         
00328                         // Skip the stupid Microsoft UTF-8 Byte order marks
00329                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00330                                  && *(pU+1)==TIXML_UTF_LEAD_1 
00331                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00332                         {
00333                                 p += 3;
00334                                 continue;
00335                         }
00336                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00337                                  && *(pU+1)==0xbfU
00338                                  && *(pU+2)==0xbeU )
00339                         {
00340                                 p += 3;
00341                                 continue;
00342                         }
00343                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00344                                  && *(pU+1)==0xbfU
00345                                  && *(pU+2)==0xbfU )
00346                         {
00347                                 p += 3;
00348                                 continue;
00349                         }
00350 
00351                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00352                                 ++p;
00353                         else
00354                                 break;
00355                 }
00356         }
00357         else
00358         {
00359                 while ( *p && (IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ))
00360                         ++p;
00361         }
00362 
00363         return p;
00364 }
00365 
00366 #ifdef TIXML_USE_STL
00367 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00368 {
00369         for( ;; )
00370         {
00371                 if ( !in->good() ) return false;
00372 
00373                 int c = in->peek();
00374                 // At this scope, we can't get to a document. So fail silently.
00375                 if ( !IsWhiteSpace( c ) || c <= 0 )
00376                         return true;
00377 
00378                 *tag += (char) in->get();
00379         }
00380 }
00381 
00382 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00383 {
00384         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00385         while ( in->good() )
00386         {
00387                 int c = in->peek();
00388                 if ( c == character )
00389                         return true;
00390                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00391                         return false;
00392 
00393                 in->get();
00394                 *tag += (char) c;
00395         }
00396         return false;
00397 }
00398 #endif
00399 
00400 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00401 // "assign" optimization removes over 10% of the execution time.
00402 //
00403 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00404 {
00405         // Oddly, not supported on some comilers,
00406         //name->clear();
00407         // So use this:
00408         *name = "";
00409         assert( p );
00410 
00411         // Names start with letters or underscores.
00412         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00413         // algorithm is generous.
00414         //
00415         // After that, they can be letters, underscores, numbers,
00416         // hyphens, or colons. (Colons are valid ony for namespaces,
00417         // but tinyxml can't tell namespaces from names.)
00418         if (    p && *p 
00419                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00420         {
00421                 const char* start = p;
00422                 while(          p && *p
00423                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding ) 
00424                                                  || *p == '_'
00425                                                  || *p == '-'
00426                                                  || *p == '.'
00427                                                  || *p == ':' ) )
00428                 {
00429                         //(*name) += *p; // expensive
00430                         ++p;
00431                 }
00432                 if ( p-start > 0 ) {
00433                         name->assign( start, p-start );
00434                 }
00435                 return p;
00436         }
00437         return 0;
00438 }
00439 
00440 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00441 {
00442         // Presume an entity, and pull it out.
00443     TIXML_STRING ent;
00444         int i;
00445         *length = 0;
00446 
00447         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00448         {
00449                 unsigned long ucs = 0;
00450                 ptrdiff_t delta = 0;
00451                 unsigned mult = 1;
00452 
00453                 if ( *(p+2) == 'x' )
00454                 {
00455                         // Hexadecimal.
00456                         if ( !*(p+3) ) return 0;
00457 
00458                         const char* q = p+3;
00459                         q = strchr( q, ';' );
00460 
00461                         if ( !q || !*q ) return 0;
00462 
00463                         delta = q-p;
00464                         --q;
00465 
00466                         while ( *q != 'x' )
00467                         {
00468                                 if ( *q >= '0' && *q <= '9' )
00469                                         ucs += mult * (*q - '0');
00470                                 else if ( *q >= 'a' && *q <= 'f' )
00471                                         ucs += mult * (*q - 'a' + 10);
00472                                 else if ( *q >= 'A' && *q <= 'F' )
00473                                         ucs += mult * (*q - 'A' + 10 );
00474                                 else 
00475                                         return 0;
00476                                 mult *= 16;
00477                                 --q;
00478                         }
00479                 }
00480                 else
00481                 {
00482                         // Decimal.
00483                         if ( !*(p+2) ) return 0;
00484 
00485                         const char* q = p+2;
00486                         q = strchr( q, ';' );
00487 
00488                         if ( !q || !*q ) return 0;
00489 
00490                         delta = q-p;
00491                         --q;
00492 
00493                         while ( *q != '#' )
00494                         {
00495                                 if ( *q >= '0' && *q <= '9' )
00496                                         ucs += mult * (*q - '0');
00497                                 else 
00498                                         return 0;
00499                                 mult *= 10;
00500                                 --q;
00501                         }
00502                 }
00503                 if ( encoding == TIXML_ENCODING_UTF8 )
00504                 {
00505                         // convert the UCS to UTF-8
00506                         ConvertUTF32ToUTF8( ucs, value, length );
00507                 }
00508                 else
00509                 {
00510                         *value = (char)ucs;
00511                         *length = 1;
00512                 }
00513                 return p + delta + 1;
00514         }
00515 
00516         // Now try to match it.
00517         for( i=0; i<NUM_ENTITY; ++i )
00518         {
00519                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00520                 {
00521                         assert( strlen( entity[i].str ) == entity[i].strLength );
00522                         *value = entity[i].chr;
00523                         *length = 1;
00524                         return ( p + entity[i].strLength );
00525                 }
00526         }
00527 
00528         // So it wasn't an entity, its unrecognized, or something like that.
00529         *value = *p;    // Don't put back the last one, since we return it!
00530         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00531                                         // Just writes strange XML.
00532         return p+1;
00533 }
00534 
00535 
00536 bool TiXmlBase::StringEqual( const char* p,
00537                                                          const char* tag,
00538                                                          bool ignoreCase,
00539                                                          TiXmlEncoding encoding )
00540 {
00541         assert( p );
00542         assert( tag );
00543         if ( !p || !*p )
00544         {
00545                 assert( 0 );
00546                 return false;
00547         }
00548 
00549         const char* q = p;
00550 
00551         if ( ignoreCase )
00552         {
00553                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00554                 {
00555                         ++q;
00556                         ++tag;
00557                 }
00558 
00559                 if ( *tag == 0 )
00560                         return true;
00561         }
00562         else
00563         {
00564                 while ( *q && *tag && *q == *tag )
00565                 {
00566                         ++q;
00567                         ++tag;
00568                 }
00569 
00570                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00571                         return true;
00572         }
00573         return false;
00574 }
00575 
00576 const char* TiXmlBase::ReadText(        const char* p, 
00577                                                                         TIXML_STRING * text, 
00578                                                                         bool trimWhiteSpace, 
00579                                                                         const char* endTag, 
00580                                                                         bool caseInsensitive,
00581                                                                         TiXmlEncoding encoding )
00582 {
00583     *text = "";
00584         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00585                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00586         {
00587                 // Keep all the white space.
00588                 while (    p && *p
00589                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00590                           )
00591                 {
00592                         int len;
00593                         char cArr[4] = { 0, 0, 0, 0 };
00594                         p = GetChar( p, cArr, &len, encoding );
00595                         text->append( cArr, len );
00596                 }
00597         }
00598         else
00599         {
00600                 bool whitespace = false;
00601 
00602                 // Remove leading white space:
00603                 p = SkipWhiteSpace( p, encoding );
00604                 while (    p && *p
00605                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00606                 {
00607                         if ( *p == '\r' || *p == '\n' )
00608                         {
00609                                 whitespace = true;
00610                                 ++p;
00611                         }
00612                         else if ( IsWhiteSpace( *p ) )
00613                         {
00614                                 whitespace = true;
00615                                 ++p;
00616                         }
00617                         else
00618                         {
00619                                 // If we've found whitespace, add it before the
00620                                 // new character. Any whitespace just becomes a space.
00621                                 if ( whitespace )
00622                                 {
00623                                         (*text) += ' ';
00624                                         whitespace = false;
00625                                 }
00626                                 int len;
00627                                 char cArr[4] = { 0, 0, 0, 0 };
00628                                 p = GetChar( p, cArr, &len, encoding );
00629                                 if ( len == 1 )
00630                                         (*text) += cArr[0];     // more efficient
00631                                 else
00632                                         text->append( cArr, len );
00633                         }
00634                 }
00635         }
00636         if ( p ) 
00637                 p += strlen( endTag );
00638         return p;
00639 }
00640 
00641 #ifdef TIXML_USE_STL
00642 
00643 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00644 {
00645         // The basic issue with a document is that we don't know what we're
00646         // streaming. Read something presumed to be a tag (and hope), then
00647         // identify it, and call the appropriate stream method on the tag.
00648         //
00649         // This "pre-streaming" will never read the closing ">" so the
00650         // sub-tag can orient itself.
00651 
00652         if ( !StreamTo( in, '<', tag ) ) 
00653         {
00654                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00655                 return;
00656         }
00657 
00658         while ( in->good() )
00659         {
00660                 int tagIndex = (int) tag->length();
00661                 while ( in->good() && in->peek() != '>' )
00662                 {
00663                         int c = in->get();
00664                         if ( c <= 0 )
00665                         {
00666                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00667                                 break;
00668                         }
00669                         (*tag) += (char) c;
00670                 }
00671 
00672                 if ( in->good() )
00673                 {
00674                         // We now have something we presume to be a node of 
00675                         // some sort. Identify it, and call the node to
00676                         // continue streaming.
00677                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00678 
00679                         if ( node )
00680                         {
00681                                 node->StreamIn( in, tag );
00682                                 bool isElement = node->ToElement() != 0;
00683                                 delete node;
00684                                 node = 0;
00685 
00686                                 // If this is the root element, we're done. Parsing will be
00687                                 // done by the >> operator.
00688                                 if ( isElement )
00689                                 {
00690                                         return;
00691                                 }
00692                         }
00693                         else
00694                         {
00695                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00696                                 return;
00697                         }
00698                 }
00699         }
00700         // We should have returned sooner.
00701         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00702 }
00703 
00704 #endif
00705 
00706 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00707 {
00708         ClearError();
00709 
00710         // Parse away, at the document level. Since a document
00711         // contains nothing but other tags, most of what happens
00712         // here is skipping white space.
00713         if ( !p || !*p )
00714         {
00715                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00716                 return 0;
00717         }
00718 
00719         // Note that, for a document, this needs to come
00720         // before the while space skip, so that parsing
00721         // starts from the pointer we are given.
00722         location.Clear();
00723         if ( prevData )
00724         {
00725                 location.row = prevData->cursor.row;
00726                 location.col = prevData->cursor.col;
00727         }
00728         else
00729         {
00730                 location.row = 0;
00731                 location.col = 0;
00732         }
00733         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00734         location = data.Cursor();
00735 
00736         if ( encoding == TIXML_ENCODING_UNKNOWN )
00737         {
00738                 // Check for the Microsoft UTF-8 lead bytes.
00739                 const unsigned char* pU = (const unsigned char*)p;
00740                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00741                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00742                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00743                 {
00744                         encoding = TIXML_ENCODING_UTF8;
00745                         useMicrosoftBOM = true;
00746                 }
00747         }
00748 
00749     p = SkipWhiteSpace( p, encoding );
00750         if ( !p )
00751         {
00752                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00753                 return 0;
00754         }
00755 
00756         while ( p && *p )
00757         {
00758                 TiXmlNode* node = Identify( p, encoding );
00759                 if ( node )
00760                 {
00761                         p = node->Parse( p, &data, encoding );
00762                         LinkEndChild( node );
00763                 }
00764                 else
00765                 {
00766                         break;
00767                 }
00768 
00769                 // Did we get encoding info?
00770                 if (    encoding == TIXML_ENCODING_UNKNOWN
00771                          && node->ToDeclaration() )
00772                 {
00773                         TiXmlDeclaration* dec = node->ToDeclaration();
00774                         const char* enc = dec->Encoding();
00775                         assert( enc );
00776 
00777                         if ( *enc == 0 )
00778                                 encoding = TIXML_ENCODING_UTF8;
00779                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00780                                 encoding = TIXML_ENCODING_UTF8;
00781                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00782                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00783                         else 
00784                                 encoding = TIXML_ENCODING_LEGACY;
00785                 }
00786 
00787                 p = SkipWhiteSpace( p, encoding );
00788         }
00789 
00790         // Was this empty?
00791         if ( !firstChild ) {
00792                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00793                 return 0;
00794         }
00795 
00796         // All is well.
00797         return p;
00798 }
00799 
00800 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00801 {       
00802         // The first error in a chain is more accurate - don't set again!
00803         if ( error )
00804                 return;
00805 
00806         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00807         error   = true;
00808         errorId = err;
00809         errorDesc = errorString[ errorId ];
00810 
00811         errorLocation.Clear();
00812         if ( pError && data )
00813         {
00814                 data->Stamp( pError, encoding );
00815                 errorLocation = data->Cursor();
00816         }
00817 }
00818 
00819 
00820 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00821 {
00822         TiXmlNode* returnNode = 0;
00823 
00824         p = SkipWhiteSpace( p, encoding );
00825         if( !p || !*p || *p != '<' )
00826         {
00827                 return 0;
00828         }
00829 
00830         TiXmlDocument* doc = GetDocument();
00831         p = SkipWhiteSpace( p, encoding );
00832 
00833         if ( !p || !*p )
00834         {
00835                 return 0;
00836         }
00837 
00838         // What is this thing? 
00839         // - Elements start with a letter or underscore, but xml is reserved.
00840         // - Comments: <!--
00841         // - Decleration: <?xml
00842         // - Everthing else is unknown to tinyxml.
00843         //
00844 
00845         const char* xmlHeader = { "<?xml" };
00846         const char* commentHeader = { "<!--" };
00847         const char* dtdHeader = { "<!" };
00848         const char* cdataHeader = { "<![CDATA[" };
00849 
00850         if ( StringEqual( p, xmlHeader, true, encoding ) )
00851         {
00852                 #ifdef DEBUG_PARSER
00853                         TIXML_LOG( "XML parsing Declaration\n" );
00854                 #endif
00855                 returnNode = new TiXmlDeclaration();
00856         }
00857         else if ( StringEqual( p, commentHeader, false, encoding ) )
00858         {
00859                 #ifdef DEBUG_PARSER
00860                         TIXML_LOG( "XML parsing Comment\n" );
00861                 #endif
00862                 returnNode = new TiXmlComment();
00863         }
00864         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00865         {
00866                 #ifdef DEBUG_PARSER
00867                         TIXML_LOG( "XML parsing CDATA\n" );
00868                 #endif
00869                 TiXmlText* text = new TiXmlText( "" );
00870                 text->SetCDATA( true );
00871                 returnNode = text;
00872         }
00873         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00874         {
00875                 #ifdef DEBUG_PARSER
00876                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00877                 #endif
00878                 returnNode = new TiXmlUnknown();
00879         }
00880         else if (    IsAlpha( *(p+1), encoding )
00881                           || *(p+1) == '_' )
00882         {
00883                 #ifdef DEBUG_PARSER
00884                         TIXML_LOG( "XML parsing Element\n" );
00885                 #endif
00886                 returnNode = new TiXmlElement( "" );
00887         }
00888         else
00889         {
00890                 #ifdef DEBUG_PARSER
00891                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00892                 #endif
00893                 returnNode = new TiXmlUnknown();
00894         }
00895 
00896         if ( returnNode )
00897         {
00898                 // Set the parent, so it can report errors
00899                 returnNode->parent = this;
00900         }
00901         else
00902         {
00903                 if ( doc )
00904                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00905         }
00906         return returnNode;
00907 }
00908 
00909 #ifdef TIXML_USE_STL
00910 
00911 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00912 {
00913         // We're called with some amount of pre-parsing. That is, some of "this"
00914         // element is in "tag". Go ahead and stream to the closing ">"
00915         while( in->good() )
00916         {
00917                 int c = in->get();
00918                 if ( c <= 0 )
00919                 {
00920                         TiXmlDocument* document = GetDocument();
00921                         if ( document )
00922                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00923                         return;
00924                 }
00925                 (*tag) += (char) c ;
00926                 
00927                 if ( c == '>' )
00928                         break;
00929         }
00930 
00931         if ( tag->length() < 3 ) return;
00932 
00933         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00934         // If not, identify and stream.
00935 
00936         if (    tag->at( tag->length() - 1 ) == '>' 
00937                  && tag->at( tag->length() - 2 ) == '/' )
00938         {
00939                 // All good!
00940                 return;
00941         }
00942         else if ( tag->at( tag->length() - 1 ) == '>' )
00943         {
00944                 // There is more. Could be:
00945                 //              text
00946                 //              cdata text (which looks like another node)
00947                 //              closing tag
00948                 //              another node.
00949                 for ( ;; )
00950                 {
00951                         StreamWhiteSpace( in, tag );
00952 
00953                         // Do we have text?
00954                         if ( in->good() && in->peek() != '<' ) 
00955                         {
00956                                 // Yep, text.
00957                                 TiXmlText text( "" );
00958                                 text.StreamIn( in, tag );
00959 
00960                                 // What follows text is a closing tag or another node.
00961                                 // Go around again and figure it out.
00962                                 continue;
00963                         }
00964 
00965                         // We now have either a closing tag...or another node.
00966                         // We should be at a "<", regardless.
00967                         if ( !in->good() ) return;
00968                         assert( in->peek() == '<' );
00969                         int tagIndex = (int) tag->length();
00970 
00971                         bool closingTag = false;
00972                         bool firstCharFound = false;
00973 
00974                         for( ;; )
00975                         {
00976                                 if ( !in->good() )
00977                                         return;
00978 
00979                                 int c = in->peek();
00980                                 if ( c <= 0 )
00981                                 {
00982                                         TiXmlDocument* document = GetDocument();
00983                                         if ( document )
00984                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00985                                         return;
00986                                 }
00987                                 
00988                                 if ( c == '>' )
00989                                         break;
00990 
00991                                 *tag += (char) c;
00992                                 in->get();
00993 
00994                                 // Early out if we find the CDATA id.
00995                                 if ( c == '[' && tag->size() >= 9 )
00996                                 {
00997                                         size_t len = tag->size();
00998                                         const char* start = tag->c_str() + len - 9;
00999                                         if ( strcmp( start, "<![CDATA[" ) == 0 ) {
01000                                                 assert( !closingTag );
01001                                                 break;
01002                                         }
01003                                 }
01004 
01005                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
01006                                 {
01007                                         firstCharFound = true;
01008                                         if ( c == '/' )
01009                                                 closingTag = true;
01010                                 }
01011                         }
01012                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
01013                         // If it was not, the streaming will be done by the tag.
01014                         if ( closingTag )
01015                         {
01016                                 if ( !in->good() )
01017                                         return;
01018 
01019                                 int c = in->get();
01020                                 if ( c <= 0 )
01021                                 {
01022                                         TiXmlDocument* document = GetDocument();
01023                                         if ( document )
01024                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01025                                         return;
01026                                 }
01027                                 assert( c == '>' );
01028                                 *tag += (char) c;
01029 
01030                                 // We are done, once we've found our closing tag.
01031                                 return;
01032                         }
01033                         else
01034                         {
01035                                 // If not a closing tag, id it, and stream.
01036                                 const char* tagloc = tag->c_str() + tagIndex;
01037                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01038                                 if ( !node )
01039                                         return;
01040                                 node->StreamIn( in, tag );
01041                                 delete node;
01042                                 node = 0;
01043 
01044                                 // No return: go around from the beginning: text, closing tag, or node.
01045                         }
01046                 }
01047         }
01048 }
01049 #endif
01050 
01051 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01052 {
01053         p = SkipWhiteSpace( p, encoding );
01054         TiXmlDocument* document = GetDocument();
01055 
01056         if ( !p || !*p )
01057         {
01058                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01059                 return 0;
01060         }
01061 
01062         if ( data )
01063         {
01064                 data->Stamp( p, encoding );
01065                 location = data->Cursor();
01066         }
01067 
01068         if ( *p != '<' )
01069         {
01070                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01071                 return 0;
01072         }
01073 
01074         p = SkipWhiteSpace( p+1, encoding );
01075 
01076         // Read the name.
01077         const char* pErr = p;
01078 
01079     p = ReadName( p, &value, encoding );
01080         if ( !p || !*p )
01081         {
01082                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01083                 return 0;
01084         }
01085 
01086     TIXML_STRING endTag ("</");
01087         endTag += value;
01088         endTag += ">";
01089 
01090         // Check for and read attributes. Also look for an empty
01091         // tag or an end tag.
01092         while ( p && *p )
01093         {
01094                 pErr = p;
01095                 p = SkipWhiteSpace( p, encoding );
01096                 if ( !p || !*p )
01097                 {
01098                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01099                         return 0;
01100                 }
01101                 if ( *p == '/' )
01102                 {
01103                         ++p;
01104                         // Empty tag.
01105                         if ( *p  != '>' )
01106                         {
01107                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );             
01108                                 return 0;
01109                         }
01110                         return (p+1);
01111                 }
01112                 else if ( *p == '>' )
01113                 {
01114                         // Done with attributes (if there were any.)
01115                         // Read the value -- which can include other
01116                         // elements -- read the end tag, and return.
01117                         ++p;
01118                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01119                         if ( !p || !*p ) {
01120                                 // We were looking for the end tag, but found nothing.
01121                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01122                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01123                                 return 0;
01124                         }
01125 
01126                         // We should find the end tag now
01127                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01128                         {
01129                                 p += endTag.length();
01130                                 return p;
01131                         }
01132                         else
01133                         {
01134                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01135                                 return 0;
01136                         }
01137                 }
01138                 else
01139                 {
01140                         // Try to read an attribute:
01141                         TiXmlAttribute* attrib = new TiXmlAttribute();
01142                         if ( !attrib )
01143                         {
01144                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01145                                 return 0;
01146                         }
01147 
01148                         attrib->SetDocument( document );
01149                         pErr = p;
01150                         p = attrib->Parse( p, data, encoding );
01151 
01152                         if ( !p || !*p )
01153                         {
01154                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01155                                 delete attrib;
01156                                 return 0;
01157                         }
01158 
01159                         // Handle the strange case of double attributes:
01160                         #ifdef TIXML_USE_STL
01161                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01162                         #else
01163                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01164                         #endif
01165                         if ( node )
01166                         {
01167                                 node->SetValue( attrib->Value() );
01168                                 delete attrib;
01169                                 return 0;
01170                         }
01171 
01172                         attributeSet.Add( attrib );
01173                 }
01174         }
01175         return p;
01176 }
01177 
01178 
01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01180 {
01181         TiXmlDocument* document = GetDocument();
01182 
01183         // Read in text and elements in any order.
01184         const char* pWithWhiteSpace = p;
01185         p = SkipWhiteSpace( p, encoding );
01186 
01187         while ( p && *p )
01188         {
01189                 if ( *p != '<' )
01190                 {
01191                         // Take what we have, make a text element.
01192                         TiXmlText* textNode = new TiXmlText( "" );
01193 
01194                         if ( !textNode )
01195                         {
01196                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01197                                     return 0;
01198                         }
01199 
01200                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01201                         {
01202                                 p = textNode->Parse( p, data, encoding );
01203                         }
01204                         else
01205                         {
01206                                 // Special case: we want to keep the white space
01207                                 // so that leading spaces aren't removed.
01208                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01209                         }
01210 
01211                         if ( !textNode->Blank() )
01212                                 LinkEndChild( textNode );
01213                         else
01214                                 delete textNode;
01215                 } 
01216                 else 
01217                 {
01218                         // We hit a '<'
01219                         // Have we hit a new element or an end tag? This could also be
01220                         // a TiXmlText in the "CDATA" style.
01221                         if ( StringEqual( p, "</", false, encoding ) )
01222                         {
01223                                 return p;
01224                         }
01225                         else
01226                         {
01227                                 TiXmlNode* node = Identify( p, encoding );
01228                                 if ( node )
01229                                 {
01230                                         p = node->Parse( p, data, encoding );
01231                                         LinkEndChild( node );
01232                                 }                               
01233                                 else
01234                                 {
01235                                         return 0;
01236                                 }
01237                         }
01238                 }
01239                 pWithWhiteSpace = p;
01240                 p = SkipWhiteSpace( p, encoding );
01241         }
01242 
01243         if ( !p )
01244         {
01245                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01246         }       
01247         return p;
01248 }
01249 
01250 
01251 #ifdef TIXML_USE_STL
01252 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01253 {
01254         while ( in->good() )
01255         {
01256                 int c = in->get();      
01257                 if ( c <= 0 )
01258                 {
01259                         TiXmlDocument* document = GetDocument();
01260                         if ( document )
01261                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01262                         return;
01263                 }
01264                 (*tag) += (char) c;
01265 
01266                 if ( c == '>' )
01267                 {
01268                         // All is well.
01269                         return;         
01270                 }
01271         }
01272 }
01273 #endif
01274 
01275 
01276 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01277 {
01278         TiXmlDocument* document = GetDocument();
01279         p = SkipWhiteSpace( p, encoding );
01280 
01281         if ( data )
01282         {
01283                 data->Stamp( p, encoding );
01284                 location = data->Cursor();
01285         }
01286         if ( !p || !*p || *p != '<' )
01287         {
01288                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01289                 return 0;
01290         }
01291         ++p;
01292     value = "";
01293 
01294         while ( p && *p && *p != '>' )
01295         {
01296                 value += *p;
01297                 ++p;
01298         }
01299 
01300         if ( !p )
01301         {
01302                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01303         }
01304         if ( *p == '>' )
01305                 return p+1;
01306         return p;
01307 }
01308 
01309 #ifdef TIXML_USE_STL
01310 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01311 {
01312         while ( in->good() )
01313         {
01314                 int c = in->get();      
01315                 if ( c <= 0 )
01316                 {
01317                         TiXmlDocument* document = GetDocument();
01318                         if ( document )
01319                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01320                         return;
01321                 }
01322 
01323                 (*tag) += (char) c;
01324 
01325                 if ( c == '>' 
01326                          && tag->at( tag->length() - 2 ) == '-'
01327                          && tag->at( tag->length() - 3 ) == '-' )
01328                 {
01329                         // All is well.
01330                         return;         
01331                 }
01332         }
01333 }
01334 #endif
01335 
01336 
01337 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01338 {
01339         TiXmlDocument* document = GetDocument();
01340         value = "";
01341 
01342         p = SkipWhiteSpace( p, encoding );
01343 
01344         if ( data )
01345         {
01346                 data->Stamp( p, encoding );
01347                 location = data->Cursor();
01348         }
01349         const char* startTag = "<!--";
01350         const char* endTag   = "-->";
01351 
01352         if ( !StringEqual( p, startTag, false, encoding ) )
01353         {
01354                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01355                 return 0;
01356         }
01357         p += strlen( startTag );
01358 
01359         // [ 1475201 ] TinyXML parses entities in comments
01360         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01361         // p = ReadText( p, &value, false, endTag, false, encoding );
01362         //
01363         // from the XML spec:
01364         /*
01365          [Definition: Comments may appear anywhere in a document outside other markup; in addition, 
01366                       they may appear within the document type declaration at places allowed by the grammar. 
01367                                   They are not part of the document's character data; an XML processor MAY, but need not, 
01368                                   make it possible for an application to retrieve the text of comments. For compatibility, 
01369                                   the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 
01370                                   references MUST NOT be recognized within comments.
01371 
01372                                   An example of a comment:
01373 
01374                                   <!-- declarations for <head> & <body> -->
01375         */
01376 
01377     value = "";
01378         // Keep all the white space.
01379         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01380         {
01381                 value.append( p, 1 );
01382                 ++p;
01383         }
01384         if ( p ) 
01385                 p += strlen( endTag );
01386 
01387         return p;
01388 }
01389 
01390 
01391 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01392 {
01393         p = SkipWhiteSpace( p, encoding );
01394         if ( !p || !*p ) return 0;
01395 
01396 //      int tabsize = 4;
01397 //      if ( document )
01398 //              tabsize = document->TabSize();
01399 
01400         if ( data )
01401         {
01402                 data->Stamp( p, encoding );
01403                 location = data->Cursor();
01404         }
01405         // Read the name, the '=' and the value.
01406         const char* pErr = p;
01407         p = ReadName( p, &name, encoding );
01408         if ( !p || !*p )
01409         {
01410                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01411                 return 0;
01412         }
01413         p = SkipWhiteSpace( p, encoding );
01414         if ( !p || !*p || *p != '=' )
01415         {
01416                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01417                 return 0;
01418         }
01419 
01420         ++p;    // skip '='
01421         p = SkipWhiteSpace( p, encoding );
01422         if ( !p || !*p )
01423         {
01424                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01425                 return 0;
01426         }
01427         
01428         const char* end;
01429         const char SINGLE_QUOTE = '\'';
01430         const char DOUBLE_QUOTE = '\"';
01431 
01432         if ( *p == SINGLE_QUOTE )
01433         {
01434                 ++p;
01435                 end = "\'";             // single quote in string
01436                 p = ReadText( p, &value, false, end, false, encoding );
01437         }
01438         else if ( *p == DOUBLE_QUOTE )
01439         {
01440                 ++p;
01441                 end = "\"";             // double quote in string
01442                 p = ReadText( p, &value, false, end, false, encoding );
01443         }
01444         else
01445         {
01446                 // All attribute values should be in single or double quotes.
01447                 // But this is such a common error that the parser will try
01448                 // its best, even without them.
01449                 value = "";
01450                 while (    p && *p                                                                                      // existence
01451                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01452                                 && *p != '/' && *p != '>' )                                                     // tag end
01453                 {
01454                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
01455                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01456                                 // We did not have an opening quote but seem to have a 
01457                                 // closing one. Give up and throw an error.
01458                                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01459                                 return 0;
01460                         }
01461                         value += *p;
01462                         ++p;
01463                 }
01464         }
01465         return p;
01466 }
01467 
01468 #ifdef TIXML_USE_STL
01469 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01470 {
01471         while ( in->good() )
01472         {
01473                 int c = in->peek();     
01474                 if ( !cdata && (c == '<' ) ) 
01475                 {
01476                         return;
01477                 }
01478                 if ( c <= 0 )
01479                 {
01480                         TiXmlDocument* document = GetDocument();
01481                         if ( document )
01482                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01483                         return;
01484                 }
01485 
01486                 (*tag) += (char) c;
01487                 in->get();      // "commits" the peek made above
01488 
01489                 if ( cdata && c == '>' && tag->size() >= 3 ) {
01490                         size_t len = tag->size();
01491                         if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01492                                 // terminator of cdata.
01493                                 return;
01494                         }
01495                 }    
01496         }
01497 }
01498 #endif
01499 
01500 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01501 {
01502         value = "";
01503         TiXmlDocument* document = GetDocument();
01504 
01505         if ( data )
01506         {
01507                 data->Stamp( p, encoding );
01508                 location = data->Cursor();
01509         }
01510 
01511         const char* const startTag = "<![CDATA[";
01512         const char* const endTag   = "]]>";
01513 
01514         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01515         {
01516                 cdata = true;
01517 
01518                 if ( !StringEqual( p, startTag, false, encoding ) )
01519                 {
01520                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01521                         return 0;
01522                 }
01523                 p += strlen( startTag );
01524 
01525                 // Keep all the white space, ignore the encoding, etc.
01526                 while (    p && *p
01527                                 && !StringEqual( p, endTag, false, encoding )
01528                           )
01529                 {
01530                         value += *p;
01531                         ++p;
01532                 }
01533 
01534                 TIXML_STRING dummy; 
01535                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01536                 return p;
01537         }
01538         else
01539         {
01540                 bool ignoreWhite = true;
01541 
01542                 const char* end = "<";
01543                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01544                 if ( p )
01545                         return p-1;     // don't truncate the '<'
01546                 return 0;
01547         }
01548 }
01549 
01550 #ifdef TIXML_USE_STL
01551 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01552 {
01553         while ( in->good() )
01554         {
01555                 int c = in->get();
01556                 if ( c <= 0 )
01557                 {
01558                         TiXmlDocument* document = GetDocument();
01559                         if ( document )
01560                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01561                         return;
01562                 }
01563                 (*tag) += (char) c;
01564 
01565                 if ( c == '>' )
01566                 {
01567                         // All is well.
01568                         return;
01569                 }
01570         }
01571 }
01572 #endif
01573 
01574 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01575 {
01576         p = SkipWhiteSpace( p, _encoding );
01577         // Find the beginning, find the end, and look for
01578         // the stuff in-between.
01579         TiXmlDocument* document = GetDocument();
01580         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01581         {
01582                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01583                 return 0;
01584         }
01585         if ( data )
01586         {
01587                 data->Stamp( p, _encoding );
01588                 location = data->Cursor();
01589         }
01590         p += 5;
01591 
01592         version = "";
01593         encoding = "";
01594         standalone = "";
01595 
01596         while ( p && *p )
01597         {
01598                 if ( *p == '>' )
01599                 {
01600                         ++p;
01601                         return p;
01602                 }
01603 
01604                 p = SkipWhiteSpace( p, _encoding );
01605                 if ( StringEqual( p, "version", true, _encoding ) )
01606                 {
01607                         TiXmlAttribute attrib;
01608                         p = attrib.Parse( p, data, _encoding );         
01609                         version = attrib.Value();
01610                 }
01611                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01612                 {
01613                         TiXmlAttribute attrib;
01614                         p = attrib.Parse( p, data, _encoding );         
01615                         encoding = attrib.Value();
01616                 }
01617                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01618                 {
01619                         TiXmlAttribute attrib;
01620                         p = attrib.Parse( p, data, _encoding );         
01621                         standalone = attrib.Value();
01622                 }
01623                 else
01624                 {
01625                         // Read over whatever it is.
01626                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01627                                 ++p;
01628                 }
01629         }
01630         return 0;
01631 }
01632 
01633 bool TiXmlText::Blank() const
01634 {
01635         for ( unsigned i=0; i<value.length(); i++ )
01636                 if ( !IsWhiteSpace( value[i] ) )
01637                         return false;
01638         return true;
01639 }
01640 
01641 }


rospack
Author(s): Brian Gerkey/gerkey@willowgarage.com, Morgan Quigley/mquigley@cs.stanford.edu
autogenerated on Fri Jan 3 2014 11:51:40