$search
00001 /*************************************************************************** 00002 tag: Peter Soetens do nov 2 13:06:01 CET 2006 tinyxmlparser.cpp 00003 00004 tinyxmlparser.cpp - description 00005 ------------------- 00006 begin : do november 02 2006 00007 copyright : (C) 2006 Peter Soetens 00008 email : peter.soetens@gmail.com 00009 00010 *************************************************************************** 00011 * This library is free software; you can redistribute it and/or * 00012 * modify it under the terms of the GNU General Public * 00013 * License as published by the Free Software Foundation; * 00014 * version 2 of the License. * 00015 * * 00016 * As a special exception, you may use this file as part of a free * 00017 * software library without restriction. Specifically, if other files * 00018 * instantiate templates or use macros or inline functions from this * 00019 * file, or you compile this file and link it with other files to * 00020 * produce an executable, this file does not by itself cause the * 00021 * resulting executable to be covered by the GNU General Public * 00022 * License. This exception does not however invalidate any other * 00023 * reasons why the executable file might be covered by the GNU General * 00024 * Public License. * 00025 * * 00026 * This library is distributed in the hope that it will be useful, * 00027 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00028 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * 00029 * Lesser General Public License for more details. * 00030 * * 00031 * You should have received a copy of the GNU General Public * 00032 * License along with this library; if not, write to the Free Software * 00033 * Foundation, Inc., 59 Temple Place, * 00034 * Suite 330, Boston, MA 02111-1307 USA * 00035 * * 00036 ***************************************************************************/ 00037 00038 00039 /* 00040 www.sourceforge.net/projects/tinyxml 00041 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) 00042 00043 This software is provided 'as-is', without any express or implied 00044 warranty. In no event will the authors be held liable for any 00045 damages arising from the use of this software. 00046 00047 Permission is granted to anyone to use this software for any 00048 purpose, including commercial applications, and to alter it and 00049 redistribute it freely, subject to the following restrictions: 00050 00051 1. The origin of this software must not be misrepresented; you must 00052 not claim that you wrote the original software. If you use this 00053 software in a product, an acknowledgment in the product documentation 00054 would be appreciated but is not required. 00055 00056 2. Altered source versions must be plainly marked as such, and 00057 must not be misrepresented as being the original software. 00058 00059 3. This notice may not be removed or altered from any source 00060 distribution. 00061 */ 00062 00063 #include "tinyxml.h" 00064 #include <ctype.h> 00065 #include <stddef.h> 00066 00067 //#define DEBUG_PARSER 00068 #if defined( DEBUG_PARSER ) 00069 # if defined( DEBUG ) && defined( _MSC_VER ) 00070 # include <windows.h> 00071 # define TIXML_LOG OutputDebugString 00072 # else 00073 # define TIXML_LOG printf 00074 # endif 00075 #endif 00076 00077 namespace RTT { namespace marsh { 00078 00079 // Note tha "PutString" hardcodes the same list. This 00080 // is less flexible than it appears. Changing the entries 00081 // or order will break putstring. 00082 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 00083 { 00084 { "&", 5, '&' }, 00085 { "<", 4, '<' }, 00086 { ">", 4, '>' }, 00087 { """, 6, '\"' }, 00088 { "'", 6, '\'' } 00089 }; 00090 00091 // Bunch of unicode info at: 00092 // http://www.unicode.org/faq/utf_bom.html 00093 // Including the basic of this table, which determines the #bytes in the 00094 // sequence from the lead byte. 1 placed for invalid sequences -- 00095 // although the result will be junk, pass it through as much as possible. 00096 // Beware of the non-characters in UTF-8: 00097 // ef bb bf (Microsoft "lead bytes") 00098 // ef bf be 00099 // ef bf bf 00100 00101 const unsigned char TIXML_UTF_LEAD_0 = 0xefU; 00102 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; 00103 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; 00104 00105 const int TiXmlBase::utf8ByteTable[256] = 00106 { 00107 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 00109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 00110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 00111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 00112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 00113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 00114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 00115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range 00116 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid 00117 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 00118 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 00119 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 00120 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte 00121 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 00122 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte 00123 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid 00124 }; 00125 00126 00127 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) 00128 { 00129 const unsigned long BYTE_MASK = 0xBF; 00130 const unsigned long BYTE_MARK = 0x80; 00131 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 00132 00133 if (input < 0x80) 00134 *length = 1; 00135 else if ( input < 0x800 ) 00136 *length = 2; 00137 else if ( input < 0x10000 ) 00138 *length = 3; 00139 else if ( input < 0x200000 ) 00140 *length = 4; 00141 else 00142 { *length = 0; return; } // This code won't covert this correctly anyway. 00143 00144 output += *length; 00145 00146 // Scary scary fall throughs. 00147 switch (*length) 00148 { 00149 case 4: 00150 --output; 00151 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00152 input >>= 6; 00153 case 3: 00154 --output; 00155 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00156 input >>= 6; 00157 case 2: 00158 --output; 00159 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00160 input >>= 6; 00161 case 1: 00162 --output; 00163 *output = (char)(input | FIRST_BYTE_MARK[*length]); 00164 } 00165 } 00166 00167 00168 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 00169 { 00170 // This will only work for low-ascii, everything else is assumed to be a valid 00171 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00172 // to figure out alhabetical vs. not across encoding. So take a very 00173 // conservative approach. 00174 00175 // if ( encoding == TIXML_ENCODING_UTF8 ) 00176 // { 00177 if ( anyByte < 127 ) 00178 return isalpha( anyByte ); 00179 else 00180 return 1; // What else to do? The unicode set is huge...get the english ones right. 00181 // } 00182 // else 00183 // { 00184 // return isalpha( anyByte ); 00185 // } 00186 } 00187 00188 00189 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 00190 { 00191 // This will only work for low-ascii, everything else is assumed to be a valid 00192 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00193 // to figure out alhabetical vs. not across encoding. So take a very 00194 // conservative approach. 00195 00196 // if ( encoding == TIXML_ENCODING_UTF8 ) 00197 // { 00198 if ( anyByte < 127 ) 00199 return isalnum( anyByte ); 00200 else 00201 return 1; // What else to do? The unicode set is huge...get the english ones right. 00202 // } 00203 // else 00204 // { 00205 // return isalnum( anyByte ); 00206 // } 00207 } 00208 00209 00210 class TiXmlParsingData 00211 { 00212 friend class TiXmlDocument; 00213 public: 00214 void Stamp( const char* now, TiXmlEncoding encoding ); 00215 00216 const TiXmlCursor& Cursor() { return cursor; } 00217 00218 private: 00219 // Only used by the document! 00220 TiXmlParsingData( const char* start, int _tabsize, int row, int col ) 00221 { 00222 assert( start ); 00223 stamp = start; 00224 tabsize = _tabsize; 00225 cursor.row = row; 00226 cursor.col = col; 00227 } 00228 00229 TiXmlCursor cursor; 00230 const char* stamp; 00231 int tabsize; 00232 }; 00233 00234 00235 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) 00236 { 00237 assert( now ); 00238 00239 // Do nothing if the tabsize is 0. 00240 if ( tabsize < 1 ) 00241 { 00242 return; 00243 } 00244 00245 // Get the current row, column. 00246 int row = cursor.row; 00247 int col = cursor.col; 00248 const char* p = stamp; 00249 assert( p ); 00250 00251 while ( p < now ) 00252 { 00253 // Treat p as unsigned, so we have a happy compiler. 00254 const unsigned char* pU = (const unsigned char*)p; 00255 00256 // Code contributed by Fletcher Dunn: (modified by lee) 00257 switch (*pU) { 00258 case 0: 00259 // We *should* never get here, but in case we do, don't 00260 // advance past the terminating null character, ever 00261 return; 00262 00263 case '\r': 00264 // bump down to the next line 00265 ++row; 00266 col = 0; 00267 // Eat the character 00268 ++p; 00269 00270 // Check for \r\n sequence, and treat this as a single character 00271 if (*p == '\n') { 00272 ++p; 00273 } 00274 break; 00275 00276 case '\n': 00277 // bump down to the next line 00278 ++row; 00279 col = 0; 00280 00281 // Eat the character 00282 ++p; 00283 00284 // Check for \n\r sequence, and treat this as a single 00285 // character. (Yes, this bizarre thing does occur still 00286 // on some arcane platforms...) 00287 if (*p == '\r') { 00288 ++p; 00289 } 00290 break; 00291 00292 case '\t': 00293 // Eat the character 00294 ++p; 00295 00296 // Skip to next tab stop 00297 col = (col / tabsize + 1) * tabsize; 00298 break; 00299 00300 case TIXML_UTF_LEAD_0: 00301 if ( encoding == TIXML_ENCODING_UTF8 ) 00302 { 00303 if ( *(p+1) && *(p+2) ) 00304 { 00305 // In these cases, don't advance the column. These are 00306 // 0-width spaces. 00307 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 ) 00308 p += 3; 00309 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU ) 00310 p += 3; 00311 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU ) 00312 p += 3; 00313 else 00314 { p +=3; ++col; } // A normal character. 00315 } 00316 } 00317 else 00318 { 00319 ++p; 00320 ++col; 00321 } 00322 break; 00323 00324 default: 00325 if ( encoding == TIXML_ENCODING_UTF8 ) 00326 { 00327 // Eat the 1 to 4 byte utf8 character. 00328 int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)]; 00329 if ( step == 0 ) 00330 step = 1; // Error case from bad encoding, but handle gracefully. 00331 p += step; 00332 00333 // Just advance one column, of course. 00334 ++col; 00335 } 00336 else 00337 { 00338 ++p; 00339 ++col; 00340 } 00341 break; 00342 } 00343 } 00344 cursor.row = row; 00345 cursor.col = col; 00346 assert( cursor.row >= -1 ); 00347 assert( cursor.col >= -1 ); 00348 stamp = p; 00349 assert( stamp ); 00350 } 00351 00352 00353 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) 00354 { 00355 if ( !p || !*p ) 00356 { 00357 return 0; 00358 } 00359 if ( encoding == TIXML_ENCODING_UTF8 ) 00360 { 00361 while ( *p ) 00362 { 00363 const unsigned char* pU = (const unsigned char*)p; 00364 00365 // Skip the stupid Microsoft UTF-8 Byte order marks 00366 if ( *(pU+0)==TIXML_UTF_LEAD_0 00367 && *(pU+1)==TIXML_UTF_LEAD_1 00368 && *(pU+2)==TIXML_UTF_LEAD_2 ) 00369 { 00370 p += 3; 00371 continue; 00372 } 00373 else if(*(pU+0)==TIXML_UTF_LEAD_0 00374 && *(pU+1)==0xbfU 00375 && *(pU+2)==0xbeU ) 00376 { 00377 p += 3; 00378 continue; 00379 } 00380 else if(*(pU+0)==TIXML_UTF_LEAD_0 00381 && *(pU+1)==0xbfU 00382 && *(pU+2)==0xbfU ) 00383 { 00384 p += 3; 00385 continue; 00386 } 00387 00388 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space. 00389 ++p; 00390 else 00391 break; 00392 } 00393 } 00394 else 00395 { 00396 while ( *p && (IsWhiteSpace( *p ) || *p == '\n' || *p =='\r') ) 00397 ++p; 00398 } 00399 00400 return p; 00401 } 00402 00403 #ifdef TIXML_USE_STL 00404 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag ) 00405 { 00406 for( ;; ) 00407 { 00408 if ( !in->good() ) return false; 00409 00410 int c = in->peek(); 00411 // At this scope, we can't get to a document. So fail silently. 00412 if ( !IsWhiteSpace( c ) || c <= 0 ) 00413 return true; 00414 00415 *tag += (char) in->get(); 00416 } 00417 } 00418 00419 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag ) 00420 { 00421 //assert( character > 0 && character < 128 ); // else it won't work in utf-8 00422 while ( in->good() ) 00423 { 00424 int c = in->peek(); 00425 if ( c == character ) 00426 return true; 00427 if ( c <= 0 ) // Silent failure: can't get document at this scope 00428 return false; 00429 00430 in->get(); 00431 *tag += (char) c; 00432 } 00433 return false; 00434 } 00435 #endif 00436 00437 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) 00438 { 00439 *name = ""; 00440 assert( p ); 00441 00442 // Names start with letters or underscores. 00443 // Of course, in unicode, tinyxml has no idea what a letter *is*. The 00444 // algorithm is generous. 00445 // 00446 // After that, they can be letters, underscores, numbers, 00447 // hyphens, or colons. (Colons are valid ony for namespaces, 00448 // but tinyxml can't tell namespaces from names.) 00449 if ( p && *p 00450 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) 00451 { 00452 while( p && *p 00453 && ( IsAlphaNum( (unsigned char ) *p, encoding ) 00454 || *p == '_' 00455 || *p == '-' 00456 || *p == '.' 00457 || *p == ':' ) ) 00458 { 00459 (*name) += *p; 00460 ++p; 00461 } 00462 return p; 00463 } 00464 return 0; 00465 } 00466 00467 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) 00468 { 00469 // Presume an entity, and pull it out. 00470 TIXML_STRING ent; 00471 int i; 00472 *length = 0; 00473 00474 if ( *(p+1) && *(p+1) == '#' && *(p+2) ) 00475 { 00476 unsigned long ucs = 0; 00477 ptrdiff_t delta = 0; 00478 unsigned mult = 1; 00479 00480 if ( *(p+2) == 'x' ) 00481 { 00482 // Hexadecimal. 00483 if ( !*(p+3) ) return 0; 00484 00485 const char* q = p+3; 00486 q = strchr( q, ';' ); 00487 00488 if ( !q || !*q ) return 0; 00489 00490 delta = q-p; 00491 --q; 00492 00493 while ( *q != 'x' ) 00494 { 00495 if ( *q >= '0' && *q <= '9' ) 00496 ucs += mult * (*q - '0'); 00497 else if ( *q >= 'a' && *q <= 'f' ) 00498 ucs += mult * (*q - 'a' + 10); 00499 else if ( *q >= 'A' && *q <= 'F' ) 00500 ucs += mult * (*q - 'A' + 10 ); 00501 else 00502 return 0; 00503 mult *= 16; 00504 --q; 00505 } 00506 } 00507 else 00508 { 00509 // Decimal. 00510 if ( !*(p+2) ) return 0; 00511 00512 const char* q = p+2; 00513 q = strchr( q, ';' ); 00514 00515 if ( !q || !*q ) return 0; 00516 00517 delta = q-p; 00518 --q; 00519 00520 while ( *q != '#' ) 00521 { 00522 if ( *q >= '0' && *q <= '9' ) 00523 ucs += mult * (*q - '0'); 00524 else 00525 return 0; 00526 mult *= 10; 00527 --q; 00528 } 00529 } 00530 if ( encoding == TIXML_ENCODING_UTF8 ) 00531 { 00532 // convert the UCS to UTF-8 00533 ConvertUTF32ToUTF8( ucs, value, length ); 00534 } 00535 else 00536 { 00537 *value = (char)ucs; 00538 *length = 1; 00539 } 00540 return p + delta + 1; 00541 } 00542 00543 // Now try to match it. 00544 for( i=0; i<NUM_ENTITY; ++i ) 00545 { 00546 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) 00547 { 00548 assert( strlen( entity[i].str ) == entity[i].strLength ); 00549 *value = entity[i].chr; 00550 *length = 1; 00551 return ( p + entity[i].strLength ); 00552 } 00553 } 00554 00555 // So it wasn't an entity, its unrecognized, or something like that. 00556 *value = *p; // Don't put back the last one, since we return it! 00557 return p+1; 00558 } 00559 00560 00561 bool TiXmlBase::StringEqual( const char* p, 00562 const char* tag, 00563 bool ignoreCase, 00564 TiXmlEncoding encoding ) 00565 { 00566 assert( p ); 00567 assert( tag ); 00568 if ( !p || !*p ) 00569 { 00570 assert( 0 ); 00571 return false; 00572 } 00573 00574 const char* q = p; 00575 00576 if ( ignoreCase ) 00577 { 00578 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) 00579 { 00580 ++q; 00581 ++tag; 00582 } 00583 00584 if ( *tag == 0 ) 00585 return true; 00586 } 00587 else 00588 { 00589 while ( *q && *tag && *q == *tag ) 00590 { 00591 ++q; 00592 ++tag; 00593 } 00594 00595 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? 00596 return true; 00597 } 00598 return false; 00599 } 00600 00601 const char* TiXmlBase::ReadText( const char* p, 00602 TIXML_STRING * text, 00603 bool trimWhiteSpace, 00604 const char* endTag, 00605 bool caseInsensitive, 00606 TiXmlEncoding encoding ) 00607 { 00608 *text = ""; 00609 if ( !trimWhiteSpace // certain tags always keep whitespace 00610 || !condenseWhiteSpace ) // if true, whitespace is always kept 00611 { 00612 // Keep all the white space. 00613 while ( p && *p 00614 && !StringEqual( p, endTag, caseInsensitive, encoding ) 00615 ) 00616 { 00617 int len; 00618 char cArr[4] = { 0, 0, 0, 0 }; 00619 p = GetChar( p, cArr, &len, encoding ); 00620 text->append( cArr, len ); 00621 } 00622 } 00623 else 00624 { 00625 bool whitespace = false; 00626 00627 // Remove leading white space: 00628 p = SkipWhiteSpace( p, encoding ); 00629 while ( p && *p 00630 && !StringEqual( p, endTag, caseInsensitive, encoding ) ) 00631 { 00632 if ( *p == '\r' || *p == '\n' ) 00633 { 00634 whitespace = true; 00635 ++p; 00636 } 00637 else if ( IsWhiteSpace( *p ) ) 00638 { 00639 whitespace = true; 00640 ++p; 00641 } 00642 else 00643 { 00644 // If we've found whitespace, add it before the 00645 // new character. Any whitespace just becomes a space. 00646 if ( whitespace ) 00647 { 00648 (*text) += ' '; 00649 whitespace = false; 00650 } 00651 int len; 00652 char cArr[4] = { 0, 0, 0, 0 }; 00653 p = GetChar( p, cArr, &len, encoding ); 00654 if ( len == 1 ) 00655 (*text) += cArr[0]; // more efficient 00656 else 00657 text->append( cArr, len ); 00658 } 00659 } 00660 } 00661 return p + strlen( endTag ); 00662 } 00663 00664 #ifdef TIXML_USE_STL 00665 00666 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 00667 { 00668 // The basic issue with a document is that we don't know what we're 00669 // streaming. Read something presumed to be a tag (and hope), then 00670 // identify it, and call the appropriate stream method on the tag. 00671 // 00672 // This "pre-streaming" will never read the closing ">" so the 00673 // sub-tag can orient itself. 00674 00675 if ( !StreamTo( in, '<', tag ) ) 00676 { 00677 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00678 return; 00679 } 00680 00681 while ( in->good() ) 00682 { 00683 int tagIndex = (int) tag->length(); 00684 while ( in->good() && in->peek() != '>' ) 00685 { 00686 int c = in->get(); 00687 if ( c <= 0 ) 00688 { 00689 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00690 break; 00691 } 00692 (*tag) += (char) c; 00693 } 00694 00695 if ( in->good() ) 00696 { 00697 // We now have something we presume to be a node of 00698 // some sort. Identify it, and call the node to 00699 // continue streaming. 00700 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); 00701 00702 if ( node ) 00703 { 00704 node->StreamIn( in, tag ); 00705 bool isElement = node->ToElement() != 0; 00706 delete node; 00707 node = 0; 00708 00709 // If this is the root element, we're done. Parsing will be 00710 // done by the >> operator. 00711 if ( isElement ) 00712 { 00713 return; 00714 } 00715 } 00716 else 00717 { 00718 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00719 return; 00720 } 00721 } 00722 } 00723 // We should have returned sooner. 00724 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00725 } 00726 00727 #endif 00728 00729 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) 00730 { 00731 ClearError(); 00732 00733 // Parse away, at the document level. Since a document 00734 // contains nothing but other tags, most of what happens 00735 // here is skipping white space. 00736 if ( !p || !*p ) 00737 { 00738 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00739 return 0; 00740 } 00741 00742 // Note that, for a document, this needs to come 00743 // before the while space skip, so that parsing 00744 // starts from the pointer we are given. 00745 location.Clear(); 00746 if ( prevData ) 00747 { 00748 location.row = prevData->cursor.row; 00749 location.col = prevData->cursor.col; 00750 } 00751 else 00752 { 00753 location.row = 0; 00754 location.col = 0; 00755 } 00756 TiXmlParsingData data( p, TabSize(), location.row, location.col ); 00757 location = data.Cursor(); 00758 00759 if ( encoding == TIXML_ENCODING_UNKNOWN ) 00760 { 00761 // Check for the Microsoft UTF-8 lead bytes. 00762 const unsigned char* pU = (const unsigned char*)p; 00763 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 00764 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 00765 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 ) 00766 { 00767 encoding = TIXML_ENCODING_UTF8; 00768 useMicrosoftBOM = true; 00769 } 00770 } 00771 00772 p = SkipWhiteSpace( p, encoding ); 00773 if ( !p ) 00774 { 00775 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00776 return 0; 00777 } 00778 00779 while ( p && *p ) 00780 { 00781 TiXmlNode* node = Identify( p, encoding ); 00782 if ( node ) 00783 { 00784 p = node->Parse( p, &data, encoding ); 00785 LinkEndChild( node ); 00786 } 00787 else 00788 { 00789 break; 00790 } 00791 00792 // Did we get encoding info? 00793 if ( encoding == TIXML_ENCODING_UNKNOWN 00794 && node->ToDeclaration() ) 00795 { 00796 TiXmlDeclaration* dec = node->ToDeclaration(); 00797 const char* enc = dec->Encoding(); 00798 assert( enc ); 00799 00800 if ( *enc == 0 ) 00801 encoding = TIXML_ENCODING_UTF8; 00802 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) 00803 encoding = TIXML_ENCODING_UTF8; 00804 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) 00805 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice 00806 else 00807 encoding = TIXML_ENCODING_LEGACY; 00808 } 00809 00810 p = SkipWhiteSpace( p, encoding ); 00811 } 00812 00813 // Was this empty? 00814 if ( !firstChild ) { 00815 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); 00816 return 0; 00817 } 00818 00819 // All is well. 00820 return p; 00821 } 00822 00823 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) 00824 { 00825 // The first error in a chain is more accurate - don't set again! 00826 if ( error ) 00827 return; 00828 00829 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); 00830 error = true; 00831 errorId = err; 00832 errorDesc = errorString[ errorId ]; 00833 00834 errorLocation.Clear(); 00835 if ( pError && data ) 00836 { 00837 data->Stamp( pError, encoding ); 00838 errorLocation = data->Cursor(); 00839 } 00840 } 00841 00842 00843 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) 00844 { 00845 TiXmlNode* returnNode = 0; 00846 00847 p = SkipWhiteSpace( p, encoding ); 00848 if( !p || !*p || *p != '<' ) 00849 { 00850 return 0; 00851 } 00852 00853 TiXmlDocument* doc = GetDocument(); 00854 p = SkipWhiteSpace( p, encoding ); 00855 00856 if ( !p || !*p ) 00857 { 00858 return 0; 00859 } 00860 00861 // What is this thing? 00862 // - Elements start with a letter or underscore, but xml is reserved. 00863 // - Comments: <!-- 00864 // - Decleration: <?xml 00865 // - Everthing else is unknown to tinyxml. 00866 // 00867 00868 const char* xmlHeader = { "<?xml" }; 00869 const char* commentHeader = { "<!--" }; 00870 const char* dtdHeader = { "<!" }; 00871 const char* cdataHeader = { "<![CDATA[" }; 00872 00873 if ( StringEqual( p, xmlHeader, true, encoding ) ) 00874 { 00875 #ifdef DEBUG_PARSER 00876 TIXML_LOG( "XML parsing Declaration\n" ); 00877 #endif 00878 returnNode = new TiXmlDeclaration(); 00879 } 00880 else if ( StringEqual( p, commentHeader, false, encoding ) ) 00881 { 00882 #ifdef DEBUG_PARSER 00883 TIXML_LOG( "XML parsing Comment\n" ); 00884 #endif 00885 returnNode = new TiXmlComment(); 00886 } 00887 else if ( StringEqual( p, cdataHeader, false, encoding ) ) 00888 { 00889 #ifdef DEBUG_PARSER 00890 TIXML_LOG( "XML parsing CDATA\n" ); 00891 #endif 00892 TiXmlText* text = new TiXmlText( "" ); 00893 text->SetCDATA( true ); 00894 returnNode = text; 00895 } 00896 else if ( StringEqual( p, dtdHeader, false, encoding ) ) 00897 { 00898 #ifdef DEBUG_PARSER 00899 TIXML_LOG( "XML parsing Unknown(1)\n" ); 00900 #endif 00901 returnNode = new TiXmlUnknown(); 00902 } 00903 else if ( IsAlpha( *(p+1), encoding ) 00904 || *(p+1) == '_' ) 00905 { 00906 #ifdef DEBUG_PARSER 00907 TIXML_LOG( "XML parsing Element\n" ); 00908 #endif 00909 returnNode = new TiXmlElement( "" ); 00910 } 00911 else 00912 { 00913 #ifdef DEBUG_PARSER 00914 TIXML_LOG( "XML parsing Unknown(2)\n" ); 00915 #endif 00916 returnNode = new TiXmlUnknown(); 00917 } 00918 00919 if ( returnNode ) 00920 { 00921 // Set the parent, so it can report errors 00922 returnNode->parent = this; 00923 } 00924 else 00925 { 00926 if ( doc ) 00927 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00928 } 00929 return returnNode; 00930 } 00931 00932 #ifdef TIXML_USE_STL 00933 00934 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag) 00935 { 00936 // We're called with some amount of pre-parsing. That is, some of "this" 00937 // element is in "tag". Go ahead and stream to the closing ">" 00938 while( in->good() ) 00939 { 00940 int c = in->get(); 00941 if ( c <= 0 ) 00942 { 00943 TiXmlDocument* document = GetDocument(); 00944 if ( document ) 00945 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00946 return; 00947 } 00948 (*tag) += (char) c ; 00949 00950 if ( c == '>' ) 00951 break; 00952 } 00953 00954 if ( tag->length() < 3 ) return; 00955 00956 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. 00957 // If not, identify and stream. 00958 00959 if ( tag->at( tag->length() - 1 ) == '>' 00960 && tag->at( tag->length() - 2 ) == '/' ) 00961 { 00962 // All good! 00963 return; 00964 } 00965 else if ( tag->at( tag->length() - 1 ) == '>' ) 00966 { 00967 // There is more. Could be: 00968 // text 00969 // closing tag 00970 // another node. 00971 for ( ;; ) 00972 { 00973 StreamWhiteSpace( in, tag ); 00974 00975 // Do we have text? 00976 if ( in->good() && in->peek() != '<' ) 00977 { 00978 // Yep, text. 00979 TiXmlText text( "" ); 00980 text.StreamIn( in, tag ); 00981 00982 // What follows text is a closing tag or another node. 00983 // Go around again and figure it out. 00984 continue; 00985 } 00986 00987 // We now have either a closing tag...or another node. 00988 // We should be at a "<", regardless. 00989 if ( !in->good() ) return; 00990 assert( in->peek() == '<' ); 00991 int tagIndex = (int) tag->length(); 00992 00993 bool closingTag = false; 00994 bool firstCharFound = false; 00995 00996 for( ;; ) 00997 { 00998 if ( !in->good() ) 00999 return; 01000 01001 int c = in->peek(); 01002 if ( c <= 0 ) 01003 { 01004 TiXmlDocument* document = GetDocument(); 01005 if ( document ) 01006 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01007 return; 01008 } 01009 01010 if ( c == '>' ) 01011 break; 01012 01013 *tag += (char) c; 01014 in->get(); 01015 01016 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) 01017 { 01018 firstCharFound = true; 01019 if ( c == '/' ) 01020 closingTag = true; 01021 } 01022 } 01023 // If it was a closing tag, then read in the closing '>' to clean up the input stream. 01024 // If it was not, the streaming will be done by the tag. 01025 if ( closingTag ) 01026 { 01027 if ( !in->good() ) 01028 return; 01029 01030 int c = in->get(); 01031 if ( c <= 0 ) 01032 { 01033 TiXmlDocument* document = GetDocument(); 01034 if ( document ) 01035 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01036 return; 01037 } 01038 assert( c == '>' ); 01039 *tag += (char) c; 01040 01041 // We are done, once we've found our closing tag. 01042 return; 01043 } 01044 else 01045 { 01046 // If not a closing tag, id it, and stream. 01047 const char* tagloc = tag->c_str() + tagIndex; 01048 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); 01049 if ( !node ) 01050 return; 01051 node->StreamIn( in, tag ); 01052 delete node; 01053 node = 0; 01054 01055 // No return: go around from the beginning: text, closing tag, or node. 01056 } 01057 } 01058 } 01059 } 01060 #endif 01061 01062 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01063 { 01064 p = SkipWhiteSpace( p, encoding ); 01065 TiXmlDocument* document = GetDocument(); 01066 01067 if ( !p || !*p ) 01068 { 01069 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); 01070 return 0; 01071 } 01072 01073 if ( data ) 01074 { 01075 data->Stamp( p, encoding ); 01076 location = data->Cursor(); 01077 } 01078 01079 if ( *p != '<' ) 01080 { 01081 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); 01082 return 0; 01083 } 01084 01085 p = SkipWhiteSpace( p+1, encoding ); 01086 01087 // Read the name. 01088 const char* pErr = p; 01089 01090 p = ReadName( p, &value, encoding ); 01091 if ( !p || !*p ) 01092 { 01093 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); 01094 return 0; 01095 } 01096 01097 TIXML_STRING endTag ("</"); 01098 endTag += value; 01099 endTag += ">"; 01100 01101 // Check for and read attributes. Also look for an empty 01102 // tag or an end tag. 01103 while ( p && *p ) 01104 { 01105 pErr = p; 01106 p = SkipWhiteSpace( p, encoding ); 01107 if ( !p || !*p ) 01108 { 01109 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 01110 return 0; 01111 } 01112 if ( *p == '/' ) 01113 { 01114 ++p; 01115 // Empty tag. 01116 if ( *p != '>' ) 01117 { 01118 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); 01119 return 0; 01120 } 01121 return (p+1); 01122 } 01123 else if ( *p == '>' ) 01124 { 01125 // Done with attributes (if there were any.) 01126 // Read the value -- which can include other 01127 // elements -- read the end tag, and return. 01128 ++p; 01129 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. 01130 if ( !p || !*p ) 01131 return 0; 01132 01133 // We should find the end tag now 01134 if ( StringEqual( p, endTag.c_str(), false, encoding ) ) 01135 { 01136 p += endTag.length(); 01137 return p; 01138 } 01139 else 01140 { 01141 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 01142 return 0; 01143 } 01144 } 01145 else 01146 { 01147 // Try to read an attribute: 01148 TiXmlAttribute* attrib = new TiXmlAttribute(); 01149 if ( !attrib ) 01150 { 01151 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding ); 01152 return 0; 01153 } 01154 01155 attrib->SetDocument( document ); 01156 const char* pErr = p; 01157 p = attrib->Parse( p, data, encoding ); 01158 01159 if ( !p || !*p ) 01160 { 01161 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); 01162 delete attrib; 01163 return 0; 01164 } 01165 01166 // Handle the strange case of double attributes: 01167 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() ); 01168 if ( node ) 01169 { 01170 node->SetValue( attrib->Value() ); 01171 delete attrib; 01172 return 0; 01173 } 01174 01175 attributeSet.Add( attrib ); 01176 } 01177 } 01178 return p; 01179 } 01180 01181 01182 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01183 { 01184 TiXmlDocument* document = GetDocument(); 01185 01186 // Read in text and elements in any order. 01187 const char* pWithWhiteSpace = p; 01188 p = SkipWhiteSpace( p, encoding ); 01189 01190 while ( p && *p ) 01191 { 01192 if ( *p != '<' ) 01193 { 01194 // Take what we have, make a text element. 01195 TiXmlText* textNode = new TiXmlText( "" ); 01196 01197 if ( !textNode ) 01198 { 01199 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); 01200 return 0; 01201 } 01202 01203 if ( TiXmlBase::IsWhiteSpaceCondensed() ) 01204 { 01205 p = textNode->Parse( p, data, encoding ); 01206 } 01207 else 01208 { 01209 // Special case: we want to keep the white space 01210 // so that leading spaces aren't removed. 01211 p = textNode->Parse( pWithWhiteSpace, data, encoding ); 01212 } 01213 01214 if ( !textNode->Blank() ) 01215 LinkEndChild( textNode ); 01216 else 01217 delete textNode; 01218 } 01219 else 01220 { 01221 // We hit a '<' 01222 // Have we hit a new element or an end tag? This could also be 01223 // a TiXmlText in the "CDATA" style. 01224 if ( StringEqual( p, "</", false, encoding ) ) 01225 { 01226 return p; 01227 } 01228 else 01229 { 01230 TiXmlNode* node = Identify( p, encoding ); 01231 if ( node ) 01232 { 01233 p = node->Parse( p, data, encoding ); 01234 LinkEndChild( node ); 01235 } 01236 else 01237 { 01238 return 0; 01239 } 01240 } 01241 } 01242 pWithWhiteSpace = p; 01243 p = SkipWhiteSpace( p, encoding ); 01244 } 01245 01246 if ( !p ) 01247 { 01248 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); 01249 } 01250 return p; 01251 } 01252 01253 01254 #ifdef TIXML_USE_STL 01255 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01256 { 01257 while ( in->good() ) 01258 { 01259 int c = in->get(); 01260 if ( c <= 0 ) 01261 { 01262 TiXmlDocument* document = GetDocument(); 01263 if ( document ) 01264 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01265 return; 01266 } 01267 (*tag) += (char) c; 01268 01269 if ( c == '>' ) 01270 { 01271 // All is well. 01272 return; 01273 } 01274 } 01275 } 01276 #endif 01277 01278 01279 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01280 { 01281 TiXmlDocument* document = GetDocument(); 01282 p = SkipWhiteSpace( p, encoding ); 01283 01284 if ( data ) 01285 { 01286 data->Stamp( p, encoding ); 01287 location = data->Cursor(); 01288 } 01289 if ( !p || !*p || *p != '<' ) 01290 { 01291 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); 01292 return 0; 01293 } 01294 ++p; 01295 value = ""; 01296 01297 while ( p && *p && *p != '>' ) 01298 { 01299 value += *p; 01300 ++p; 01301 } 01302 01303 if ( !p ) 01304 { 01305 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); 01306 } 01307 if ( *p == '>' ) 01308 return p+1; 01309 return p; 01310 } 01311 01312 #ifdef TIXML_USE_STL 01313 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01314 { 01315 while ( in->good() ) 01316 { 01317 int c = in->get(); 01318 if ( c <= 0 ) 01319 { 01320 TiXmlDocument* document = GetDocument(); 01321 if ( document ) 01322 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01323 return; 01324 } 01325 01326 (*tag) += (char) c; 01327 01328 if ( c == '>' 01329 && tag->at( tag->length() - 2 ) == '-' 01330 && tag->at( tag->length() - 3 ) == '-' ) 01331 { 01332 // All is well. 01333 return; 01334 } 01335 } 01336 } 01337 #endif 01338 01339 01340 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01341 { 01342 TiXmlDocument* document = GetDocument(); 01343 value = ""; 01344 01345 p = SkipWhiteSpace( p, encoding ); 01346 01347 if ( data ) 01348 { 01349 data->Stamp( p, encoding ); 01350 location = data->Cursor(); 01351 } 01352 const char* startTag = "<!--"; 01353 const char* endTag = "-->"; 01354 01355 if ( !StringEqual( p, startTag, false, encoding ) ) 01356 { 01357 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); 01358 return 0; 01359 } 01360 p += strlen( startTag ); 01361 p = ReadText( p, &value, false, endTag, false, encoding ); 01362 return p; 01363 } 01364 01365 01366 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01367 { 01368 p = SkipWhiteSpace( p, encoding ); 01369 if ( !p || !*p ) return 0; 01370 01371 int tabsize = 4; 01372 if ( document ) 01373 tabsize = document->TabSize(); 01374 01375 if ( data ) 01376 { 01377 data->Stamp( p, encoding ); 01378 location = data->Cursor(); 01379 } 01380 // Read the name, the '=' and the value. 01381 const char* pErr = p; 01382 p = ReadName( p, &name, encoding ); 01383 if ( !p || !*p ) 01384 { 01385 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 01386 return 0; 01387 } 01388 p = SkipWhiteSpace( p, encoding ); 01389 if ( !p || !*p || *p != '=' ) 01390 { 01391 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01392 return 0; 01393 } 01394 01395 ++p; // skip '=' 01396 p = SkipWhiteSpace( p, encoding ); 01397 if ( !p || !*p ) 01398 { 01399 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01400 return 0; 01401 } 01402 01403 const char* end; 01404 01405 if ( *p == '\'' ) 01406 { 01407 ++p; 01408 end = "\'"; 01409 p = ReadText( p, &value, false, end, false, encoding ); 01410 } 01411 else if ( *p == '"' ) 01412 { 01413 ++p; 01414 end = "\""; 01415 p = ReadText( p, &value, false, end, false, encoding ); 01416 } 01417 else 01418 { 01419 // All attribute values should be in single or double quotes. 01420 // But this is such a common error that the parser will try 01421 // its best, even without them. 01422 value = ""; 01423 while ( p && *p // existence 01424 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace 01425 && *p != '/' && *p != '>' ) // tag end 01426 { 01427 value += *p; 01428 ++p; 01429 } 01430 } 01431 return p; 01432 } 01433 01434 #ifdef TIXML_USE_STL 01435 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01436 { 01437 if ( cdata ) 01438 { 01439 int c = in->get(); 01440 if ( c <= 0 ) 01441 { 01442 TiXmlDocument* document = GetDocument(); 01443 if ( document ) 01444 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01445 return; 01446 } 01447 01448 (*tag) += (char) c; 01449 01450 if ( c == '>' 01451 && tag->at( tag->length() - 2 ) == ']' 01452 && tag->at( tag->length() - 3 ) == ']' ) 01453 { 01454 // All is well. 01455 return; 01456 } 01457 } 01458 else 01459 { 01460 while ( in->good() ) 01461 { 01462 int c = in->peek(); 01463 if ( c == '<' ) 01464 return; 01465 if ( c <= 0 ) 01466 { 01467 TiXmlDocument* document = GetDocument(); 01468 if ( document ) 01469 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01470 return; 01471 } 01472 01473 (*tag) += (char) c; 01474 in->get(); 01475 } 01476 } 01477 } 01478 #endif 01479 01480 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01481 { 01482 value = ""; 01483 TiXmlDocument* document = GetDocument(); 01484 01485 if ( data ) 01486 { 01487 data->Stamp( p, encoding ); 01488 location = data->Cursor(); 01489 } 01490 01491 const char* const startTag = "<![CDATA["; 01492 const char* const endTag = "]]>"; 01493 01494 if ( cdata || StringEqual( p, startTag, false, encoding ) ) 01495 { 01496 cdata = true; 01497 01498 if ( !StringEqual( p, startTag, false, encoding ) ) 01499 { 01500 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); 01501 return 0; 01502 } 01503 p += strlen( startTag ); 01504 01505 // Keep all the white space, ignore the encoding, etc. 01506 while ( p && *p 01507 && !StringEqual( p, endTag, false, encoding ) 01508 ) 01509 { 01510 value += *p; 01511 ++p; 01512 } 01513 01514 TIXML_STRING dummy; 01515 p = ReadText( p, &dummy, false, endTag, false, encoding ); 01516 return p; 01517 } 01518 else 01519 { 01520 bool ignoreWhite = true; 01521 01522 const char* end = "<"; 01523 p = ReadText( p, &value, ignoreWhite, end, false, encoding ); 01524 if ( p ) 01525 return p-1; // don't truncate the '<' 01526 return 0; 01527 } 01528 } 01529 01530 #ifdef TIXML_USE_STL 01531 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01532 { 01533 while ( in->good() ) 01534 { 01535 int c = in->get(); 01536 if ( c <= 0 ) 01537 { 01538 TiXmlDocument* document = GetDocument(); 01539 if ( document ) 01540 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01541 return; 01542 } 01543 (*tag) += (char) c; 01544 01545 if ( c == '>' ) 01546 { 01547 // All is well. 01548 return; 01549 } 01550 } 01551 } 01552 #endif 01553 01554 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) 01555 { 01556 p = SkipWhiteSpace( p, _encoding ); 01557 // Find the beginning, find the end, and look for 01558 // the stuff in-between. 01559 TiXmlDocument* document = GetDocument(); 01560 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) 01561 { 01562 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); 01563 return 0; 01564 } 01565 if ( data ) 01566 { 01567 data->Stamp( p, _encoding ); 01568 location = data->Cursor(); 01569 } 01570 p += 5; 01571 01572 version = ""; 01573 encoding = ""; 01574 standalone = ""; 01575 01576 while ( p && *p ) 01577 { 01578 if ( *p == '>' ) 01579 { 01580 ++p; 01581 return p; 01582 } 01583 01584 p = SkipWhiteSpace( p, _encoding ); 01585 if ( StringEqual( p, "version", true, _encoding ) ) 01586 { 01587 TiXmlAttribute attrib; 01588 p = attrib.Parse( p, data, _encoding ); 01589 version = attrib.Value(); 01590 } 01591 else if ( StringEqual( p, "encoding", true, _encoding ) ) 01592 { 01593 TiXmlAttribute attrib; 01594 p = attrib.Parse( p, data, _encoding ); 01595 encoding = attrib.Value(); 01596 } 01597 else if ( StringEqual( p, "standalone", true, _encoding ) ) 01598 { 01599 TiXmlAttribute attrib; 01600 p = attrib.Parse( p, data, _encoding ); 01601 standalone = attrib.Value(); 01602 } 01603 else 01604 { 01605 // Read over whatever it is. 01606 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) 01607 ++p; 01608 } 01609 } 01610 return 0; 01611 } 01612 01613 bool TiXmlText::Blank() const 01614 { 01615 for ( unsigned i=0; i<value.length(); i++ ) 01616 if ( !IsWhiteSpace( value[i] ) ) 01617 return false; 01618 return true; 01619 } 01620 01621 }}