00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <ctype.h>
00026 #include <stddef.h>
00027
00028 #include "tinyxml.h"
00029
00030
// Parser tracing: TIXML_LOG exists only when DEBUG_PARSER is defined.
// MSVC builds that also define DEBUG send it to OutputDebugString; every
// other configuration maps it to printf.
#ifdef DEBUG_PARSER
#	if defined( _MSC_VER ) && defined( DEBUG )
#		include <windows.h>
#		define TIXML_LOG OutputDebugString
#	else
#		define TIXML_LOG printf
#	endif
#endif
00039
00040
00041
00042
00043 TiXmlBase::Entity TiXmlBase::entity[NUM_ENTITY] = { { "&", 5, '&' }, {
00044 "<", 4, '<' }, { ">", 4, '>' }, { """, 6, '\"' }, {
00045 "'", 6, '\'' } };
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
// The bytes 0xEF 0xBB 0xBF, tested together by the parser as a three-byte
// lead sequence at the start of UTF-8 input.
const unsigned char TIXML_UTF_LEAD_0 = 0xEF;
const unsigned char TIXML_UTF_LEAD_1 = 0xBB;
const unsigned char TIXML_UTF_LEAD_2 = 0xBF;
00060
00061 const int TiXmlBase::utf8ByteTable[256] = {
00062
00063 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00064 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00065 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00066 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00067 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00068 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00075 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00076 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00077 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00078 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
00079 };
00080
00081 void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, char* output,
00082 int* length) {
00083 const unsigned long BYTE_MASK = 0xBF;
00084 const unsigned long BYTE_MARK = 0x80;
00085 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0,
00086 0xF8, 0xFC };
00087
00088 if (input < 0x80)
00089 *length = 1;
00090 else if (input < 0x800)
00091 *length = 2;
00092 else if (input < 0x10000)
00093 *length = 3;
00094 else if (input < 0x200000)
00095 *length = 4;
00096 else {
00097 *length = 0;
00098 return;
00099 }
00100
00101 output += *length;
00102
00103
00104 switch (*length) {
00105 case 4:
00106 --output;
00107 *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00108 input >>= 6;
00109 case 3:
00110 --output;
00111 *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00112 input >>= 6;
00113 case 2:
00114 --output;
00115 *output = (char) ((input | BYTE_MARK) & BYTE_MASK);
00116 input >>= 6;
00117 case 1:
00118 --output;
00119 *output = (char) (input | FIRST_BYTE_MARK[*length]);
00120 }
00121 }
00122
00123 int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding ) {
00124
00125
00126
00127
00128
00129
00130
00131 if (anyByte < 127)
00132 return isalpha(anyByte);
00133 else
00134 return 1;
00135
00136
00137
00138
00139
00140 }
00141
00142 int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding ) {
00143
00144
00145
00146
00147
00148
00149
00150 if (anyByte < 127)
00151 return isalnum(anyByte);
00152 else
00153 return 1;
00154
00155
00156
00157
00158
00159 }
00160
00161 class TiXmlParsingData {
00162 friend class TiXmlDocument;
00163 public:
00164 void Stamp(const char* now, TiXmlEncoding encoding);
00165
00166 const TiXmlCursor& Cursor() {
00167 return cursor;
00168 }
00169
00170 private:
00171
00172 TiXmlParsingData(const char* start, int _tabsize, int row, int col) {
00173 assert( start );
00174 stamp = start;
00175 tabsize = _tabsize;
00176 cursor.row = row;
00177 cursor.col = col;
00178 }
00179
00180 TiXmlCursor cursor;
00181 const char* stamp;
00182 int tabsize;
00183 };
00184
00185 void TiXmlParsingData::Stamp(const char* now, TiXmlEncoding encoding) {
00186 assert( now );
00187
00188
00189 if (tabsize < 1) {
00190 return;
00191 }
00192
00193
00194 int row = cursor.row;
00195 int col = cursor.col;
00196 const char* p = stamp;
00197 assert( p );
00198
00199 while (p < now) {
00200
00201 const unsigned char* pU = (const unsigned char*) p;
00202
00203
00204 switch (*pU) {
00205 case 0:
00206
00207
00208 return;
00209
00210 case '\r':
00211
00212 ++row;
00213 col = 0;
00214
00215 ++p;
00216
00217
00218 if (*p == '\n') {
00219 ++p;
00220 }
00221 break;
00222
00223 case '\n':
00224
00225 ++row;
00226 col = 0;
00227
00228
00229 ++p;
00230
00231
00232
00233
00234 if (*p == '\r') {
00235 ++p;
00236 }
00237 break;
00238
00239 case '\t':
00240
00241 ++p;
00242
00243
00244 col = (col / tabsize + 1) * tabsize;
00245 break;
00246
00247 case TIXML_UTF_LEAD_0:
00248 if (encoding == TIXML_ENCODING_UTF8) {
00249 if (*(p + 1) && *(p + 2)) {
00250
00251
00252 if (*(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2)
00253 == TIXML_UTF_LEAD_2)
00254 p += 3;
00255 else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbeU)
00256 p += 3;
00257 else if (*(pU + 1) == 0xbfU && *(pU + 2) == 0xbfU)
00258 p += 3;
00259 else {
00260 p += 3;
00261 ++col;
00262 }
00263 }
00264 } else {
00265 ++p;
00266 ++col;
00267 }
00268 break;
00269
00270 default:
00271 if (encoding == TIXML_ENCODING_UTF8) {
00272
00273 int step =
00274 TiXmlBase::utf8ByteTable[*((const unsigned char*) p)];
00275 if (step == 0)
00276 step = 1;
00277 p += step;
00278
00279
00280 ++col;
00281 } else {
00282 ++p;
00283 ++col;
00284 }
00285 break;
00286 }
00287 }
00288 cursor.row = row;
00289 cursor.col = col;
00290 assert( cursor.row >= -1 );
00291 assert( cursor.col >= -1 );
00292 stamp = p;
00293 assert( stamp );
00294 }
00295
00296 const char* TiXmlBase::SkipWhiteSpace(const char* p, TiXmlEncoding encoding) {
00297 if (!p || !*p) {
00298 return 0;
00299 }
00300 if (encoding == TIXML_ENCODING_UTF8) {
00301 while (*p) {
00302 const unsigned char* pU = (const unsigned char*) p;
00303
00304
00305 if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == TIXML_UTF_LEAD_1
00306 && *(pU + 2) == TIXML_UTF_LEAD_2) {
00307 p += 3;
00308 continue;
00309 } else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU
00310 && *(pU + 2) == 0xbeU) {
00311 p += 3;
00312 continue;
00313 } else if (*(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1) == 0xbfU
00314 && *(pU + 2) == 0xbfU) {
00315 p += 3;
00316 continue;
00317 }
00318
00319 if (IsWhiteSpace(*p) || *p == '\n' || *p == '\r')
00320 ++p;
00321 else
00322 break;
00323 }
00324 } else {
00325 while (*p && IsWhiteSpace(*p) || *p == '\n' || *p == '\r')
00326 ++p;
00327 }
00328
00329 return p;
00330 }
00331
00332 #ifdef TIXML_USE_STL
00333 bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
00334 {
00335 for(;; )
00336 {
00337 if ( !in->good() ) return false;
00338
00339 int c = in->peek();
00340
00341 if ( !IsWhiteSpace( c ) || c <= 0 )
00342 return true;
00343
00344 *tag += (char) in->get();
00345 }
00346 }
00347
00348 bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
00349 {
00350
00351 while ( in->good() )
00352 {
00353 int c = in->peek();
00354 if ( c == character )
00355 return true;
00356 if ( c <= 0 )
00357 return false;
00358
00359 in->get();
00360 *tag += (char) c;
00361 }
00362 return false;
00363 }
00364 #endif
00365
00366
00367
00368
00369 const char* TiXmlBase::ReadName(const char* p, TIXML_STRING * name,
00370 TiXmlEncoding encoding) {
00371
00372
00373
00374 *name = "";
00375 assert( p );
00376
00377
00378
00379
00380
00381
00382
00383
00384 if (p && *p && (IsAlpha((unsigned char) *p, encoding) || *p == '_')) {
00385 const char* start = p;
00386 while (p && *p && (IsAlphaNum((unsigned char) *p, encoding) || *p
00387 == '_' || *p == '-' || *p == '.' || *p == ':')) {
00388
00389 ++p;
00390 }
00391 if (p - start > 0) {
00392 name->assign(start, p - start);
00393 }
00394 return p;
00395 }
00396 return 0;
00397 }
00398
00399 const char* TiXmlBase::GetEntity(const char* p, char* value, int* length,
00400 TiXmlEncoding encoding) {
00401
00402 TIXML_STRING ent;
00403 int i;
00404 *length = 0;
00405
00406 if (*(p + 1) && *(p + 1) == '#' && *(p + 2)) {
00407 unsigned long ucs = 0;
00408 ptrdiff_t delta = 0;
00409 unsigned mult = 1;
00410
00411 if (*(p + 2) == 'x') {
00412
00413 if (!*(p + 3))
00414 return 0;
00415
00416 const char* q = p + 3;
00417 q = strchr(q, ';');
00418
00419 if (!q || !*q)
00420 return 0;
00421
00422 delta = q - p;
00423 --q;
00424
00425 while (*q != 'x') {
00426 if (*q >= '0' && *q <= '9')
00427 ucs += mult * (*q - '0');
00428 else if (*q >= 'a' && *q <= 'f')
00429 ucs += mult * (*q - 'a' + 10);
00430 else if (*q >= 'A' && *q <= 'F')
00431 ucs += mult * (*q - 'A' + 10);
00432 else
00433 return 0;
00434 mult *= 16;
00435 --q;
00436 }
00437 } else {
00438
00439 if (!*(p + 2))
00440 return 0;
00441
00442 const char* q = p + 2;
00443 q = strchr(q, ';');
00444
00445 if (!q || !*q)
00446 return 0;
00447
00448 delta = q - p;
00449 --q;
00450
00451 while (*q != '#') {
00452 if (*q >= '0' && *q <= '9')
00453 ucs += mult * (*q - '0');
00454 else
00455 return 0;
00456 mult *= 10;
00457 --q;
00458 }
00459 }
00460 if (encoding == TIXML_ENCODING_UTF8) {
00461
00462 ConvertUTF32ToUTF8(ucs, value, length);
00463 } else {
00464 *value = (char) ucs;
00465 *length = 1;
00466 }
00467 return p + delta + 1;
00468 }
00469
00470
00471 for (i = 0; i < NUM_ENTITY; ++i) {
00472 if (strncmp(entity[i].str, p, entity[i].strLength) == 0) {
00473 assert( strlen( entity[i].str ) == entity[i].strLength );
00474 *value = entity[i].chr;
00475 *length = 1;
00476 return (p + entity[i].strLength);
00477 }
00478 }
00479
00480
00481 *value = *p;
00482
00483
00484 return p + 1;
00485 }
00486
00487 bool TiXmlBase::StringEqual(const char* p, const char* tag, bool ignoreCase,
00488 TiXmlEncoding encoding) {
00489 assert( p );
00490 assert( tag );
00491 if (!p || !*p) {
00492 assert( 0 );
00493 return false;
00494 }
00495
00496 const char* q = p;
00497
00498 if (ignoreCase) {
00499 while (*q && *tag && ToLower(*q, encoding) == ToLower(*tag, encoding)) {
00500 ++q;
00501 ++tag;
00502 }
00503
00504 if (*tag == 0)
00505 return true;
00506 } else {
00507 while (*q && *tag && *q == *tag) {
00508 ++q;
00509 ++tag;
00510 }
00511
00512 if (*tag == 0)
00513 return true;
00514 }
00515 return false;
00516 }
00517
00518 const char* TiXmlBase::ReadText(const char* p, TIXML_STRING * text,
00519 bool trimWhiteSpace, const char* endTag, bool caseInsensitive,
00520 TiXmlEncoding encoding) {
00521 *text = "";
00522 if (!trimWhiteSpace
00523 || !condenseWhiteSpace)
00524 {
00525
00526 while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding)) {
00527 int len;
00528 char cArr[4] = { 0, 0, 0, 0 };
00529 p = GetChar(p, cArr, &len, encoding);
00530 text->append(cArr, len);
00531 }
00532 } else {
00533 bool whitespace = false;
00534
00535
00536 p = SkipWhiteSpace(p, encoding);
00537 while (p && *p && !StringEqual(p, endTag, caseInsensitive, encoding)) {
00538 if (*p == '\r' || *p == '\n') {
00539 whitespace = true;
00540 ++p;
00541 } else if (IsWhiteSpace(*p)) {
00542 whitespace = true;
00543 ++p;
00544 } else {
00545
00546
00547 if (whitespace) {
00548 (*text) += ' ';
00549 whitespace = false;
00550 }
00551 int len;
00552 char cArr[4] = { 0, 0, 0, 0 };
00553 p = GetChar(p, cArr, &len, encoding);
00554 if (len == 1)
00555 (*text) += cArr[0];
00556 else
00557 text->append(cArr, len);
00558 }
00559 }
00560 }
00561 if (p)
00562 p += strlen(endTag);
00563 return p;
00564 }
00565
00566 #ifdef TIXML_USE_STL
00567
00568 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
00569 {
00570
00571
00572
00573
00574
00575
00576
00577 if ( !StreamTo( in, '<', tag ) )
00578 {
00579 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00580 return;
00581 }
00582
00583 while ( in->good() )
00584 {
00585 int tagIndex = (int) tag->length();
00586 while ( in->good() && in->peek() != '>' )
00587 {
00588 int c = in->get();
00589 if ( c <= 0 )
00590 {
00591 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00592 break;
00593 }
00594 (*tag) += (char) c;
00595 }
00596
00597 if ( in->good() )
00598 {
00599
00600
00601
00602 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00603
00604 if ( node )
00605 {
00606 node->StreamIn( in, tag );
00607 bool isElement = node->ToElement() != 0;
00608 delete node;
00609 node = 0;
00610
00611
00612
00613 if ( isElement )
00614 {
00615 return;
00616 }
00617 }
00618 else
00619 {
00620 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00621 return;
00622 }
00623 }
00624 }
00625
00626 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00627 }
00628
00629 #endif
00630
00631 const char* TiXmlDocument::Parse(const char* p, TiXmlParsingData* prevData,
00632 TiXmlEncoding encoding) {
00633 ClearError();
00634
00635
00636
00637
00638 if (!p || !*p) {
00639 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
00640 return 0;
00641 }
00642
00643
00644
00645
00646 location.Clear();
00647 if (prevData) {
00648 location.row = prevData->cursor.row;
00649 location.col = prevData->cursor.col;
00650 } else {
00651 location.row = 0;
00652 location.col = 0;
00653 }
00654 TiXmlParsingData data(p, TabSize(), location.row, location.col);
00655 location = data.Cursor();
00656
00657 if (encoding == TIXML_ENCODING_UNKNOWN) {
00658
00659 const unsigned char* pU = (const unsigned char*) p;
00660 if (*(pU + 0) && *(pU + 0) == TIXML_UTF_LEAD_0 && *(pU + 1)
00661 && *(pU + 1) == TIXML_UTF_LEAD_1 && *(pU + 2) && *(pU + 2)
00662 == TIXML_UTF_LEAD_2) {
00663 encoding = TIXML_ENCODING_UTF8;
00664 useMicrosoftBOM = true;
00665 }
00666 }
00667
00668 p = SkipWhiteSpace(p, encoding);
00669 if (!p) {
00670 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
00671 return 0;
00672 }
00673
00674 while (p && *p) {
00675 TiXmlNode* node = Identify(p, encoding);
00676 if (node) {
00677 p = node->Parse(p, &data, encoding);
00678 LinkEndChild(node);
00679 } else {
00680 break;
00681 }
00682
00683
00684 if (encoding == TIXML_ENCODING_UNKNOWN && node->ToDeclaration()) {
00685 TiXmlDeclaration* dec = node->ToDeclaration();
00686 const char* enc = dec->Encoding();
00687 assert( enc );
00688
00689 if (*enc == 0)
00690 encoding = TIXML_ENCODING_UTF8;
00691 else if (StringEqual(enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN))
00692 encoding = TIXML_ENCODING_UTF8;
00693 else if (StringEqual(enc, "UTF8", true, TIXML_ENCODING_UNKNOWN))
00694 encoding = TIXML_ENCODING_UTF8;
00695 else
00696 encoding = TIXML_ENCODING_LEGACY;
00697 }
00698
00699 p = SkipWhiteSpace(p, encoding);
00700 }
00701
00702
00703 if (!firstChild) {
00704 SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding);
00705 return 0;
00706 }
00707
00708
00709 return p;
00710 }
00711
00712 void TiXmlDocument::SetError(int err, const char* pError,
00713 TiXmlParsingData* data, TiXmlEncoding encoding) {
00714
00715 if (error)
00716 return;
00717
00718 assert( err> 0 && err < TIXML_ERROR_STRING_COUNT );
00719 error = true;
00720 errorId = err;
00721 errorDesc = errorString[errorId];
00722
00723 errorLocation.Clear();
00724 if (pError && data) {
00725 data->Stamp(pError, encoding);
00726 errorLocation = data->Cursor();
00727 }
00728 }
00729
00730 TiXmlNode* TiXmlNode::Identify(const char* p, TiXmlEncoding encoding) {
00731 TiXmlNode* returnNode = 0;
00732
00733 p = SkipWhiteSpace(p, encoding);
00734 if (!p || !*p || *p != '<') {
00735 return 0;
00736 }
00737
00738 TiXmlDocument* doc = GetDocument();
00739 p = SkipWhiteSpace(p, encoding);
00740
00741 if (!p || !*p) {
00742 return 0;
00743 }
00744
00745
00746
00747
00748
00749
00750
00751
00752 const char* xmlHeader = { "<?xml" };
00753 const char* commentHeader = { "<!--" };
00754 const char* dtdHeader = { "<!" };
00755 const char* cdataHeader = { "<![CDATA[" };
00756
00757 if (StringEqual(p, xmlHeader, true, encoding)) {
00758 #ifdef DEBUG_PARSER
00759 TIXML_LOG( "XML parsing Declaration\n" );
00760 #endif
00761 returnNode = new TiXmlDeclaration();
00762 } else if (StringEqual(p, commentHeader, false, encoding)) {
00763 #ifdef DEBUG_PARSER
00764 TIXML_LOG( "XML parsing Comment\n" );
00765 #endif
00766 returnNode = new TiXmlComment();
00767 } else if (StringEqual(p, cdataHeader, false, encoding)) {
00768 #ifdef DEBUG_PARSER
00769 TIXML_LOG( "XML parsing CDATA\n" );
00770 #endif
00771 TiXmlText* text = new TiXmlText("");
00772 text->SetCDATA(true);
00773 returnNode = text;
00774 } else if (StringEqual(p, dtdHeader, false, encoding)) {
00775 #ifdef DEBUG_PARSER
00776 TIXML_LOG( "XML parsing Unknown(1)\n" );
00777 #endif
00778 returnNode = new TiXmlUnknown();
00779 } else if (IsAlpha(*(p + 1), encoding) || *(p + 1) == '_') {
00780 #ifdef DEBUG_PARSER
00781 TIXML_LOG( "XML parsing Element\n" );
00782 #endif
00783 returnNode = new TiXmlElement("");
00784 } else {
00785 #ifdef DEBUG_PARSER
00786 TIXML_LOG( "XML parsing Unknown(2)\n" );
00787 #endif
00788 returnNode = new TiXmlUnknown();
00789 }
00790
00791 if (returnNode) {
00792
00793 returnNode->parent = this;
00794 } else {
00795 if (doc)
00796 doc->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0,
00797 TIXML_ENCODING_UNKNOWN);
00798 }
00799 return returnNode;
00800 }
00801
00802 #ifdef TIXML_USE_STL
00803
00804 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
00805 {
00806
00807
00808 while( in->good() )
00809 {
00810 int c = in->get();
00811 if ( c <= 0 )
00812 {
00813 TiXmlDocument* document = GetDocument();
00814 if ( document )
00815 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00816 return;
00817 }
00818 (*tag) += (char) c;
00819
00820 if ( c == '>' )
00821 break;
00822 }
00823
00824 if ( tag->length() < 3 ) return;
00825
00826
00827
00828
00829 if ( tag->at( tag->length() - 1 ) == '>'
00830 && tag->at( tag->length() - 2 ) == '/' )
00831 {
00832
00833 return;
00834 }
00835 else if ( tag->at( tag->length() - 1 ) == '>' )
00836 {
00837
00838
00839
00840
00841
00842 for (;; )
00843 {
00844 StreamWhiteSpace( in, tag );
00845
00846
00847 if ( in->good() && in->peek() != '<' )
00848 {
00849
00850 TiXmlText text( "" );
00851 text.StreamIn( in, tag );
00852
00853
00854
00855 continue;
00856 }
00857
00858
00859
00860 if ( !in->good() ) return;
00861 assert( in->peek() == '<' );
00862 int tagIndex = (int) tag->length();
00863
00864 bool closingTag = false;
00865 bool firstCharFound = false;
00866
00867 for(;; )
00868 {
00869 if ( !in->good() )
00870 return;
00871
00872 int c = in->peek();
00873 if ( c <= 0 )
00874 {
00875 TiXmlDocument* document = GetDocument();
00876 if ( document )
00877 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00878 return;
00879 }
00880
00881 if ( c == '>' )
00882 break;
00883
00884 *tag += (char) c;
00885 in->get();
00886
00887
00888 if ( c == '[' && tag->size() >= 9 )
00889 {
00890 size_t len = tag->size();
00891 const char* start = tag->c_str() + len - 9;
00892 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
00893 assert( !closingTag );
00894 break;
00895 }
00896 }
00897
00898 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00899 {
00900 firstCharFound = true;
00901 if ( c == '/' )
00902 closingTag = true;
00903 }
00904 }
00905
00906
00907 if ( closingTag )
00908 {
00909 if ( !in->good() )
00910 return;
00911
00912 int c = in->get();
00913 if ( c <= 0 )
00914 {
00915 TiXmlDocument* document = GetDocument();
00916 if ( document )
00917 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00918 return;
00919 }
00920 assert( c == '>' );
00921 *tag += (char) c;
00922
00923
00924 return;
00925 }
00926 else
00927 {
00928
00929 const char* tagloc = tag->c_str() + tagIndex;
00930 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
00931 if ( !node )
00932 return;
00933 node->StreamIn( in, tag );
00934 delete node;
00935 node = 0;
00936
00937
00938 }
00939 }
00940 }
00941 }
00942 #endif
00943
00944 const char* TiXmlElement::Parse(const char* p, TiXmlParsingData* data,
00945 TiXmlEncoding encoding) {
00946 p = SkipWhiteSpace(p, encoding);
00947 TiXmlDocument* document = GetDocument();
00948
00949 if (!p || !*p) {
00950 if (document)
00951 document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding);
00952 return 0;
00953 }
00954
00955 if (data) {
00956 data->Stamp(p, encoding);
00957 location = data->Cursor();
00958 }
00959
00960 if (*p != '<') {
00961 if (document)
00962 document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding);
00963 return 0;
00964 }
00965
00966 p = SkipWhiteSpace(p + 1, encoding);
00967
00968
00969 const char* pErr = p;
00970
00971 p = ReadName(p, &value, encoding);
00972 if (!p || !*p) {
00973 if (document)
00974 document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr,
00975 data, encoding);
00976 return 0;
00977 }
00978 TIXML_STRING endTag("</");
00979 endTag += value;
00980 endTag += ">";
00981
00982
00983
00984 while (p && *p) {
00985 pErr = p;
00986 p = SkipWhiteSpace(p, encoding);
00987 if (!p || !*p) {
00988 if (document)
00989 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data,
00990 encoding);
00991 return 0;
00992 }
00993 if (*p == '/') {
00994 ++p;
00995
00996 if (*p != '>') {
00997 if (document)
00998 document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data,
00999 encoding);
01000 return 0;
01001 }
01002 return (p + 1);
01003 } else if (*p == '>') {
01004
01005
01006
01007 ++p;
01008 p = ReadValue(p, data, encoding);
01009 if (!p || !*p) {
01010
01011
01012 if (document)
01013 document->SetError(TIXML_ERROR_READING_END_TAG, p, data,
01014 encoding);
01015 return 0;
01016 }
01017
01018
01019 if (StringEqual(p, endTag.c_str(), false, encoding)) {
01020 p += endTag.length();
01021 return p;
01022 } else {
01023 if (document)
01024 document->SetError(TIXML_ERROR_READING_END_TAG, p, data,
01025 encoding);
01026 return 0;
01027 }
01028 } else {
01029
01030 TiXmlAttribute* attrib = new TiXmlAttribute();
01031 if (!attrib) {
01032 if (document)
01033 document->SetError(TIXML_ERROR_OUT_OF_MEMORY, pErr, data,
01034 encoding);
01035 return 0;
01036 }
01037
01038 attrib->SetDocument(document);
01039 pErr = p;
01040 p = attrib->Parse(p, data, encoding);
01041
01042 if (!p || !*p) {
01043 if (document)
01044 document->SetError(TIXML_ERROR_PARSING_ELEMENT, pErr, data,
01045 encoding);
01046 delete attrib;
01047 return 0;
01048 }
01049
01050
01051 #ifdef TIXML_USE_STL
01052 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01053 #else
01054 TiXmlAttribute* node = attributeSet.Find(attrib->Name());
01055 #endif
01056 if (node) {
01057 node->SetValue(attrib->Value());
01058 delete attrib;
01059 return 0;
01060 }
01061
01062 attributeSet.Add(attrib);
01063 }
01064 }
01065 return p;
01066 }
01067
01068 const char* TiXmlElement::ReadValue(const char* p, TiXmlParsingData* data,
01069 TiXmlEncoding encoding) {
01070 TiXmlDocument* document = GetDocument();
01071
01072
01073 const char* pWithWhiteSpace = p;
01074 p = SkipWhiteSpace(p, encoding);
01075
01076 while (p && *p) {
01077 if (*p != '<') {
01078
01079 TiXmlText* textNode = new TiXmlText("");
01080
01081 if (!textNode) {
01082 if (document)
01083 document->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0,
01084 encoding);
01085 return 0;
01086 }
01087
01088 if (TiXmlBase::IsWhiteSpaceCondensed()) {
01089 p = textNode->Parse(p, data, encoding);
01090 } else {
01091
01092
01093 p = textNode->Parse(pWithWhiteSpace, data, encoding);
01094 }
01095
01096 if (!textNode->Blank())
01097 LinkEndChild(textNode);
01098 else
01099 delete textNode;
01100 } else {
01101
01102
01103
01104 if (StringEqual(p, "</", false, encoding)) {
01105 return p;
01106 } else {
01107 TiXmlNode* node = Identify(p, encoding);
01108 if (node) {
01109 p = node->Parse(p, data, encoding);
01110 LinkEndChild(node);
01111 } else {
01112 return 0;
01113 }
01114 }
01115 }
01116 pWithWhiteSpace = p;
01117 p = SkipWhiteSpace(p, encoding);
01118 }
01119
01120 if (!p) {
01121 if (document)
01122 document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0,
01123 encoding);
01124 }
01125 return p;
01126 }
01127
01128 #ifdef TIXML_USE_STL
01129 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
01130 {
01131 while ( in->good() )
01132 {
01133 int c = in->get();
01134 if ( c <= 0 )
01135 {
01136 TiXmlDocument* document = GetDocument();
01137 if ( document )
01138 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01139 return;
01140 }
01141 (*tag) += (char) c;
01142
01143 if ( c == '>' )
01144 {
01145
01146 return;
01147 }
01148 }
01149 }
01150 #endif
01151
01152 const char* TiXmlUnknown::Parse(const char* p, TiXmlParsingData* data,
01153 TiXmlEncoding encoding) {
01154 TiXmlDocument* document = GetDocument();
01155 p = SkipWhiteSpace(p, encoding);
01156
01157 if (data) {
01158 data->Stamp(p, encoding);
01159 location = data->Cursor();
01160 }
01161 if (!p || !*p || *p != '<') {
01162 if (document)
01163 document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding);
01164 return 0;
01165 }
01166 ++p;
01167 value = "";
01168
01169 while (p && *p && *p != '>') {
01170 value += *p;
01171 ++p;
01172 }
01173
01174 if (!p) {
01175 if (document)
01176 document->SetError(TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding);
01177 }
01178 if (*p == '>')
01179 return p + 1;
01180 return p;
01181 }
01182
01183 #ifdef TIXML_USE_STL
01184 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
01185 {
01186 while ( in->good() )
01187 {
01188 int c = in->get();
01189 if ( c <= 0 )
01190 {
01191 TiXmlDocument* document = GetDocument();
01192 if ( document )
01193 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01194 return;
01195 }
01196
01197 (*tag) += (char) c;
01198
01199 if ( c == '>'
01200 && tag->at( tag->length() - 2 ) == '-'
01201 && tag->at( tag->length() - 3 ) == '-' )
01202 {
01203
01204 return;
01205 }
01206 }
01207 }
01208 #endif
01209
01210 const char* TiXmlComment::Parse(const char* p, TiXmlParsingData* data,
01211 TiXmlEncoding encoding) {
01212 TiXmlDocument* document = GetDocument();
01213 value = "";
01214
01215 p = SkipWhiteSpace(p, encoding);
01216
01217 if (data) {
01218 data->Stamp(p, encoding);
01219 location = data->Cursor();
01220 }
01221 const char* startTag = "<!--";
01222 const char* endTag = "-->";
01223
01224 if (!StringEqual(p, startTag, false, encoding)) {
01225 document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding);
01226 return 0;
01227 }
01228 p += strlen(startTag);
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248 value = "";
01249
01250 while (p && *p && !StringEqual(p, endTag, false, encoding)) {
01251 value.append(p, 1);
01252 ++p;
01253 }
01254 if (p)
01255 p += strlen(endTag);
01256
01257 return p;
01258 }
01259
01260 const char* TiXmlAttribute::Parse(const char* p, TiXmlParsingData* data,
01261 TiXmlEncoding encoding) {
01262 p = SkipWhiteSpace(p, encoding);
01263 if (!p || !*p)
01264 return 0;
01265
01266
01267
01268
01269
01270 if (data) {
01271 data->Stamp(p, encoding);
01272 location = data->Cursor();
01273 }
01274
01275 const char* pErr = p;
01276 p = ReadName(p, &name, encoding);
01277 if (!p || !*p) {
01278 if (document)
01279 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data,
01280 encoding);
01281 return 0;
01282 }
01283 p = SkipWhiteSpace(p, encoding);
01284 if (!p || !*p || *p != '=') {
01285 if (document)
01286 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01287 encoding);
01288 return 0;
01289 }
01290
01291 ++p;
01292 p = SkipWhiteSpace(p, encoding);
01293 if (!p || !*p) {
01294 if (document)
01295 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01296 encoding);
01297 return 0;
01298 }
01299
01300 const char* end;
01301 const char SINGLE_QUOTE = '\'';
01302 const char DOUBLE_QUOTE = '\"';
01303
01304 if (*p == SINGLE_QUOTE) {
01305 ++p;
01306 end = "\'";
01307 p = ReadText(p, &value, false, end, false, encoding);
01308 } else if (*p == DOUBLE_QUOTE) {
01309 ++p;
01310 end = "\"";
01311 p = ReadText(p, &value, false, end, false, encoding);
01312 } else {
01313
01314
01315
01316 value = "";
01317 while (p && *p
01318 && !IsWhiteSpace(*p) && *p != '\n' && *p != '\r'
01319 && *p != '/' && *p != '>')
01320 {
01321 if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE) {
01322
01323
01324
01325 if (document)
01326 document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data,
01327 encoding);
01328 return 0;
01329 }
01330 value += *p;
01331 ++p;
01332 }
01333 }
01334 return p;
01335 }
01336
01337 #ifdef TIXML_USE_STL
01338 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
01339 {
01340 while ( in->good() )
01341 {
01342 int c = in->peek();
01343 if ( !cdata && (c == '<' ) )
01344 {
01345 return;
01346 }
01347 if ( c <= 0 )
01348 {
01349 TiXmlDocument* document = GetDocument();
01350 if ( document )
01351 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01352 return;
01353 }
01354
01355 (*tag) += (char) c;
01356 in->get();
01357
01358 if ( cdata && c == '>' && tag->size() >= 3 ) {
01359 size_t len = tag->size();
01360 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
01361
01362 return;
01363 }
01364 }
01365 }
01366 }
01367 #endif
01368
01369 const char* TiXmlText::Parse(const char* p, TiXmlParsingData* data,
01370 TiXmlEncoding encoding) {
01371 value = "";
01372 TiXmlDocument* document = GetDocument();
01373
01374 if (data) {
01375 data->Stamp(p, encoding);
01376 location = data->Cursor();
01377 }
01378
01379 const char* const startTag = "<![CDATA[";
01380 const char* const endTag = "]]>";
01381
01382 if (cdata || StringEqual(p, startTag, false, encoding)) {
01383 cdata = true;
01384
01385 if (!StringEqual(p, startTag, false, encoding)) {
01386 document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding);
01387 return 0;
01388 }
01389 p += strlen(startTag);
01390
01391
01392 while (p && *p && !StringEqual(p, endTag, false, encoding)) {
01393 value += *p;
01394 ++p;
01395 }
01396 TIXML_STRING dummy;
01397 p = ReadText(p, &dummy, false, endTag, false, encoding);
01398 return p;
01399 } else {
01400 bool ignoreWhite = true;
01401
01402 const char* end = "<";
01403 p = ReadText(p, &value, ignoreWhite, end, false, encoding);
01404 if (p)
01405 return p - 1;
01406 return 0;
01407 }
01408 }
01409
01410 #ifdef TIXML_USE_STL
01411 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
01412 {
01413 while ( in->good() )
01414 {
01415 int c = in->get();
01416 if ( c <= 0 )
01417 {
01418 TiXmlDocument* document = GetDocument();
01419 if ( document )
01420 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01421 return;
01422 }
01423 (*tag) += (char) c;
01424
01425 if ( c == '>' )
01426 {
01427
01428 return;
01429 }
01430 }
01431 }
01432 #endif
01433
01434 const char* TiXmlDeclaration::Parse(const char* p, TiXmlParsingData* data,
01435 TiXmlEncoding _encoding) {
01436 p = SkipWhiteSpace(p, _encoding);
01437
01438
01439 TiXmlDocument* document = GetDocument();
01440 if (!p || !*p || !StringEqual(p, "<?xml", true, _encoding)) {
01441 if (document)
01442 document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding);
01443 return 0;
01444 }
01445 if (data) {
01446 data->Stamp(p, _encoding);
01447 location = data->Cursor();
01448 }
01449 p += 5;
01450
01451 version = "";
01452 encoding = "";
01453 standalone = "";
01454
01455 while (p && *p) {
01456 if (*p == '>') {
01457 ++p;
01458 return p;
01459 }
01460
01461 p = SkipWhiteSpace(p, _encoding);
01462 if (StringEqual(p, "version", true, _encoding)) {
01463 TiXmlAttribute attrib;
01464 p = attrib.Parse(p, data, _encoding);
01465 version = attrib.Value();
01466 } else if (StringEqual(p, "encoding", true, _encoding)) {
01467 TiXmlAttribute attrib;
01468 p = attrib.Parse(p, data, _encoding);
01469 encoding = attrib.Value();
01470 } else if (StringEqual(p, "standalone", true, _encoding)) {
01471 TiXmlAttribute attrib;
01472 p = attrib.Parse(p, data, _encoding);
01473 standalone = attrib.Value();
01474 } else {
01475
01476 while (p && *p && *p != '>' && !IsWhiteSpace(*p))
01477 ++p;
01478 }
01479 }
01480 return 0;
01481 }
01482
01483 bool TiXmlText::Blank() const {
01484 for (unsigned i = 0; i < value.length(); i++)
01485 if (!IsWhiteSpace(value[i]))
01486 return false;
01487 return true;
01488 }
01489