00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "tinyxml.h"
00026 #include <ctype.h>
00027 #include <stddef.h>
00028
00029
00030 #if defined( DEBUG_PARSER )
00031 # if defined( DEBUG ) && defined( _MSC_VER )
00032 # include <windows.h>
00033 # define TIXML_LOG OutputDebugString
00034 # else
00035 # define TIXML_LOG printf
00036 # endif
00037 #endif
00038
00039
00040
00041
00042 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00043 {
00044 { "&", 5, '&' },
00045 { "<", 4, '<' },
00046 { ">", 4, '>' },
00047 { """, 6, '\"' },
00048 { "'", 6, '\'' }
00049 };
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
// The three-byte sequence EF BB BF. The document parser treats this sequence
// at the start of the input as a marker that the data is UTF-8, and the
// whitespace and cursor code skip over it.
const unsigned char TIXML_UTF_LEAD_0 = 0xEF;
const unsigned char TIXML_UTF_LEAD_1 = 0xBB;
const unsigned char TIXML_UTF_LEAD_2 = 0xBF;
00064
00065 const int TiXmlBase::utf8ByteTable[256] =
00066 {
00067
00068 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00079 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00080 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00081 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00082 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00083 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
00084 };
00085
00086
00087 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00088 {
00089 const unsigned long BYTE_MASK = 0xBF;
00090 const unsigned long BYTE_MARK = 0x80;
00091 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00092
00093 if (input < 0x80)
00094 *length = 1;
00095 else if ( input < 0x800 )
00096 *length = 2;
00097 else if ( input < 0x10000 )
00098 *length = 3;
00099 else if ( input < 0x200000 )
00100 *length = 4;
00101 else
00102 { *length = 0; return; }
00103
00104 output += *length;
00105
00106
00107 switch (*length)
00108 {
00109 case 4:
00110 --output;
00111 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00112 input >>= 6;
00113 case 3:
00114 --output;
00115 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00116 input >>= 6;
00117 case 2:
00118 --output;
00119 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00120 input >>= 6;
00121 case 1:
00122 --output;
00123 *output = (char)(input | FIRST_BYTE_MARK[*length]);
00124 }
00125 }
00126
00127
00128 int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
00129 {
00130
00131
00132
00133
00134
00135
00136
00137 if ( anyByte < 127 )
00138 return isalpha( anyByte );
00139 else
00140 return 1;
00141
00142
00143
00144
00145
00146 }
00147
00148
00149 int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
00150 {
00151
00152
00153
00154
00155
00156
00157
00158 if ( anyByte < 127 )
00159 return isalnum( anyByte );
00160 else
00161 return 1;
00162
00163
00164
00165
00166
00167 }
00168
00169
00170 class TiXmlParsingData
00171 {
00172 friend class TiXmlDocument;
00173 public:
00174 void Stamp( const char* now, TiXmlEncoding encoding );
00175
00176 const TiXmlCursor& Cursor() { return cursor; }
00177
00178 private:
00179
00180 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00181 {
00182 assert( start );
00183 stamp = start;
00184 tabsize = _tabsize;
00185 cursor.row = row;
00186 cursor.col = col;
00187 }
00188
00189 TiXmlCursor cursor;
00190 const char* stamp;
00191 int tabsize;
00192 };
00193
00194
00195 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00196 {
00197 assert( now );
00198
00199
00200 if ( tabsize < 1 )
00201 {
00202 return;
00203 }
00204
00205
00206 int row = cursor.row;
00207 int col = cursor.col;
00208 const char* p = stamp;
00209 assert( p );
00210
00211 while ( p < now )
00212 {
00213
00214 const unsigned char* pU = (const unsigned char*)p;
00215
00216
00217 switch (*pU) {
00218 case 0:
00219
00220
00221 return;
00222
00223 case '\r':
00224
00225 ++row;
00226 col = 0;
00227
00228 ++p;
00229
00230
00231 if (*p == '\n') {
00232 ++p;
00233 }
00234 break;
00235
00236 case '\n':
00237
00238 ++row;
00239 col = 0;
00240
00241
00242 ++p;
00243
00244
00245
00246
00247 if (*p == '\r') {
00248 ++p;
00249 }
00250 break;
00251
00252 case '\t':
00253
00254 ++p;
00255
00256
00257 col = (col / tabsize + 1) * tabsize;
00258 break;
00259
00260 case TIXML_UTF_LEAD_0:
00261 if ( encoding == TIXML_ENCODING_UTF8 )
00262 {
00263 if ( *(p+1) && *(p+2) )
00264 {
00265
00266
00267 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00268 p += 3;
00269 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00270 p += 3;
00271 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00272 p += 3;
00273 else
00274 { p +=3; ++col; }
00275 }
00276 }
00277 else
00278 {
00279 ++p;
00280 ++col;
00281 }
00282 break;
00283
00284 default:
00285 if ( encoding == TIXML_ENCODING_UTF8 )
00286 {
00287
00288 int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
00289 if ( step == 0 )
00290 step = 1;
00291 p += step;
00292
00293
00294 ++col;
00295 }
00296 else
00297 {
00298 ++p;
00299 ++col;
00300 }
00301 break;
00302 }
00303 }
00304 cursor.row = row;
00305 cursor.col = col;
00306 assert( cursor.row >= -1 );
00307 assert( cursor.col >= -1 );
00308 stamp = p;
00309 assert( stamp );
00310 }
00311
00312
00313 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00314 {
00315 if ( !p || !*p )
00316 {
00317 return 0;
00318 }
00319 if ( encoding == TIXML_ENCODING_UTF8 )
00320 {
00321 while ( *p )
00322 {
00323 const unsigned char* pU = (const unsigned char*)p;
00324
00325
00326 if ( *(pU+0)==TIXML_UTF_LEAD_0
00327 && *(pU+1)==TIXML_UTF_LEAD_1
00328 && *(pU+2)==TIXML_UTF_LEAD_2 )
00329 {
00330 p += 3;
00331 continue;
00332 }
00333 else if(*(pU+0)==TIXML_UTF_LEAD_0
00334 && *(pU+1)==0xbfU
00335 && *(pU+2)==0xbeU )
00336 {
00337 p += 3;
00338 continue;
00339 }
00340 else if(*(pU+0)==TIXML_UTF_LEAD_0
00341 && *(pU+1)==0xbfU
00342 && *(pU+2)==0xbfU )
00343 {
00344 p += 3;
00345 continue;
00346 }
00347
00348 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00349 ++p;
00350 else
00351 break;
00352 }
00353 }
00354 else
00355 {
00356 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00357 ++p;
00358 }
00359
00360 return p;
00361 }
00362
00363 #ifdef TIXML_USE_STL
00364 bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
00365 {
00366 for( ;; )
00367 {
00368 if ( !in->good() ) return false;
00369
00370 int c = in->peek();
00371
00372 if ( !IsWhiteSpace( c ) || c <= 0 )
00373 return true;
00374
00375 *tag += (char) in->get();
00376 }
00377 }
00378
00379 bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
00380 {
00381
00382 while ( in->good() )
00383 {
00384 int c = in->peek();
00385 if ( c == character )
00386 return true;
00387 if ( c <= 0 )
00388 return false;
00389
00390 in->get();
00391 *tag += (char) c;
00392 }
00393 return false;
00394 }
00395 #endif
00396
00397 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00398 {
00399 *name = "";
00400 assert( p );
00401
00402
00403
00404
00405
00406
00407
00408
00409 if ( p && *p
00410 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00411 {
00412 while( p && *p
00413 && ( IsAlphaNum( (unsigned char ) *p, encoding )
00414 || *p == '_'
00415 || *p == '-'
00416 || *p == '.'
00417 || *p == ':' ) )
00418 {
00419 (*name) += *p;
00420 ++p;
00421 }
00422 return p;
00423 }
00424 return 0;
00425 }
00426
00427 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00428 {
00429
00430 TIXML_STRING ent;
00431 int i;
00432 *length = 0;
00433
00434 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00435 {
00436 unsigned long ucs = 0;
00437 ptrdiff_t delta = 0;
00438 unsigned mult = 1;
00439
00440 if ( *(p+2) == 'x' )
00441 {
00442
00443 if ( !*(p+3) ) return 0;
00444
00445 const char* q = p+3;
00446 q = strchr( q, ';' );
00447
00448 if ( !q || !*q ) return 0;
00449
00450 delta = q-p;
00451 --q;
00452
00453 while ( *q != 'x' )
00454 {
00455 if ( *q >= '0' && *q <= '9' )
00456 ucs += mult * (*q - '0');
00457 else if ( *q >= 'a' && *q <= 'f' )
00458 ucs += mult * (*q - 'a' + 10);
00459 else if ( *q >= 'A' && *q <= 'F' )
00460 ucs += mult * (*q - 'A' + 10 );
00461 else
00462 return 0;
00463 mult *= 16;
00464 --q;
00465 }
00466 }
00467 else
00468 {
00469
00470 if ( !*(p+2) ) return 0;
00471
00472 const char* q = p+2;
00473 q = strchr( q, ';' );
00474
00475 if ( !q || !*q ) return 0;
00476
00477 delta = q-p;
00478 --q;
00479
00480 while ( *q != '#' )
00481 {
00482 if ( *q >= '0' && *q <= '9' )
00483 ucs += mult * (*q - '0');
00484 else
00485 return 0;
00486 mult *= 10;
00487 --q;
00488 }
00489 }
00490 if ( encoding == TIXML_ENCODING_UTF8 )
00491 {
00492
00493 ConvertUTF32ToUTF8( ucs, value, length );
00494 }
00495 else
00496 {
00497 *value = (char)ucs;
00498 *length = 1;
00499 }
00500 return p + delta + 1;
00501 }
00502
00503
00504 for( i=0; i<NUM_ENTITY; ++i )
00505 {
00506 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00507 {
00508 assert( strlen( entity[i].str ) == entity[i].strLength );
00509 *value = entity[i].chr;
00510 *length = 1;
00511 return ( p + entity[i].strLength );
00512 }
00513 }
00514
00515
00516 *value = *p;
00517 return p+1;
00518 }
00519
00520
00521 bool TiXmlBase::StringEqual( const char* p,
00522 const char* tag,
00523 bool ignoreCase,
00524 TiXmlEncoding encoding )
00525 {
00526 assert( p );
00527 assert( tag );
00528 if ( !p || !*p )
00529 {
00530 assert( 0 );
00531 return false;
00532 }
00533
00534 const char* q = p;
00535
00536 if ( ignoreCase )
00537 {
00538 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00539 {
00540 ++q;
00541 ++tag;
00542 }
00543
00544 if ( *tag == 0 )
00545 return true;
00546 }
00547 else
00548 {
00549 while ( *q && *tag && *q == *tag )
00550 {
00551 ++q;
00552 ++tag;
00553 }
00554
00555 if ( *tag == 0 )
00556 return true;
00557 }
00558 return false;
00559 }
00560
00561 const char* TiXmlBase::ReadText( const char* p,
00562 TIXML_STRING * text,
00563 bool trimWhiteSpace,
00564 const char* endTag,
00565 bool caseInsensitive,
00566 TiXmlEncoding encoding )
00567 {
00568 *text = "";
00569 if ( !trimWhiteSpace
00570 || !condenseWhiteSpace )
00571 {
00572
00573 while ( p && *p
00574 && !StringEqual( p, endTag, caseInsensitive, encoding )
00575 )
00576 {
00577 int len;
00578 char cArr[4] = { 0, 0, 0, 0 };
00579 p = GetChar( p, cArr, &len, encoding );
00580 text->append( cArr, len );
00581 }
00582 }
00583 else
00584 {
00585 bool whitespace = false;
00586
00587
00588 p = SkipWhiteSpace( p, encoding );
00589 while ( p && *p
00590 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00591 {
00592 if ( *p == '\r' || *p == '\n' )
00593 {
00594 whitespace = true;
00595 ++p;
00596 }
00597 else if ( IsWhiteSpace( *p ) )
00598 {
00599 whitespace = true;
00600 ++p;
00601 }
00602 else
00603 {
00604
00605
00606 if ( whitespace )
00607 {
00608 (*text) += ' ';
00609 whitespace = false;
00610 }
00611 int len;
00612 char cArr[4] = { 0, 0, 0, 0 };
00613 p = GetChar( p, cArr, &len, encoding );
00614 if ( len == 1 )
00615 (*text) += cArr[0];
00616 else
00617 text->append( cArr, len );
00618 }
00619 }
00620 }
00621 return p + strlen( endTag );
00622 }
00623
00624 #ifdef TIXML_USE_STL
00625
00626 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
00627 {
00628
00629
00630
00631
00632
00633
00634
00635 if ( !StreamTo( in, '<', tag ) )
00636 {
00637 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00638 return;
00639 }
00640
00641 while ( in->good() )
00642 {
00643 int tagIndex = (int) tag->length();
00644 while ( in->good() && in->peek() != '>' )
00645 {
00646 int c = in->get();
00647 if ( c <= 0 )
00648 {
00649 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00650 break;
00651 }
00652 (*tag) += (char) c;
00653 }
00654
00655 if ( in->good() )
00656 {
00657
00658
00659
00660 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00661
00662 if ( node )
00663 {
00664 node->StreamIn( in, tag );
00665 bool isElement = node->ToElement() != 0;
00666 delete node;
00667 node = 0;
00668
00669
00670
00671 if ( isElement )
00672 {
00673 return;
00674 }
00675 }
00676 else
00677 {
00678 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00679 return;
00680 }
00681 }
00682 }
00683
00684 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00685 }
00686
00687 #endif
00688
00689 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00690 {
00691 ClearError();
00692
00693
00694
00695
00696 if ( !p || !*p )
00697 {
00698 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00699 return 0;
00700 }
00701
00702
00703
00704
00705 location.Clear();
00706 if ( prevData )
00707 {
00708 location.row = prevData->cursor.row;
00709 location.col = prevData->cursor.col;
00710 }
00711 else
00712 {
00713 location.row = 0;
00714 location.col = 0;
00715 }
00716 TiXmlParsingData data( p, TabSize(), location.row, location.col );
00717 location = data.Cursor();
00718
00719 if ( encoding == TIXML_ENCODING_UNKNOWN )
00720 {
00721
00722 const unsigned char* pU = (const unsigned char*)p;
00723 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00724 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00725 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00726 {
00727 encoding = TIXML_ENCODING_UTF8;
00728 useMicrosoftBOM = true;
00729 }
00730 }
00731
00732 p = SkipWhiteSpace( p, encoding );
00733 if ( !p )
00734 {
00735 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00736 return 0;
00737 }
00738
00739 while ( p && *p )
00740 {
00741 TiXmlNode* node = Identify( p, encoding );
00742 if ( node )
00743 {
00744 p = node->Parse( p, &data, encoding );
00745 LinkEndChild( node );
00746 }
00747 else
00748 {
00749 break;
00750 }
00751
00752
00753 if ( encoding == TIXML_ENCODING_UNKNOWN
00754 && node->ToDeclaration() )
00755 {
00756 TiXmlDeclaration* dec = node->ToDeclaration();
00757 const char* enc = dec->Encoding();
00758 assert( enc );
00759
00760 if ( *enc == 0 )
00761 encoding = TIXML_ENCODING_UTF8;
00762 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00763 encoding = TIXML_ENCODING_UTF8;
00764 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00765 encoding = TIXML_ENCODING_UTF8;
00766 else
00767 encoding = TIXML_ENCODING_LEGACY;
00768 }
00769
00770 p = SkipWhiteSpace( p, encoding );
00771 }
00772
00773
00774 if ( !firstChild ) {
00775 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00776 return 0;
00777 }
00778
00779
00780 return p;
00781 }
00782
00783 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00784 {
00785
00786 if ( error )
00787 return;
00788
00789 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00790 error = true;
00791 errorId = err;
00792 errorDesc = errorString[ errorId ];
00793
00794 errorLocation.Clear();
00795 if ( pError && data )
00796 {
00797 data->Stamp( pError, encoding );
00798 errorLocation = data->Cursor();
00799 }
00800 }
00801
00802
00803 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00804 {
00805 TiXmlNode* returnNode = 0;
00806
00807 p = SkipWhiteSpace( p, encoding );
00808 if( !p || !*p || *p != '<' )
00809 {
00810 return 0;
00811 }
00812
00813 TiXmlDocument* doc = GetDocument();
00814 p = SkipWhiteSpace( p, encoding );
00815
00816 if ( !p || !*p )
00817 {
00818 return 0;
00819 }
00820
00821
00822
00823
00824
00825
00826
00827
00828 const char* xmlHeader = { "<?xml" };
00829 const char* commentHeader = { "<!--" };
00830 const char* dtdHeader = { "<!" };
00831 const char* cdataHeader = { "<![CDATA[" };
00832
00833 if ( StringEqual( p, xmlHeader, true, encoding ) )
00834 {
00835 #ifdef DEBUG_PARSER
00836 TIXML_LOG( "XML parsing Declaration\n" );
00837 #endif
00838 returnNode = new TiXmlDeclaration();
00839 }
00840 else if ( StringEqual( p, commentHeader, false, encoding ) )
00841 {
00842 #ifdef DEBUG_PARSER
00843 TIXML_LOG( "XML parsing Comment\n" );
00844 #endif
00845 returnNode = new TiXmlComment();
00846 }
00847 else if ( StringEqual( p, cdataHeader, false, encoding ) )
00848 {
00849 #ifdef DEBUG_PARSER
00850 TIXML_LOG( "XML parsing CDATA\n" );
00851 #endif
00852 TiXmlText* text = new TiXmlText( "" );
00853 text->SetCDATA( true );
00854 returnNode = text;
00855 }
00856 else if ( StringEqual( p, dtdHeader, false, encoding ) )
00857 {
00858 #ifdef DEBUG_PARSER
00859 TIXML_LOG( "XML parsing Unknown(1)\n" );
00860 #endif
00861 returnNode = new TiXmlUnknown();
00862 }
00863 else if ( IsAlpha( *(p+1), encoding )
00864 || *(p+1) == '_' )
00865 {
00866 #ifdef DEBUG_PARSER
00867 TIXML_LOG( "XML parsing Element\n" );
00868 #endif
00869 returnNode = new TiXmlElement( "" );
00870 }
00871 else
00872 {
00873 #ifdef DEBUG_PARSER
00874 TIXML_LOG( "XML parsing Unknown(2)\n" );
00875 #endif
00876 returnNode = new TiXmlUnknown();
00877 }
00878
00879 if ( returnNode )
00880 {
00881
00882 returnNode->parent = this;
00883 }
00884 else
00885 {
00886 if ( doc )
00887 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00888 }
00889 return returnNode;
00890 }
00891
00892 #ifdef TIXML_USE_STL
00893
00894 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
00895 {
00896
00897
00898 while( in->good() )
00899 {
00900 int c = in->get();
00901 if ( c <= 0 )
00902 {
00903 TiXmlDocument* document = GetDocument();
00904 if ( document )
00905 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00906 return;
00907 }
00908 (*tag) += (char) c ;
00909
00910 if ( c == '>' )
00911 break;
00912 }
00913
00914 if ( tag->length() < 3 ) return;
00915
00916
00917
00918
00919 if ( tag->at( tag->length() - 1 ) == '>'
00920 && tag->at( tag->length() - 2 ) == '/' )
00921 {
00922
00923 return;
00924 }
00925 else if ( tag->at( tag->length() - 1 ) == '>' )
00926 {
00927
00928
00929
00930
00931 for ( ;; )
00932 {
00933 StreamWhiteSpace( in, tag );
00934
00935
00936 if ( in->good() && in->peek() != '<' )
00937 {
00938
00939 TiXmlText text( "" );
00940 text.StreamIn( in, tag );
00941
00942
00943
00944 continue;
00945 }
00946
00947
00948
00949 if ( !in->good() ) return;
00950 assert( in->peek() == '<' );
00951 int tagIndex = (int) tag->length();
00952
00953 bool closingTag = false;
00954 bool firstCharFound = false;
00955
00956 for( ;; )
00957 {
00958 if ( !in->good() )
00959 return;
00960
00961 int c = in->peek();
00962 if ( c <= 0 )
00963 {
00964 TiXmlDocument* document = GetDocument();
00965 if ( document )
00966 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00967 return;
00968 }
00969
00970 if ( c == '>' )
00971 break;
00972
00973 *tag += (char) c;
00974 in->get();
00975
00976 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00977 {
00978 firstCharFound = true;
00979 if ( c == '/' )
00980 closingTag = true;
00981 }
00982 }
00983
00984
00985 if ( closingTag )
00986 {
00987 if ( !in->good() )
00988 return;
00989
00990 int c = in->get();
00991 if ( c <= 0 )
00992 {
00993 TiXmlDocument* document = GetDocument();
00994 if ( document )
00995 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00996 return;
00997 }
00998 assert( c == '>' );
00999 *tag += (char) c;
01000
01001
01002 return;
01003 }
01004 else
01005 {
01006
01007 const char* tagloc = tag->c_str() + tagIndex;
01008 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01009 if ( !node )
01010 return;
01011 node->StreamIn( in, tag );
01012 delete node;
01013 node = 0;
01014
01015
01016 }
01017 }
01018 }
01019 }
01020 #endif
01021
01022 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01023 {
01024 p = SkipWhiteSpace( p, encoding );
01025 TiXmlDocument* document = GetDocument();
01026
01027 if ( !p || !*p )
01028 {
01029 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01030 return 0;
01031 }
01032
01033 if ( data )
01034 {
01035 data->Stamp( p, encoding );
01036 location = data->Cursor();
01037 }
01038
01039 if ( *p != '<' )
01040 {
01041 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01042 return 0;
01043 }
01044
01045 p = SkipWhiteSpace( p+1, encoding );
01046
01047
01048 const char* pErr = p;
01049
01050 p = ReadName( p, &value, encoding );
01051 if ( !p || !*p )
01052 {
01053 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01054 return 0;
01055 }
01056
01057 TIXML_STRING endTag ("</");
01058 endTag += value;
01059 endTag += ">";
01060
01061
01062
01063 while ( p && *p )
01064 {
01065 pErr = p;
01066 p = SkipWhiteSpace( p, encoding );
01067 if ( !p || !*p )
01068 {
01069 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01070 return 0;
01071 }
01072 if ( *p == '/' )
01073 {
01074 ++p;
01075
01076 if ( *p != '>' )
01077 {
01078 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01079 return 0;
01080 }
01081 return (p+1);
01082 }
01083 else if ( *p == '>' )
01084 {
01085
01086
01087
01088 ++p;
01089 p = ReadValue( p, data, encoding );
01090 if ( !p || !*p )
01091 return 0;
01092
01093
01094 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01095 {
01096 p += endTag.length();
01097 return p;
01098 }
01099 else
01100 {
01101 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01102 return 0;
01103 }
01104 }
01105 else
01106 {
01107
01108 TiXmlAttribute* attrib = new TiXmlAttribute();
01109 if ( !attrib )
01110 {
01111 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01112 return 0;
01113 }
01114
01115 attrib->SetDocument( document );
01116 const char* pErr = p;
01117 p = attrib->Parse( p, data, encoding );
01118
01119 if ( !p || !*p )
01120 {
01121 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01122 delete attrib;
01123 return 0;
01124 }
01125
01126
01127 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01128 if ( node )
01129 {
01130 node->SetValue( attrib->Value() );
01131 delete attrib;
01132 return 0;
01133 }
01134
01135 attributeSet.Add( attrib );
01136 }
01137 }
01138 return p;
01139 }
01140
01141
01142 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01143 {
01144 TiXmlDocument* document = GetDocument();
01145
01146
01147 const char* pWithWhiteSpace = p;
01148 p = SkipWhiteSpace( p, encoding );
01149
01150 while ( p && *p )
01151 {
01152 if ( *p != '<' )
01153 {
01154
01155 TiXmlText* textNode = new TiXmlText( "" );
01156
01157 if ( !textNode )
01158 {
01159 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01160 return 0;
01161 }
01162
01163 if ( TiXmlBase::IsWhiteSpaceCondensed() )
01164 {
01165 p = textNode->Parse( p, data, encoding );
01166 }
01167 else
01168 {
01169
01170
01171 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01172 }
01173
01174 if ( !textNode->Blank() )
01175 LinkEndChild( textNode );
01176 else
01177 delete textNode;
01178 }
01179 else
01180 {
01181
01182
01183
01184 if ( StringEqual( p, "</", false, encoding ) )
01185 {
01186 return p;
01187 }
01188 else
01189 {
01190 TiXmlNode* node = Identify( p, encoding );
01191 if ( node )
01192 {
01193 p = node->Parse( p, data, encoding );
01194 LinkEndChild( node );
01195 }
01196 else
01197 {
01198 return 0;
01199 }
01200 }
01201 }
01202 pWithWhiteSpace = p;
01203 p = SkipWhiteSpace( p, encoding );
01204 }
01205
01206 if ( !p )
01207 {
01208 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01209 }
01210 return p;
01211 }
01212
01213
01214 #ifdef TIXML_USE_STL
01215 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01216 {
01217 while ( in->good() )
01218 {
01219 int c = in->get();
01220 if ( c <= 0 )
01221 {
01222 TiXmlDocument* document = GetDocument();
01223 if ( document )
01224 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01225 return;
01226 }
01227 (*tag) += (char) c;
01228
01229 if ( c == '>' )
01230 {
01231
01232 return;
01233 }
01234 }
01235 }
01236 #endif
01237
01238
01239 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01240 {
01241 TiXmlDocument* document = GetDocument();
01242 p = SkipWhiteSpace( p, encoding );
01243
01244 if ( data )
01245 {
01246 data->Stamp( p, encoding );
01247 location = data->Cursor();
01248 }
01249 if ( !p || !*p || *p != '<' )
01250 {
01251 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01252 return 0;
01253 }
01254 ++p;
01255 value = "";
01256
01257 while ( p && *p && *p != '>' )
01258 {
01259 value += *p;
01260 ++p;
01261 }
01262
01263 if ( !p )
01264 {
01265 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01266 }
01267 if ( *p == '>' )
01268 return p+1;
01269 return p;
01270 }
01271
01272 #ifdef TIXML_USE_STL
01273 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01274 {
01275 while ( in->good() )
01276 {
01277 int c = in->get();
01278 if ( c <= 0 )
01279 {
01280 TiXmlDocument* document = GetDocument();
01281 if ( document )
01282 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01283 return;
01284 }
01285
01286 (*tag) += (char) c;
01287
01288 if ( c == '>'
01289 && tag->at( tag->length() - 2 ) == '-'
01290 && tag->at( tag->length() - 3 ) == '-' )
01291 {
01292
01293 return;
01294 }
01295 }
01296 }
01297 #endif
01298
01299
01300 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01301 {
01302 TiXmlDocument* document = GetDocument();
01303 value = "";
01304
01305 p = SkipWhiteSpace( p, encoding );
01306
01307 if ( data )
01308 {
01309 data->Stamp( p, encoding );
01310 location = data->Cursor();
01311 }
01312 const char* startTag = "<!--";
01313 const char* endTag = "-->";
01314
01315 if ( !StringEqual( p, startTag, false, encoding ) )
01316 {
01317 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01318 return 0;
01319 }
01320 p += strlen( startTag );
01321 p = ReadText( p, &value, false, endTag, false, encoding );
01322 return p;
01323 }
01324
01325
01326 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01327 {
01328 p = SkipWhiteSpace( p, encoding );
01329 if ( !p || !*p ) return 0;
01330
01331 int tabsize = 4;
01332 if ( document )
01333 tabsize = document->TabSize();
01334
01335 if ( data )
01336 {
01337 data->Stamp( p, encoding );
01338 location = data->Cursor();
01339 }
01340
01341 const char* pErr = p;
01342 p = ReadName( p, &name, encoding );
01343 if ( !p || !*p )
01344 {
01345 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01346 return 0;
01347 }
01348 p = SkipWhiteSpace( p, encoding );
01349 if ( !p || !*p || *p != '=' )
01350 {
01351 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01352 return 0;
01353 }
01354
01355 ++p;
01356 p = SkipWhiteSpace( p, encoding );
01357 if ( !p || !*p )
01358 {
01359 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01360 return 0;
01361 }
01362
01363 const char* end;
01364
01365 if ( *p == '\'' )
01366 {
01367 ++p;
01368 end = "\'";
01369 p = ReadText( p, &value, false, end, false, encoding );
01370 }
01371 else if ( *p == '"' )
01372 {
01373 ++p;
01374 end = "\"";
01375 p = ReadText( p, &value, false, end, false, encoding );
01376 }
01377 else
01378 {
01379
01380
01381
01382 value = "";
01383 while ( p && *p
01384 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'
01385 && *p != '/' && *p != '>' )
01386 {
01387 value += *p;
01388 ++p;
01389 }
01390 }
01391 return p;
01392 }
01393
01394 #ifdef TIXML_USE_STL
01395 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01396 {
01397 if ( cdata )
01398 {
01399 int c = in->get();
01400 if ( c <= 0 )
01401 {
01402 TiXmlDocument* document = GetDocument();
01403 if ( document )
01404 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01405 return;
01406 }
01407
01408 (*tag) += (char) c;
01409
01410 if ( c == '>'
01411 && tag->at( tag->length() - 2 ) == ']'
01412 && tag->at( tag->length() - 3 ) == ']' )
01413 {
01414
01415 return;
01416 }
01417 }
01418 else
01419 {
01420 while ( in->good() )
01421 {
01422 int c = in->peek();
01423 if ( c == '<' )
01424 return;
01425 if ( c <= 0 )
01426 {
01427 TiXmlDocument* document = GetDocument();
01428 if ( document )
01429 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01430 return;
01431 }
01432
01433 (*tag) += (char) c;
01434 in->get();
01435 }
01436 }
01437 }
01438 #endif
01439
01440 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01441 {
01442 value = "";
01443 TiXmlDocument* document = GetDocument();
01444
01445 if ( data )
01446 {
01447 data->Stamp( p, encoding );
01448 location = data->Cursor();
01449 }
01450
01451 const char* const startTag = "<![CDATA[";
01452 const char* const endTag = "]]>";
01453
01454 if ( cdata || StringEqual( p, startTag, false, encoding ) )
01455 {
01456 cdata = true;
01457
01458 if ( !StringEqual( p, startTag, false, encoding ) )
01459 {
01460 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01461 return 0;
01462 }
01463 p += strlen( startTag );
01464
01465
01466 while ( p && *p
01467 && !StringEqual( p, endTag, false, encoding )
01468 )
01469 {
01470 value += *p;
01471 ++p;
01472 }
01473
01474 TIXML_STRING dummy;
01475 p = ReadText( p, &dummy, false, endTag, false, encoding );
01476 return p;
01477 }
01478 else
01479 {
01480 bool ignoreWhite = true;
01481
01482 const char* end = "<";
01483 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01484 if ( p )
01485 return p-1;
01486 return 0;
01487 }
01488 }
01489
01490 #ifdef TIXML_USE_STL
01491 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01492 {
01493 while ( in->good() )
01494 {
01495 int c = in->get();
01496 if ( c <= 0 )
01497 {
01498 TiXmlDocument* document = GetDocument();
01499 if ( document )
01500 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01501 return;
01502 }
01503 (*tag) += (char) c;
01504
01505 if ( c == '>' )
01506 {
01507
01508 return;
01509 }
01510 }
01511 }
01512 #endif
01513
01514 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01515 {
01516 p = SkipWhiteSpace( p, _encoding );
01517
01518
01519 TiXmlDocument* document = GetDocument();
01520 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01521 {
01522 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01523 return 0;
01524 }
01525 if ( data )
01526 {
01527 data->Stamp( p, _encoding );
01528 location = data->Cursor();
01529 }
01530 p += 5;
01531
01532 version = "";
01533 encoding = "";
01534 standalone = "";
01535
01536 while ( p && *p )
01537 {
01538 if ( *p == '>' )
01539 {
01540 ++p;
01541 return p;
01542 }
01543
01544 p = SkipWhiteSpace( p, _encoding );
01545 if ( StringEqual( p, "version", true, _encoding ) )
01546 {
01547 TiXmlAttribute attrib;
01548 p = attrib.Parse( p, data, _encoding );
01549 version = attrib.Value();
01550 }
01551 else if ( StringEqual( p, "encoding", true, _encoding ) )
01552 {
01553 TiXmlAttribute attrib;
01554 p = attrib.Parse( p, data, _encoding );
01555 encoding = attrib.Value();
01556 }
01557 else if ( StringEqual( p, "standalone", true, _encoding ) )
01558 {
01559 TiXmlAttribute attrib;
01560 p = attrib.Parse( p, data, _encoding );
01561 standalone = attrib.Value();
01562 }
01563 else
01564 {
01565
01566 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01567 ++p;
01568 }
01569 }
01570 return 0;
01571 }
01572
01573 bool TiXmlText::Blank() const
01574 {
01575 for ( unsigned i=0; i<value.length(); i++ )
01576 if ( !IsWhiteSpace( value[i] ) )
01577 return false;
01578 return true;
01579 }
01580