DSACSS Operational Code: tinyxmlparser.cpp Source File

00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied 
00006 warranty. In no event will the authors be held liable for any 
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any 
00010 purpose, including commercial applications, and to alter it and 
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must 
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and 
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source 
00022 distribution.
00023 */
00024 
00025 #include "tinyxml.h"
00026 #include <ctype.h>
00027 #include <stddef.h>
00028 
00029 //#define DEBUG_PARSER          // Uncomment to enable the TIXML_LOG definitions below.
00030 #if defined( DEBUG_PARSER )
00031 #       if defined( DEBUG ) && defined( _MSC_VER )
00032 #               include <windows.h>
00033 #               define TIXML_LOG OutputDebugString      // DEBUG build with _MSC_VER defined: log through OutputDebugString
00034 #       else
00035 #               define TIXML_LOG printf                 // Otherwise: log through printf
00036 #       endif
00037 #endif
00038 
00039 // Note that "PutString" hardcodes the same list. This
00040 // is less flexible than it appears. Changing the entries
00041 // or order will break PutString.
00042 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =       // Each row: { entity text, length of that text, character it stands for }
00043 {
00044         { "&amp;",  5, '&' },     // ampersand
00045         { "&lt;",   4, '<' },     // less-than
00046         { "&gt;",   4, '>' },     // greater-than
00047         { "&quot;", 6, '\"' },    // double quote
00048         { "&apos;", 6, '\'' }     // single quote
00049 };
00050 
00051 // Bunch of unicode info at:
00052 //              http://www.unicode.org/faq/utf_bom.html
00053 // Including the basis of this table, which determines the #bytes in the
00054 // sequence from the lead byte. 1 placed for invalid sequences --
00055 // although the result will be junk, pass it through as much as possible.
00056 // Beware of the non-characters in UTF-8:       
00057 //                              ef bb bf (Microsoft "lead bytes")
00058 //                              ef bf be
00059 //                              ef bf bf 
00060 
00061 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;     // First byte of the "ef bb bf" lead-byte sequence; also the lead byte of the ef bf be / ef bf bf non-characters
00062 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;     // Second byte of the "ef bb bf" sequence
00063 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;     // Third byte of the "ef bb bf" sequence
00064 
00065 const int TiXmlBase::utf8ByteTable[256] =         // Indexed by a lead byte; yields the number of bytes in that UTF-8 sequence (1 for invalid lead bytes)
00066 {
00067         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
00068                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00
00069                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10
00070                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20
00071                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30
00072                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40
00073                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50
00074                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60
00075                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
00076                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
00077                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90 
00078                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0 
00079                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0 
00080                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
00081                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
00082                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte
00083                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00084 };
00085 
00086 
00087 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00088 {
00089         const unsigned long BYTE_MASK = 0xBF;
00090         const unsigned long BYTE_MARK = 0x80;
00091         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00092 
00093         if (input < 0x80) 
00094                 *length = 1;
00095         else if ( input < 0x800 )
00096                 *length = 2;
00097         else if ( input < 0x10000 )
00098                 *length = 3;
00099         else if ( input < 0x200000 )
00100                 *length = 4;
00101         else
00102                 { *length = 0; return; }        // This code won't covert this correctly anyway.
00103 
00104         output += *length;
00105 
00106         // Scary scary fall throughs.
00107         switch (*length) 
00108         {
00109                 case 4:
00110                         --output; 
00111                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00112                         input >>= 6;
00113                 case 3:
00114                         --output; 
00115                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00116                         input >>= 6;
00117                 case 2:
00118                         --output; 
00119                         *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00120                         input >>= 6;
00121                 case 1:
00122                         --output; 
00123                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
00124         }
00125 }
00126 
00127 
00128 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00129 {
00130         // This will only work for low-ascii, everything else is assumed to be a valid
00131         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00132         // to figure out alhabetical vs. not across encoding. So take a very 
00133         // conservative approach.
00134 
00135 //      if ( encoding == TIXML_ENCODING_UTF8 )
00136 //      {
00137                 if ( anyByte < 127 )
00138                         return isalpha( anyByte );
00139                 else
00140                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00141 //      }
00142 //      else
00143 //      {
00144 //              return isalpha( anyByte );
00145 //      }
00146 }
00147 
00148 
00149 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00150 {
00151         // This will only work for low-ascii, everything else is assumed to be a valid
00152         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00153         // to figure out alhabetical vs. not across encoding. So take a very 
00154         // conservative approach.
00155 
00156 //      if ( encoding == TIXML_ENCODING_UTF8 )
00157 //      {
00158                 if ( anyByte < 127 )
00159                         return isalnum( anyByte );
00160                 else
00161                         return 1;       // What else to do? The unicode set is huge...get the english ones right.
00162 //      }
00163 //      else
00164 //      {
00165 //              return isalnum( anyByte );
00166 //      }
00167 }
00168 
00169 
00170 class TiXmlParsingData
00171 {
00172         friend class TiXmlDocument;
00173   public:
00174         void Stamp( const char* now, TiXmlEncoding encoding );
00175 
00176         const TiXmlCursor& Cursor()     { return cursor; }
00177 
00178   private:
00179         // Only used by the document!
00180         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00181         {
00182                 assert( start );
00183                 stamp = start;
00184                 tabsize = _tabsize;
00185                 cursor.row = row;
00186                 cursor.col = col;
00187         }
00188 
00189         TiXmlCursor             cursor;
00190         const char*             stamp;
00191         int                             tabsize;
00192 };
00193 
00194 
00195 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00196 {
00197         assert( now );
00198 
00199         // Do nothing if the tabsize is 0.
00200         if ( tabsize < 1 )
00201         {
00202                 return;
00203         }
00204 
00205         // Get the current row, column.
00206         int row = cursor.row;
00207         int col = cursor.col;
00208         const char* p = stamp;
00209         assert( p );
00210 
00211         while ( p < now )
00212         {
00213                 // Treat p as unsigned, so we have a happy compiler.
00214                 const unsigned char* pU = (const unsigned char*)p;
00215 
00216                 // Code contributed by Fletcher Dunn: (modified by lee)
00217                 switch (*pU) {
00218                         case 0:
00219                                 // We *should* never get here, but in case we do, don't
00220                                 // advance past the terminating null character, ever
00221                                 return;
00222 
00223                         case '\r':
00224                                 // bump down to the next line
00225                                 ++row;
00226                                 col = 0;                                
00227                                 // Eat the character
00228                                 ++p;
00229 
00230                                 // Check for \r\n sequence, and treat this as a single character
00231                                 if (*p == '\n') {
00232                                         ++p;
00233                                 }
00234                                 break;
00235 
00236                         case '\n':
00237                                 // bump down to the next line
00238                                 ++row;
00239                                 col = 0;
00240 
00241                                 // Eat the character
00242                                 ++p;
00243 
00244                                 // Check for \n\r sequence, and treat this as a single
00245                                 // character.  (Yes, this bizarre thing does occur still
00246                                 // on some arcane platforms...)
00247                                 if (*p == '\r') {
00248                                         ++p;
00249                                 }
00250                                 break;
00251 
00252                         case '\t':
00253                                 // Eat the character
00254                                 ++p;
00255 
00256                                 // Skip to next tab stop
00257                                 col = (col / tabsize + 1) * tabsize;
00258                                 break;
00259 
00260                         case TIXML_UTF_LEAD_0:
00261                                 if ( encoding == TIXML_ENCODING_UTF8 )
00262                                 {
00263                                         if ( *(p+1) && *(p+2) )
00264                                         {
00265                                                 // In these cases, don't advance the column. These are
00266                                                 // 0-width spaces.
00267                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00268                                                         p += 3; 
00269                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00270                                                         p += 3; 
00271                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00272                                                         p += 3; 
00273                                                 else
00274                                                         { p +=3; ++col; }       // A normal character.
00275                                         }
00276                                 }
00277                                 else
00278                                 {
00279                                         ++p;
00280                                         ++col;
00281                                 }
00282                                 break;
00283 
00284                         default:
00285                                 if ( encoding == TIXML_ENCODING_UTF8 )
00286                                 {
00287                                         // Eat the 1 to 4 byte utf8 character.
00288                                         int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
00289                                         if ( step == 0 )
00290                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
00291                                         p += step;
00292 
00293                                         // Just advance one column, of course.
00294                                         ++col;
00295                                 }
00296                                 else
00297                                 {
00298                                         ++p;
00299                                         ++col;
00300                                 }
00301                                 break;
00302                 }
00303         }
00304         cursor.row = row;
00305         cursor.col = col;
00306         assert( cursor.row >= -1 );
00307         assert( cursor.col >= -1 );
00308         stamp = p;
00309         assert( stamp );
00310 }
00311 
00312 
00313 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00314 {
00315         if ( !p || !*p )
00316         {
00317                 return 0;
00318         }
00319         if ( encoding == TIXML_ENCODING_UTF8 )
00320         {
00321                 while ( *p )
00322                 {
00323                         const unsigned char* pU = (const unsigned char*)p;
00324                         
00325                         // Skip the stupid Microsoft UTF-8 Byte order marks
00326                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00327                                  && *(pU+1)==TIXML_UTF_LEAD_1 
00328                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00329                         {
00330                                 p += 3;
00331                                 continue;
00332                         }
00333                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00334                                  && *(pU+1)==0xbfU
00335                                  && *(pU+2)==0xbeU )
00336                         {
00337                                 p += 3;
00338                                 continue;
00339                         }
00340                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00341                                  && *(pU+1)==0xbfU
00342                                  && *(pU+2)==0xbfU )
00343                         {
00344                                 p += 3;
00345                                 continue;
00346                         }
00347 
00348                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00349                                 ++p;
00350                         else
00351                                 break;
00352                 }
00353         }
00354         else
00355         {
00356                 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00357                         ++p;
00358         }
00359 
00360         return p;
00361 }
00362 
00363 #ifdef TIXML_USE_STL
00364 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
00365 {
00366         for( ;; )
00367         {
00368                 if ( !in->good() ) return false;
00369 
00370                 int c = in->peek();
00371                 // At this scope, we can't get to a document. So fail silently.
00372                 if ( !IsWhiteSpace( c ) || c <= 0 )
00373                         return true;
00374 
00375                 *tag += (char) in->get();
00376         }
00377 }
00378 
00379 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
00380 {
00381         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00382         while ( in->good() )
00383         {
00384                 int c = in->peek();
00385                 if ( c == character )
00386                         return true;
00387                 if ( c <= 0 )           // Silent failure: can't get document at this scope
00388                         return false;
00389 
00390                 in->get();
00391                 *tag += (char) c;
00392         }
00393         return false;
00394 }
00395 #endif
00396 
00397 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00398 {
00399         *name = "";
00400         assert( p );
00401 
00402         // Names start with letters or underscores.
00403         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00404         // algorithm is generous.
00405         //
00406         // After that, they can be letters, underscores, numbers,
00407         // hyphens, or colons. (Colons are valid ony for namespaces,
00408         // but tinyxml can't tell namespaces from names.)
00409         if (    p && *p 
00410                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00411         {
00412                 while(          p && *p
00413                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding ) 
00414                                                  || *p == '_'
00415                                                  || *p == '-'
00416                                                  || *p == '.'
00417                                                  || *p == ':' ) )
00418                 {
00419                         (*name) += *p;
00420                         ++p;
00421                 }
00422                 return p;
00423         }
00424         return 0;
00425 }
00426 
00427 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00428 {
00429         // Presume an entity, and pull it out.
00430     TIXML_STRING ent;
00431         int i;
00432         *length = 0;
00433 
00434         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00435         {
00436                 unsigned long ucs = 0;
00437                 ptrdiff_t delta = 0;
00438                 unsigned mult = 1;
00439 
00440                 if ( *(p+2) == 'x' )
00441                 {
00442                         // Hexadecimal.
00443                         if ( !*(p+3) ) return 0;
00444 
00445                         const char* q = p+3;
00446                         q = strchr( q, ';' );
00447 
00448                         if ( !q || !*q ) return 0;
00449 
00450                         delta = q-p;
00451                         --q;
00452 
00453                         while ( *q != 'x' )
00454                         {
00455                                 if ( *q >= '0' && *q <= '9' )
00456                                         ucs += mult * (*q - '0');
00457                                 else if ( *q >= 'a' && *q <= 'f' )
00458                                         ucs += mult * (*q - 'a' + 10);
00459                                 else if ( *q >= 'A' && *q <= 'F' )
00460                                         ucs += mult * (*q - 'A' + 10 );
00461                                 else 
00462                                         return 0;
00463                                 mult *= 16;
00464                                 --q;
00465                         }
00466                 }
00467                 else
00468                 {
00469                         // Decimal.
00470                         if ( !*(p+2) ) return 0;
00471 
00472                         const char* q = p+2;
00473                         q = strchr( q, ';' );
00474 
00475                         if ( !q || !*q ) return 0;
00476 
00477                         delta = q-p;
00478                         --q;
00479 
00480                         while ( *q != '#' )
00481                         {
00482                                 if ( *q >= '0' && *q <= '9' )
00483                                         ucs += mult * (*q - '0');
00484                                 else 
00485                                         return 0;
00486                                 mult *= 10;
00487                                 --q;
00488                         }
00489                 }
00490                 if ( encoding == TIXML_ENCODING_UTF8 )
00491                 {
00492                         // convert the UCS to UTF-8
00493                         ConvertUTF32ToUTF8( ucs, value, length );
00494                 }
00495                 else
00496                 {
00497                         *value = (char)ucs;
00498                         *length = 1;
00499                 }
00500                 return p + delta + 1;
00501         }
00502 
00503         // Now try to match it.
00504         for( i=0; i<NUM_ENTITY; ++i )
00505         {
00506                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00507                 {
00508                         assert( strlen( entity[i].str ) == entity[i].strLength );
00509                         *value = entity[i].chr;
00510                         *length = 1;
00511                         return ( p + entity[i].strLength );
00512                 }
00513         }
00514 
00515         // So it wasn't an entity, its unrecognized, or something like that.
00516         *value = *p;    // Don't put back the last one, since we return it!
00517         return p+1;
00518 }
00519 
00520 
00521 bool TiXmlBase::StringEqual( const char* p,
00522                                                          const char* tag,
00523                                                          bool ignoreCase,
00524                                                          TiXmlEncoding encoding )
00525 {
00526         assert( p );
00527         assert( tag );
00528         if ( !p || !*p )
00529         {
00530                 assert( 0 );
00531                 return false;
00532         }
00533 
00534         const char* q = p;
00535 
00536         if ( ignoreCase )
00537         {
00538                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00539                 {
00540                         ++q;
00541                         ++tag;
00542                 }
00543 
00544                 if ( *tag == 0 )
00545                         return true;
00546         }
00547         else
00548         {
00549                 while ( *q && *tag && *q == *tag )
00550                 {
00551                         ++q;
00552                         ++tag;
00553                 }
00554 
00555                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00556                         return true;
00557         }
00558         return false;
00559 }
00560 
00561 const char* TiXmlBase::ReadText(        const char* p, 
00562                                                                         TIXML_STRING * text, 
00563                                                                         bool trimWhiteSpace, 
00564                                                                         const char* endTag, 
00565                                                                         bool caseInsensitive,
00566                                                                         TiXmlEncoding encoding )
00567 {
00568     *text = "";
00569         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00570                  || !condenseWhiteSpace )       // if true, whitespace is always kept
00571         {
00572                 // Keep all the white space.
00573                 while (    p && *p
00574                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00575                           )
00576                 {
00577                         int len;
00578                         char cArr[4] = { 0, 0, 0, 0 };
00579                         p = GetChar( p, cArr, &len, encoding );
00580                         text->append( cArr, len );
00581                 }
00582         }
00583         else
00584         {
00585                 bool whitespace = false;
00586 
00587                 // Remove leading white space:
00588                 p = SkipWhiteSpace( p, encoding );
00589                 while (    p && *p
00590                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00591                 {
00592                         if ( *p == '\r' || *p == '\n' )
00593                         {
00594                                 whitespace = true;
00595                                 ++p;
00596                         }
00597                         else if ( IsWhiteSpace( *p ) )
00598                         {
00599                                 whitespace = true;
00600                                 ++p;
00601                         }
00602                         else
00603                         {
00604                                 // If we've found whitespace, add it before the
00605                                 // new character. Any whitespace just becomes a space.
00606                                 if ( whitespace )
00607                                 {
00608                                         (*text) += ' ';
00609                                         whitespace = false;
00610                                 }
00611                                 int len;
00612                                 char cArr[4] = { 0, 0, 0, 0 };
00613                                 p = GetChar( p, cArr, &len, encoding );
00614                                 if ( len == 1 )
00615                                         (*text) += cArr[0];     // more efficient
00616                                 else
00617                                         text->append( cArr, len );
00618                         }
00619                 }
00620         }
00621         return p + strlen( endTag );
00622 }
00623 
00624 #ifdef TIXML_USE_STL
00625 
00626 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
00627 {
00628         // The basic issue with a document is that we don't know what we're
00629         // streaming. Read something presumed to be a tag (and hope), then
00630         // identify it, and call the appropriate stream method on the tag.
00631         //
00632         // This "pre-streaming" will never read the closing ">" so the
00633         // sub-tag can orient itself.
00634 
00635         if ( !StreamTo( in, '<', tag ) ) 
00636         {
00637                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00638                 return;
00639         }
00640 
00641         while ( in->good() )
00642         {
00643                 int tagIndex = (int) tag->length();
00644                 while ( in->good() && in->peek() != '>' )
00645                 {
00646                         int c = in->get();
00647                         if ( c <= 0 )
00648                         {
00649                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00650                                 break;
00651                         }
00652                         (*tag) += (char) c;
00653                 }
00654 
00655                 if ( in->good() )
00656                 {
00657                         // We now have something we presume to be a node of 
00658                         // some sort. Identify it, and call the node to
00659                         // continue streaming.
00660                         TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00661 
00662                         if ( node )
00663                         {
00664                                 node->StreamIn( in, tag );
00665                                 bool isElement = node->ToElement() != 0;
00666                                 delete node;
00667                                 node = 0;
00668 
00669                                 // If this is the root element, we're done. Parsing will be
00670                                 // done by the >> operator.
00671                                 if ( isElement )
00672                                 {
00673                                         return;
00674                                 }
00675                         }
00676                         else
00677                         {
00678                                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00679                                 return;
00680                         }
00681                 }
00682         }
00683         // We should have returned sooner.
00684         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00685 }
00686 
00687 #endif
00688 
00689 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00690 {
00691         ClearError();
00692 
00693         // Parse away, at the document level. Since a document
00694         // contains nothing but other tags, most of what happens
00695         // here is skipping white space.
00696         if ( !p || !*p )
00697         {
00698                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00699                 return 0;
00700         }
00701 
00702         // Note that, for a document, this needs to come
00703         // before the while space skip, so that parsing
00704         // starts from the pointer we are given.
00705         location.Clear();
00706         if ( prevData )
00707         {
00708                 location.row = prevData->cursor.row;
00709                 location.col = prevData->cursor.col;
00710         }
00711         else
00712         {
00713                 location.row = 0;
00714                 location.col = 0;
00715         }
00716         TiXmlParsingData data( p, TabSize(), location.row, location.col );
00717         location = data.Cursor();
00718 
00719         if ( encoding == TIXML_ENCODING_UNKNOWN )
00720         {
00721                 // Check for the Microsoft UTF-8 lead bytes.
00722                 const unsigned char* pU = (const unsigned char*)p;
00723                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00724                          && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00725                          && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00726                 {
00727                         encoding = TIXML_ENCODING_UTF8;
00728                         useMicrosoftBOM = true;
00729                 }
00730         }
00731 
00732     p = SkipWhiteSpace( p, encoding );
00733         if ( !p )
00734         {
00735                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00736                 return 0;
00737         }
00738 
00739         while ( p && *p )
00740         {
00741                 TiXmlNode* node = Identify( p, encoding );
00742                 if ( node )
00743                 {
00744                         p = node->Parse( p, &data, encoding );
00745                         LinkEndChild( node );
00746                 }
00747                 else
00748                 {
00749                         break;
00750                 }
00751 
00752                 // Did we get encoding info?
00753                 if (    encoding == TIXML_ENCODING_UNKNOWN
00754                          && node->ToDeclaration() )
00755                 {
00756                         TiXmlDeclaration* dec = node->ToDeclaration();
00757                         const char* enc = dec->Encoding();
00758                         assert( enc );
00759 
00760                         if ( *enc == 0 )
00761                                 encoding = TIXML_ENCODING_UTF8;
00762                         else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00763                                 encoding = TIXML_ENCODING_UTF8;
00764                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00765                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00766                         else 
00767                                 encoding = TIXML_ENCODING_LEGACY;
00768                 }
00769 
00770                 p = SkipWhiteSpace( p, encoding );
00771         }
00772 
00773         // Was this empty?
00774         if ( !firstChild ) {
00775                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00776                 return 0;
00777         }
00778 
00779         // All is well.
00780         return p;
00781 }
00782 
00783 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00784 {       
00785         // The first error in a chain is more accurate - don't set again!
00786         if ( error )
00787                 return;
00788 
00789         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00790         error   = true;
00791         errorId = err;
00792         errorDesc = errorString[ errorId ];
00793 
00794         errorLocation.Clear();
00795         if ( pError && data )
00796         {
00797                 data->Stamp( pError, encoding );
00798                 errorLocation = data->Cursor();
00799         }
00800 }
00801 
00802 
00803 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00804 {
00805         TiXmlNode* returnNode = 0;
00806 
00807         p = SkipWhiteSpace( p, encoding );
00808         if( !p || !*p || *p != '<' )
00809         {
00810                 return 0;
00811         }
00812 
00813         TiXmlDocument* doc = GetDocument();
00814         p = SkipWhiteSpace( p, encoding );
00815 
00816         if ( !p || !*p )
00817         {
00818                 return 0;
00819         }
00820 
00821         // What is this thing? 
00822         // - Elements start with a letter or underscore, but xml is reserved.
00823         // - Comments: <!--
00824         // - Decleration: <?xml
00825         // - Everthing else is unknown to tinyxml.
00826         //
00827 
00828         const char* xmlHeader = { "<?xml" };
00829         const char* commentHeader = { "<!--" };
00830         const char* dtdHeader = { "<!" };
00831         const char* cdataHeader = { "<![CDATA[" };
00832 
00833         if ( StringEqual( p, xmlHeader, true, encoding ) )
00834         {
00835                 #ifdef DEBUG_PARSER
00836                         TIXML_LOG( "XML parsing Declaration\n" );
00837                 #endif
00838                 returnNode = new TiXmlDeclaration();
00839         }
00840         else if ( StringEqual( p, commentHeader, false, encoding ) )
00841         {
00842                 #ifdef DEBUG_PARSER
00843                         TIXML_LOG( "XML parsing Comment\n" );
00844                 #endif
00845                 returnNode = new TiXmlComment();
00846         }
00847         else if ( StringEqual( p, cdataHeader, false, encoding ) )
00848         {
00849                 #ifdef DEBUG_PARSER
00850                         TIXML_LOG( "XML parsing CDATA\n" );
00851                 #endif
00852                 TiXmlText* text = new TiXmlText( "" );
00853                 text->SetCDATA( true );
00854                 returnNode = text;
00855         }
00856         else if ( StringEqual( p, dtdHeader, false, encoding ) )
00857         {
00858                 #ifdef DEBUG_PARSER
00859                         TIXML_LOG( "XML parsing Unknown(1)\n" );
00860                 #endif
00861                 returnNode = new TiXmlUnknown();
00862         }
00863         else if (    IsAlpha( *(p+1), encoding )
00864                           || *(p+1) == '_' )
00865         {
00866                 #ifdef DEBUG_PARSER
00867                         TIXML_LOG( "XML parsing Element\n" );
00868                 #endif
00869                 returnNode = new TiXmlElement( "" );
00870         }
00871         else
00872         {
00873                 #ifdef DEBUG_PARSER
00874                         TIXML_LOG( "XML parsing Unknown(2)\n" );
00875                 #endif
00876                 returnNode = new TiXmlUnknown();
00877         }
00878 
00879         if ( returnNode )
00880         {
00881                 // Set the parent, so it can report errors
00882                 returnNode->parent = this;
00883         }
00884         else
00885         {
00886                 if ( doc )
00887                         doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00888         }
00889         return returnNode;
00890 }
00891 
00892 #ifdef TIXML_USE_STL
00893 
00894 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
00895 {
00896         // We're called with some amount of pre-parsing. That is, some of "this"
00897         // element is in "tag". Go ahead and stream to the closing ">"
00898         while( in->good() )
00899         {
00900                 int c = in->get();
00901                 if ( c <= 0 )
00902                 {
00903                         TiXmlDocument* document = GetDocument();
00904                         if ( document )
00905                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00906                         return;
00907                 }
00908                 (*tag) += (char) c ;
00909                 
00910                 if ( c == '>' )
00911                         break;
00912         }
00913 
00914         if ( tag->length() < 3 ) return;
00915 
00916         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00917         // If not, identify and stream.
00918 
00919         if (    tag->at( tag->length() - 1 ) == '>' 
00920                  && tag->at( tag->length() - 2 ) == '/' )
00921         {
00922                 // All good!
00923                 return;
00924         }
00925         else if ( tag->at( tag->length() - 1 ) == '>' )
00926         {
00927                 // There is more. Could be:
00928                 //              text
00929                 //              closing tag
00930                 //              another node.
00931                 for ( ;; )
00932                 {
00933                         StreamWhiteSpace( in, tag );
00934 
00935                         // Do we have text?
00936                         if ( in->good() && in->peek() != '<' ) 
00937                         {
00938                                 // Yep, text.
00939                                 TiXmlText text( "" );
00940                                 text.StreamIn( in, tag );
00941 
00942                                 // What follows text is a closing tag or another node.
00943                                 // Go around again and figure it out.
00944                                 continue;
00945                         }
00946 
00947                         // We now have either a closing tag...or another node.
00948                         // We should be at a "<", regardless.
00949                         if ( !in->good() ) return;
00950                         assert( in->peek() == '<' );
00951                         int tagIndex = (int) tag->length();
00952 
00953                         bool closingTag = false;
00954                         bool firstCharFound = false;
00955 
00956                         for( ;; )
00957                         {
00958                                 if ( !in->good() )
00959                                         return;
00960 
00961                                 int c = in->peek();
00962                                 if ( c <= 0 )
00963                                 {
00964                                         TiXmlDocument* document = GetDocument();
00965                                         if ( document )
00966                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00967                                         return;
00968                                 }
00969                                 
00970                                 if ( c == '>' )
00971                                         break;
00972 
00973                                 *tag += (char) c;
00974                                 in->get();
00975 
00976                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00977                                 {
00978                                         firstCharFound = true;
00979                                         if ( c == '/' )
00980                                                 closingTag = true;
00981                                 }
00982                         }
00983                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.
00984                         // If it was not, the streaming will be done by the tag.
00985                         if ( closingTag )
00986                         {
00987                                 if ( !in->good() )
00988                                         return;
00989 
00990                                 int c = in->get();
00991                                 if ( c <= 0 )
00992                                 {
00993                                         TiXmlDocument* document = GetDocument();
00994                                         if ( document )
00995                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00996                                         return;
00997                                 }
00998                                 assert( c == '>' );
00999                                 *tag += (char) c;
01000 
01001                                 // We are done, once we've found our closing tag.
01002                                 return;
01003                         }
01004                         else
01005                         {
01006                                 // If not a closing tag, id it, and stream.
01007                                 const char* tagloc = tag->c_str() + tagIndex;
01008                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01009                                 if ( !node )
01010                                         return;
01011                                 node->StreamIn( in, tag );
01012                                 delete node;
01013                                 node = 0;
01014 
01015                                 // No return: go around from the beginning: text, closing tag, or node.
01016                         }
01017                 }
01018         }
01019 }
01020 #endif
01021 
01022 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01023 {
01024         p = SkipWhiteSpace( p, encoding );
01025         TiXmlDocument* document = GetDocument();
01026 
01027         if ( !p || !*p )
01028         {
01029                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01030                 return 0;
01031         }
01032 
01033         if ( data )
01034         {
01035                 data->Stamp( p, encoding );
01036                 location = data->Cursor();
01037         }
01038 
01039         if ( *p != '<' )
01040         {
01041                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01042                 return 0;
01043         }
01044 
01045         p = SkipWhiteSpace( p+1, encoding );
01046 
01047         // Read the name.
01048         const char* pErr = p;
01049 
01050     p = ReadName( p, &value, encoding );
01051         if ( !p || !*p )
01052         {
01053                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01054                 return 0;
01055         }
01056 
01057     TIXML_STRING endTag ("</");
01058         endTag += value;
01059         endTag += ">";
01060 
01061         // Check for and read attributes. Also look for an empty
01062         // tag or an end tag.
01063         while ( p && *p )
01064         {
01065                 pErr = p;
01066                 p = SkipWhiteSpace( p, encoding );
01067                 if ( !p || !*p )
01068                 {
01069                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01070                         return 0;
01071                 }
01072                 if ( *p == '/' )
01073                 {
01074                         ++p;
01075                         // Empty tag.
01076                         if ( *p  != '>' )
01077                         {
01078                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );             
01079                                 return 0;
01080                         }
01081                         return (p+1);
01082                 }
01083                 else if ( *p == '>' )
01084                 {
01085                         // Done with attributes (if there were any.)
01086                         // Read the value -- which can include other
01087                         // elements -- read the end tag, and return.
01088                         ++p;
01089                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01090                         if ( !p || !*p )
01091                                 return 0;
01092 
01093                         // We should find the end tag now
01094                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01095                         {
01096                                 p += endTag.length();
01097                                 return p;
01098                         }
01099                         else
01100                         {
01101                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01102                                 return 0;
01103                         }
01104                 }
01105                 else
01106                 {
01107                         // Try to read an attribute:
01108                         TiXmlAttribute* attrib = new TiXmlAttribute();
01109                         if ( !attrib )
01110                         {
01111                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01112                                 return 0;
01113                         }
01114 
01115                         attrib->SetDocument( document );
01116                         const char* pErr = p;
01117                         p = attrib->Parse( p, data, encoding );
01118 
01119                         if ( !p || !*p )
01120                         {
01121                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01122                                 delete attrib;
01123                                 return 0;
01124                         }
01125 
01126                         // Handle the strange case of double attributes:
01127                         TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01128                         if ( node )
01129                         {
01130                                 node->SetValue( attrib->Value() );
01131                                 delete attrib;
01132                                 return 0;
01133                         }
01134 
01135                         attributeSet.Add( attrib );
01136                 }
01137         }
01138         return p;
01139 }
01140 
01141 
01142 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01143 {
01144         TiXmlDocument* document = GetDocument();
01145 
01146         // Read in text and elements in any order.
01147         const char* pWithWhiteSpace = p;
01148         p = SkipWhiteSpace( p, encoding );
01149 
01150         while ( p && *p )
01151         {
01152                 if ( *p != '<' )
01153                 {
01154                         // Take what we have, make a text element.
01155                         TiXmlText* textNode = new TiXmlText( "" );
01156 
01157                         if ( !textNode )
01158                         {
01159                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01160                                     return 0;
01161                         }
01162 
01163                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
01164                         {
01165                                 p = textNode->Parse( p, data, encoding );
01166                         }
01167                         else
01168                         {
01169                                 // Special case: we want to keep the white space
01170                                 // so that leading spaces aren't removed.
01171                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01172                         }
01173 
01174                         if ( !textNode->Blank() )
01175                                 LinkEndChild( textNode );
01176                         else
01177                                 delete textNode;
01178                 } 
01179                 else 
01180                 {
01181                         // We hit a '<'
01182                         // Have we hit a new element or an end tag? This could also be
01183                         // a TiXmlText in the "CDATA" style.
01184                         if ( StringEqual( p, "</", false, encoding ) )
01185                         {
01186                                 return p;
01187                         }
01188                         else
01189                         {
01190                                 TiXmlNode* node = Identify( p, encoding );
01191                                 if ( node )
01192                                 {
01193                                         p = node->Parse( p, data, encoding );
01194                                         LinkEndChild( node );
01195                                 }                               
01196                                 else
01197                                 {
01198                                         return 0;
01199                                 }
01200                         }
01201                 }
01202                 pWithWhiteSpace = p;
01203                 p = SkipWhiteSpace( p, encoding );
01204         }
01205 
01206         if ( !p )
01207         {
01208                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01209         }       
01210         return p;
01211 }
01212 
01213 
01214 #ifdef TIXML_USE_STL
01215 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01216 {
01217         while ( in->good() )
01218         {
01219                 int c = in->get();      
01220                 if ( c <= 0 )
01221                 {
01222                         TiXmlDocument* document = GetDocument();
01223                         if ( document )
01224                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01225                         return;
01226                 }
01227                 (*tag) += (char) c;
01228 
01229                 if ( c == '>' )
01230                 {
01231                         // All is well.
01232                         return;         
01233                 }
01234         }
01235 }
01236 #endif
01237 
01238 
01239 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01240 {
01241         TiXmlDocument* document = GetDocument();
01242         p = SkipWhiteSpace( p, encoding );
01243 
01244         if ( data )
01245         {
01246                 data->Stamp( p, encoding );
01247                 location = data->Cursor();
01248         }
01249         if ( !p || !*p || *p != '<' )
01250         {
01251                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01252                 return 0;
01253         }
01254         ++p;
01255     value = "";
01256 
01257         while ( p && *p && *p != '>' )
01258         {
01259                 value += *p;
01260                 ++p;
01261         }
01262 
01263         if ( !p )
01264         {
01265                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01266         }
01267         if ( *p == '>' )
01268                 return p+1;
01269         return p;
01270 }
01271 
01272 #ifdef TIXML_USE_STL
01273 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01274 {
01275         while ( in->good() )
01276         {
01277                 int c = in->get();      
01278                 if ( c <= 0 )
01279                 {
01280                         TiXmlDocument* document = GetDocument();
01281                         if ( document )
01282                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01283                         return;
01284                 }
01285 
01286                 (*tag) += (char) c;
01287 
01288                 if ( c == '>' 
01289                          && tag->at( tag->length() - 2 ) == '-'
01290                          && tag->at( tag->length() - 3 ) == '-' )
01291                 {
01292                         // All is well.
01293                         return;         
01294                 }
01295         }
01296 }
01297 #endif
01298 
01299 
01300 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01301 {
01302         TiXmlDocument* document = GetDocument();
01303         value = "";
01304 
01305         p = SkipWhiteSpace( p, encoding );
01306 
01307         if ( data )
01308         {
01309                 data->Stamp( p, encoding );
01310                 location = data->Cursor();
01311         }
01312         const char* startTag = "<!--";
01313         const char* endTag   = "-->";
01314 
01315         if ( !StringEqual( p, startTag, false, encoding ) )
01316         {
01317                 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01318                 return 0;
01319         }
01320         p += strlen( startTag );
01321         p = ReadText( p, &value, false, endTag, false, encoding );
01322         return p;
01323 }
01324 
01325 
01326 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01327 {
01328         p = SkipWhiteSpace( p, encoding );
01329         if ( !p || !*p ) return 0;
01330 
01331         int tabsize = 4;
01332         if ( document )
01333                 tabsize = document->TabSize();
01334 
01335         if ( data )
01336         {
01337                 data->Stamp( p, encoding );
01338                 location = data->Cursor();
01339         }
01340         // Read the name, the '=' and the value.
01341         const char* pErr = p;
01342         p = ReadName( p, &name, encoding );
01343         if ( !p || !*p )
01344         {
01345                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01346                 return 0;
01347         }
01348         p = SkipWhiteSpace( p, encoding );
01349         if ( !p || !*p || *p != '=' )
01350         {
01351                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01352                 return 0;
01353         }
01354 
01355         ++p;    // skip '='
01356         p = SkipWhiteSpace( p, encoding );
01357         if ( !p || !*p )
01358         {
01359                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01360                 return 0;
01361         }
01362         
01363         const char* end;
01364 
01365         if ( *p == '\'' )
01366         {
01367                 ++p;
01368                 end = "\'";
01369                 p = ReadText( p, &value, false, end, false, encoding );
01370         }
01371         else if ( *p == '"' )
01372         {
01373                 ++p;
01374                 end = "\"";
01375                 p = ReadText( p, &value, false, end, false, encoding );
01376         }
01377         else
01378         {
01379                 // All attribute values should be in single or double quotes.
01380                 // But this is such a common error that the parser will try
01381                 // its best, even without them.
01382                 value = "";
01383                 while (    p && *p                                                                              // existence
01384                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01385                                 && *p != '/' && *p != '>' )                                             // tag end
01386                 {
01387                         value += *p;
01388                         ++p;
01389                 }
01390         }
01391         return p;
01392 }
01393 
01394 #ifdef TIXML_USE_STL
01395 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01396 {
01397         if ( cdata )
01398         {
01399                 int c = in->get();      
01400                 if ( c <= 0 )
01401                 {
01402                         TiXmlDocument* document = GetDocument();
01403                         if ( document )
01404                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01405                         return;
01406                 }
01407 
01408                 (*tag) += (char) c;
01409 
01410                 if ( c == '>' 
01411                          && tag->at( tag->length() - 2 ) == ']'
01412                          && tag->at( tag->length() - 3 ) == ']' )
01413                 {
01414                         // All is well.
01415                         return;         
01416                 }
01417         }
01418         else
01419         {
01420                 while ( in->good() )
01421                 {
01422                         int c = in->peek();     
01423                         if ( c == '<' )
01424                                 return;
01425                         if ( c <= 0 )
01426                         {
01427                                 TiXmlDocument* document = GetDocument();
01428                                 if ( document )
01429                                         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01430                                 return;
01431                         }
01432 
01433                         (*tag) += (char) c;
01434                         in->get();
01435                 }
01436         }
01437 }
01438 #endif
01439 
01440 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01441 {
01442         value = "";
01443         TiXmlDocument* document = GetDocument();
01444 
01445         if ( data )
01446         {
01447                 data->Stamp( p, encoding );
01448                 location = data->Cursor();
01449         }
01450 
01451         const char* const startTag = "<![CDATA[";
01452         const char* const endTag   = "]]>";
01453 
01454         if ( cdata || StringEqual( p, startTag, false, encoding ) )
01455         {
01456                 cdata = true;
01457 
01458                 if ( !StringEqual( p, startTag, false, encoding ) )
01459                 {
01460                         document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01461                         return 0;
01462                 }
01463                 p += strlen( startTag );
01464 
01465                 // Keep all the white space, ignore the encoding, etc.
01466                 while (    p && *p
01467                                 && !StringEqual( p, endTag, false, encoding )
01468                           )
01469                 {
01470                         value += *p;
01471                         ++p;
01472                 }
01473 
01474                 TIXML_STRING dummy; 
01475                 p = ReadText( p, &dummy, false, endTag, false, encoding );
01476                 return p;
01477         }
01478         else
01479         {
01480                 bool ignoreWhite = true;
01481 
01482                 const char* end = "<";
01483                 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01484                 if ( p )
01485                         return p-1;     // don't truncate the '<'
01486                 return 0;
01487         }
01488 }
01489 
01490 #ifdef TIXML_USE_STL
01491 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01492 {
01493         while ( in->good() )
01494         {
01495                 int c = in->get();
01496                 if ( c <= 0 )
01497                 {
01498                         TiXmlDocument* document = GetDocument();
01499                         if ( document )
01500                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01501                         return;
01502                 }
01503                 (*tag) += (char) c;
01504 
01505                 if ( c == '>' )
01506                 {
01507                         // All is well.
01508                         return;
01509                 }
01510         }
01511 }
01512 #endif
01513 
01514 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01515 {
01516         p = SkipWhiteSpace( p, _encoding );
01517         // Find the beginning, find the end, and look for
01518         // the stuff in-between.
01519         TiXmlDocument* document = GetDocument();
01520         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01521         {
01522                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01523                 return 0;
01524         }
01525         if ( data )
01526         {
01527                 data->Stamp( p, _encoding );
01528                 location = data->Cursor();
01529         }
01530         p += 5;
01531 
01532         version = "";
01533         encoding = "";
01534         standalone = "";
01535 
01536         while ( p && *p )
01537         {
01538                 if ( *p == '>' )
01539                 {
01540                         ++p;
01541                         return p;
01542                 }
01543 
01544                 p = SkipWhiteSpace( p, _encoding );
01545                 if ( StringEqual( p, "version", true, _encoding ) )
01546                 {
01547                         TiXmlAttribute attrib;
01548                         p = attrib.Parse( p, data, _encoding );         
01549                         version = attrib.Value();
01550                 }
01551                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01552                 {
01553                         TiXmlAttribute attrib;
01554                         p = attrib.Parse( p, data, _encoding );         
01555                         encoding = attrib.Value();
01556                 }
01557                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01558                 {
01559                         TiXmlAttribute attrib;
01560                         p = attrib.Parse( p, data, _encoding );         
01561                         standalone = attrib.Value();
01562                 }
01563                 else
01564                 {
01565                         // Read over whatever it is.
01566                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01567                                 ++p;
01568                 }
01569         }
01570         return 0;
01571 }
01572 
01573 bool TiXmlText::Blank() const
01574 {
01575         for ( unsigned i=0; i<value.length(); i++ )
01576                 if ( !IsWhiteSpace( value[i] ) )
01577                         return false;
01578         return true;
01579 }
01580