parser.c revision dab4cb37d8fceb7457c40effc1ae43559221610b
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * Daniel.Veillard@w3.org 31 * 32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue 33 * and xmlDoValidityCheckingDefaultValue for VMS 34 */ 35 36#ifdef WIN32 37#include "win32config.h" 38#define XML_DIR_SEP '\\' 39#else 40#include "config.h" 41#define XML_DIR_SEP '/' 42#endif 43 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <libxml/xmlmemory.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57 58#ifdef HAVE_CTYPE_H 59#include <ctype.h> 60#endif 61#ifdef HAVE_STDLIB_H 62#include <stdlib.h> 63#endif 64#ifdef HAVE_SYS_STAT_H 65#include <sys/stat.h> 66#endif 67#ifdef HAVE_FCNTL_H 68#include <fcntl.h> 69#endif 70#ifdef HAVE_UNISTD_H 71#include <unistd.h> 72#endif 73#ifdef HAVE_ZLIB_H 74#include <zlib.h> 75#endif 76 77 78#define XML_PARSER_BIG_BUFFER_SIZE 300 79#define XML_PARSER_BUFFER_SIZE 100 80 81/* 82 * Various global defaults for parsing 83 */ 84int xmlGetWarningsDefaultValue = 1; 85int xmlParserDebugEntities = 0; 86#ifdef VMS 87int xmlSubstituteEntitiesDefaultVal = 0; 88#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 89int xmlDoValidityCheckingDefaultVal = 0; 90#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal 91#else 92int xmlSubstituteEntitiesDefaultValue = 0; 93int xmlDoValidityCheckingDefaultValue = 0; 94#endif 95int xmlLoadExtDtdDefaultValue = 0; 96int xmlPedanticParserDefaultValue = 0; 97int xmlKeepBlanksDefaultValue = 1; 98 99/* 100 * List of XML prefixed PI allowed by W3C specs 101 */ 102 103const char *xmlW3CPIs[] = { 104 "xml-stylesheet", 105 NULL 106}; 107 108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 110xmlEntityPtr 
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 111 const xmlChar **str); 112 113 114/************************************************************************ 115 * * 116 * Parser stacks related functions and macros * 117 * * 118 ************************************************************************/ 119 120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 121 const xmlChar ** str); 122 123/* 124 * Generic function for accessing stacks in the Parser Context 125 */ 126 127#define PUSH_AND_POP(scope, type, name) \ 128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 129 if (ctxt->name##Nr >= ctxt->name##Max) { \ 130 ctxt->name##Max *= 2; \ 131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 133 if (ctxt->name##Tab == NULL) { \ 134 xmlGenericError(xmlGenericErrorContext, \ 135 "realloc failed !\n"); \ 136 return(0); \ 137 } \ 138 } \ 139 ctxt->name##Tab[ctxt->name##Nr] = value; \ 140 ctxt->name = value; \ 141 return(ctxt->name##Nr++); \ 142} \ 143scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 144 type ret; \ 145 if (ctxt->name##Nr <= 0) return(0); \ 146 ctxt->name##Nr--; \ 147 if (ctxt->name##Nr > 0) \ 148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 149 else \ 150 ctxt->name = NULL; \ 151 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 152 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 153 return(ret); \ 154} \ 155 156/* 157 * Those macros actually generate the functions 158 */ 159PUSH_AND_POP(extern, xmlParserInputPtr, input) 160PUSH_AND_POP(extern, xmlNodePtr, node) 161PUSH_AND_POP(extern, xmlChar*, name) 162 163static int spacePush(xmlParserCtxtPtr ctxt, int val) { 164 if (ctxt->spaceNr >= ctxt->spaceMax) { 165 ctxt->spaceMax *= 2; 166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 168 if (ctxt->spaceTab == NULL) { 169 xmlGenericError(xmlGenericErrorContext, 170 "realloc failed !\n"); 171 return(0); 172 } 173 } 174 
ctxt->spaceTab[ctxt->spaceNr] = val; 175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 176 return(ctxt->spaceNr++); 177} 178 179static int spacePop(xmlParserCtxtPtr ctxt) { 180 int ret; 181 if (ctxt->spaceNr <= 0) return(0); 182 ctxt->spaceNr--; 183 if (ctxt->spaceNr > 0) 184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 185 else 186 ctxt->space = NULL; 187 ret = ctxt->spaceTab[ctxt->spaceNr]; 188 ctxt->spaceTab[ctxt->spaceNr] = -1; 189 return(ret); 190} 191 192/* 193 * Macros for accessing the content. Those should be used only by the parser, 194 * and not exported. 195 * 196 * Dirty macros, i.e. one often need to make assumption on the context to 197 * use them 198 * 199 * CUR_PTR return the current pointer to the xmlChar to be parsed. 200 * To be used with extreme caution since operations consuming 201 * characters may move the input buffer to a different location ! 202 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 203 * This should be used internally by the parser 204 * only to compare to ASCII values otherwise it would break when 205 * running with UTF-8 encoding. 206 * RAW same as CUR but in the input buffer, bypass any token 207 * extraction that may have been done 208 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 209 * to compare on ASCII based substring. 210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 211 * strings within the parser. 212 * 213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 214 * 215 * NEXT Skip to the next character, this does the proper decoding 216 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 217 * NEXTL(l) Skip l xmlChars in the input buffer 218 * CUR_CHAR(l) returns the current unicode character (int), set l 219 * to the number of xmlChars used for the encoding [0-5]. 
220 * CUR_SCHAR same but operate on a string instead of the context 221 * COPY_BUF copy the current unicode char to the target buffer, increment 222 * the index 223 * GROW, SHRINK handling of input buffers 224 */ 225 226#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) 227#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur)) 228#define NXT(val) ctxt->input->cur[(val)] 229#define CUR_PTR ctxt->input->cur 230 231#define SKIP(val) do { \ 232 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 233 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 234 if ((*ctxt->input->cur == 0) && \ 235 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 236 xmlPopInput(ctxt); \ 237 } while (0) 238 239#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\ 240 xmlParserInputShrink(ctxt->input); \ 241 if ((*ctxt->input->cur == 0) && \ 242 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 243 xmlPopInput(ctxt); \ 244 } 245 246#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \ 247 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 248 if ((*ctxt->input->cur == 0) && \ 249 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 250 xmlPopInput(ctxt); \ 251 } 252 253#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 254 255#define NEXT xmlNextChar(ctxt) 256 257#define NEXT1 { \ 258 ctxt->input->cur++; \ 259 ctxt->nbChars++; \ 260 if (*ctxt->input->cur == 0) \ 261 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 262 } 263 264#define NEXTL(l) do { \ 265 if (*(ctxt->input->cur) == '\n') { \ 266 ctxt->input->line++; ctxt->input->col = 1; \ 267 } else ctxt->input->col++; \ 268 ctxt->token = 0; ctxt->input->cur += l; \ 269 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 270 } while (0) 271 272#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 273#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 274 275#define COPY_BUF(l,b,i,v) \ 276 if (l == 1) b[i++] = (xmlChar) v; \ 277 else i += xmlCopyCharMultiByte(&b[i],v) 278 
279/** 280 * xmlSkipBlankChars: 281 * @ctxt: the XML parser context 282 * 283 * skip all blanks character found at that point in the input streams. 284 * It pops up finished entities in the process if allowable at that point. 285 * 286 * Returns the number of space chars skipped 287 */ 288 289int 290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 291 int cur, res = 0; 292 293 /* 294 * It's Okay to use CUR/NEXT here since all the blanks are on 295 * the ASCII range. 296 */ 297 do { 298 cur = CUR; 299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 300 NEXT; 301 cur = CUR; 302 res++; 303 } 304 while ((cur == 0) && (ctxt->inputNr > 1) && 305 (ctxt->instate != XML_PARSER_COMMENT)) { 306 xmlPopInput(ctxt); 307 cur = CUR; 308 } 309 /* 310 * Need to handle support of entities branching here 311 */ 312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */ 314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 315 return(res); 316} 317 318/************************************************************************ 319 * * 320 * Commodity functions to handle entities * 321 * * 322 ************************************************************************/ 323 324/** 325 * xmlPopInput: 326 * @ctxt: an XML parser context 327 * 328 * xmlPopInput: the current input pointed by ctxt->input came to an end 329 * pop it and return the next char. 
330 * 331 * Returns the current xmlChar in the parser context 332 */ 333xmlChar 334xmlPopInput(xmlParserCtxtPtr ctxt) { 335 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 336 if (xmlParserDebugEntities) 337 xmlGenericError(xmlGenericErrorContext, 338 "Popping input %d\n", ctxt->inputNr); 339 xmlFreeInputStream(inputPop(ctxt)); 340 if ((*ctxt->input->cur == 0) && 341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 342 return(xmlPopInput(ctxt)); 343 return(CUR); 344} 345 346/** 347 * xmlPushInput: 348 * @ctxt: an XML parser context 349 * @input: an XML parser input fragment (entity, XML fragment ...). 350 * 351 * xmlPushInput: switch to a new input stream which is stacked on top 352 * of the previous one(s). 353 */ 354void 355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 356 if (input == NULL) return; 357 358 if (xmlParserDebugEntities) { 359 if ((ctxt->input != NULL) && (ctxt->input->filename)) 360 xmlGenericError(xmlGenericErrorContext, 361 "%s(%d): ", ctxt->input->filename, 362 ctxt->input->line); 363 xmlGenericError(xmlGenericErrorContext, 364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 365 } 366 inputPush(ctxt, input); 367 GROW; 368} 369 370/** 371 * xmlParseCharRef: 372 * @ctxt: an XML parser context 373 * 374 * parse Reference declarations 375 * 376 * [66] CharRef ::= '&#' [0-9]+ ';' | 377 * '&#x' [0-9a-fA-F]+ ';' 378 * 379 * [ WFC: Legal Character ] 380 * Characters referred to using character references must match the 381 * production for Char. 
382 * 383 * Returns the value parsed (as an int), 0 in case of error 384 */ 385int 386xmlParseCharRef(xmlParserCtxtPtr ctxt) { 387 unsigned int val = 0; 388 int count = 0; 389 390 if (ctxt->token != 0) { 391 val = ctxt->token; 392 ctxt->token = 0; 393 return(val); 394 } 395 /* 396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 397 */ 398 if ((RAW == '&') && (NXT(1) == '#') && 399 (NXT(2) == 'x')) { 400 SKIP(3); 401 GROW; 402 while (RAW != ';') { /* loop blocked by count */ 403 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 404 val = val * 16 + (CUR - '0'); 405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 406 val = val * 16 + (CUR - 'a') + 10; 407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 408 val = val * 16 + (CUR - 'A') + 10; 409 else { 410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 412 ctxt->sax->error(ctxt->userData, 413 "xmlParseCharRef: invalid hexadecimal value\n"); 414 ctxt->wellFormed = 0; 415 ctxt->disableSAX = 1; 416 val = 0; 417 break; 418 } 419 NEXT; 420 count++; 421 } 422 if (RAW == ';') { 423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 424 ctxt->nbChars ++; 425 ctxt->input->cur++; 426 } 427 } else if ((RAW == '&') && (NXT(1) == '#')) { 428 SKIP(2); 429 GROW; 430 while (RAW != ';') { /* loop blocked by count */ 431 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 432 val = val * 10 + (CUR - '0'); 433 else { 434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 436 ctxt->sax->error(ctxt->userData, 437 "xmlParseCharRef: invalid decimal value\n"); 438 ctxt->wellFormed = 0; 439 ctxt->disableSAX = 1; 440 val = 0; 441 break; 442 } 443 NEXT; 444 count++; 445 } 446 if (RAW == ';') { 447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 448 ctxt->nbChars ++; 449 ctxt->input->cur++; 450 } 451 } else { 452 ctxt->errNo = XML_ERR_INVALID_CHARREF; 453 if ((ctxt->sax != 
NULL) && (ctxt->sax->error != NULL)) 454 ctxt->sax->error(ctxt->userData, 455 "xmlParseCharRef: invalid value\n"); 456 ctxt->wellFormed = 0; 457 ctxt->disableSAX = 1; 458 } 459 460 /* 461 * [ WFC: Legal Character ] 462 * Characters referred to using character references must match the 463 * production for Char. 464 */ 465 if (IS_CHAR(val)) { 466 return(val); 467 } else { 468 ctxt->errNo = XML_ERR_INVALID_CHAR; 469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 471 val); 472 ctxt->wellFormed = 0; 473 ctxt->disableSAX = 1; 474 } 475 return(0); 476} 477 478/** 479 * xmlParseStringCharRef: 480 * @ctxt: an XML parser context 481 * @str: a pointer to an index in the string 482 * 483 * parse Reference declarations, variant parsing from a string rather 484 * than an an input flow. 485 * 486 * [66] CharRef ::= '&#' [0-9]+ ';' | 487 * '&#x' [0-9a-fA-F]+ ';' 488 * 489 * [ WFC: Legal Character ] 490 * Characters referred to using character references must match the 491 * production for Char. 
492 * 493 * Returns the value parsed (as an int), 0 in case of error, str will be 494 * updated to the current value of the index 495 */ 496static int 497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 498 const xmlChar *ptr; 499 xmlChar cur; 500 int val = 0; 501 502 if ((str == NULL) || (*str == NULL)) return(0); 503 ptr = *str; 504 cur = *ptr; 505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 506 ptr += 3; 507 cur = *ptr; 508 while (cur != ';') { /* Non input consuming loop */ 509 if ((cur >= '0') && (cur <= '9')) 510 val = val * 16 + (cur - '0'); 511 else if ((cur >= 'a') && (cur <= 'f')) 512 val = val * 16 + (cur - 'a') + 10; 513 else if ((cur >= 'A') && (cur <= 'F')) 514 val = val * 16 + (cur - 'A') + 10; 515 else { 516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 518 ctxt->sax->error(ctxt->userData, 519 "xmlParseStringCharRef: invalid hexadecimal value\n"); 520 ctxt->wellFormed = 0; 521 ctxt->disableSAX = 1; 522 val = 0; 523 break; 524 } 525 ptr++; 526 cur = *ptr; 527 } 528 if (cur == ';') 529 ptr++; 530 } else if ((cur == '&') && (ptr[1] == '#')){ 531 ptr += 2; 532 cur = *ptr; 533 while (cur != ';') { /* Non input consuming loops */ 534 if ((cur >= '0') && (cur <= '9')) 535 val = val * 10 + (cur - '0'); 536 else { 537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 539 ctxt->sax->error(ctxt->userData, 540 "xmlParseStringCharRef: invalid decimal value\n"); 541 ctxt->wellFormed = 0; 542 ctxt->disableSAX = 1; 543 val = 0; 544 break; 545 } 546 ptr++; 547 cur = *ptr; 548 } 549 if (cur == ';') 550 ptr++; 551 } else { 552 ctxt->errNo = XML_ERR_INVALID_CHARREF; 553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 554 ctxt->sax->error(ctxt->userData, 555 "xmlParseCharRef: invalid value\n"); 556 ctxt->wellFormed = 0; 557 ctxt->disableSAX = 1; 558 return(0); 559 } 560 *str = ptr; 561 562 /* 563 * [ WFC: Legal Character ] 
564 * Characters referred to using character references must match the 565 * production for Char. 566 */ 567 if (IS_CHAR(val)) { 568 return(val); 569 } else { 570 ctxt->errNo = XML_ERR_INVALID_CHAR; 571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 572 ctxt->sax->error(ctxt->userData, 573 "CharRef: invalid xmlChar value %d\n", val); 574 ctxt->wellFormed = 0; 575 ctxt->disableSAX = 1; 576 } 577 return(0); 578} 579 580/** 581 * xmlParserHandlePEReference: 582 * @ctxt: the parser context 583 * 584 * [69] PEReference ::= '%' Name ';' 585 * 586 * [ WFC: No Recursion ] 587 * A parsed entity must not contain a recursive 588 * reference to itself, either directly or indirectly. 589 * 590 * [ WFC: Entity Declared ] 591 * In a document without any DTD, a document with only an internal DTD 592 * subset which contains no parameter entity references, or a document 593 * with "standalone='yes'", ... ... The declaration of a parameter 594 * entity must precede any reference to it... 595 * 596 * [ VC: Entity Declared ] 597 * In a document with an external subset or external parameter entities 598 * with "standalone='no'", ... ... The declaration of a parameter entity 599 * must precede any reference to it... 600 * 601 * [ WFC: In DTD ] 602 * Parameter-entity references may only appear in the DTD. 603 * NOTE: misleading but this is handled. 604 * 605 * A PEReference may have been detected in the current input stream 606 * the handling is done accordingly to 607 * http://www.w3.org/TR/REC-xml#entproc 608 * i.e. 
609 * - Included in literal in entity values 610 * - Included as Paraemeter Entity reference within DTDs 611 */ 612void 613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 614 xmlChar *name; 615 xmlEntityPtr entity = NULL; 616 xmlParserInputPtr input; 617 618 if (ctxt->token != 0) { 619 return; 620 } 621 if (RAW != '%') return; 622 switch(ctxt->instate) { 623 case XML_PARSER_CDATA_SECTION: 624 return; 625 case XML_PARSER_COMMENT: 626 return; 627 case XML_PARSER_START_TAG: 628 return; 629 case XML_PARSER_END_TAG: 630 return; 631 case XML_PARSER_EOF: 632 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 635 ctxt->wellFormed = 0; 636 ctxt->disableSAX = 1; 637 return; 638 case XML_PARSER_PROLOG: 639 case XML_PARSER_START: 640 case XML_PARSER_MISC: 641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 644 ctxt->wellFormed = 0; 645 ctxt->disableSAX = 1; 646 return; 647 case XML_PARSER_ENTITY_DECL: 648 case XML_PARSER_CONTENT: 649 case XML_PARSER_ATTRIBUTE_VALUE: 650 case XML_PARSER_PI: 651 case XML_PARSER_SYSTEM_LITERAL: 652 /* we just ignore it there */ 653 return; 654 case XML_PARSER_EPILOG: 655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 658 ctxt->wellFormed = 0; 659 ctxt->disableSAX = 1; 660 return; 661 case XML_PARSER_ENTITY_VALUE: 662 /* 663 * NOTE: in the case of entity values, we don't do the 664 * substitution here since we need the literal 665 * entity value to be able to save the internal 666 * subset of the document. 
667 * This will be handled by xmlStringDecodeEntities 668 */ 669 return; 670 case XML_PARSER_DTD: 671 /* 672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 673 * In the internal DTD subset, parameter-entity references 674 * can occur only where markup declarations can occur, not 675 * within markup declarations. 676 * In that case this is handled in xmlParseMarkupDecl 677 */ 678 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 679 return; 680 break; 681 case XML_PARSER_IGNORE: 682 return; 683 } 684 685 NEXT; 686 name = xmlParseName(ctxt); 687 if (xmlParserDebugEntities) 688 xmlGenericError(xmlGenericErrorContext, 689 "PE Reference: %s\n", name); 690 if (name == NULL) { 691 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 694 ctxt->wellFormed = 0; 695 ctxt->disableSAX = 1; 696 } else { 697 if (RAW == ';') { 698 NEXT; 699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 701 if (entity == NULL) { 702 703 /* 704 * [ WFC: Entity Declared ] 705 * In a document without any DTD, a document with only an 706 * internal DTD subset which contains no parameter entity 707 * references, or a document with "standalone='yes'", ... 708 * ... The declaration of a parameter entity must precede 709 * any reference to it... 710 */ 711 if ((ctxt->standalone == 1) || 712 ((ctxt->hasExternalSubset == 0) && 713 (ctxt->hasPErefs == 0))) { 714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 715 ctxt->sax->error(ctxt->userData, 716 "PEReference: %%%s; not found\n", name); 717 ctxt->wellFormed = 0; 718 ctxt->disableSAX = 1; 719 } else { 720 /* 721 * [ VC: Entity Declared ] 722 * In a document with an external subset or external 723 * parameter entities with "standalone='no'", ... 724 * ... The declaration of a parameter entity must precede 725 * any reference to it... 
726 */ 727 if ((!ctxt->disableSAX) && 728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 729 ctxt->vctxt.error(ctxt->vctxt.userData, 730 "PEReference: %%%s; not found\n", name); 731 } else if ((!ctxt->disableSAX) && 732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 733 ctxt->sax->warning(ctxt->userData, 734 "PEReference: %%%s; not found\n", name); 735 ctxt->valid = 0; 736 } 737 } else { 738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 740 /* 741 * handle the extra spaces added before and after 742 * c.f. http://www.w3.org/TR/REC-xml#as-PE 743 * this is done independantly. 744 */ 745 input = xmlNewEntityInputStream(ctxt, entity); 746 xmlPushInput(ctxt, input); 747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 748 (RAW == '<') && (NXT(1) == '?') && 749 (NXT(2) == 'x') && (NXT(3) == 'm') && 750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 751 xmlParseTextDecl(ctxt); 752 } 753 if (ctxt->token == 0) 754 ctxt->token = ' '; 755 } else { 756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 757 ctxt->sax->error(ctxt->userData, 758 "xmlHandlePEReference: %s is not a parameter entity\n", 759 name); 760 ctxt->wellFormed = 0; 761 ctxt->disableSAX = 1; 762 } 763 } 764 } else { 765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 767 ctxt->sax->error(ctxt->userData, 768 "xmlHandlePEReference: expecting ';'\n"); 769 ctxt->wellFormed = 0; 770 ctxt->disableSAX = 1; 771 } 772 xmlFree(name); 773 } 774} 775 776/* 777 * Macro used to grow the current buffer. 
778 */ 779#define growBuffer(buffer) { \ 780 buffer##_size *= 2; \ 781 buffer = (xmlChar *) \ 782 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 783 if (buffer == NULL) { \ 784 perror("realloc failed"); \ 785 return(NULL); \ 786 } \ 787} 788 789/** 790 * xmlStringDecodeEntities: 791 * @ctxt: the parser context 792 * @str: the input string 793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 794 * @end: an end marker xmlChar, 0 if none 795 * @end2: an end marker xmlChar, 0 if none 796 * @end3: an end marker xmlChar, 0 if none 797 * 798 * Takes a entity string content and process to do the adequate subtitutions. 799 * 800 * [67] Reference ::= EntityRef | CharRef 801 * 802 * [69] PEReference ::= '%' Name ';' 803 * 804 * Returns A newly allocated string with the substitution done. The caller 805 * must deallocate it ! 806 */ 807xmlChar * 808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 809 xmlChar end, xmlChar end2, xmlChar end3) { 810 xmlChar *buffer = NULL; 811 int buffer_size = 0; 812 813 xmlChar *current = NULL; 814 xmlEntityPtr ent; 815 int c,l; 816 int nbchars = 0; 817 818 if (str == NULL) 819 return(NULL); 820 821 if (ctxt->depth > 40) { 822 ctxt->errNo = XML_ERR_ENTITY_LOOP; 823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 824 ctxt->sax->error(ctxt->userData, 825 "Detected entity reference loop\n"); 826 ctxt->wellFormed = 0; 827 ctxt->disableSAX = 1; 828 return(NULL); 829 } 830 831 /* 832 * allocate a translation buffer. 833 */ 834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 836 if (buffer == NULL) { 837 perror("xmlDecodeEntities: malloc failed"); 838 return(NULL); 839 } 840 841 /* 842 * Ok loop until we reach one of the ending char or a size limit. 843 * we are operating on already parsed values. 
844 */ 845 c = CUR_SCHAR(str, l); 846 while ((c != 0) && (c != end) && /* non input consuming loop */ 847 (c != end2) && (c != end3)) { 848 849 if (c == 0) break; 850 if ((c == '&') && (str[1] == '#')) { 851 int val = xmlParseStringCharRef(ctxt, &str); 852 if (val != 0) { 853 COPY_BUF(0,buffer,nbchars,val); 854 } 855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 856 if (xmlParserDebugEntities) 857 xmlGenericError(xmlGenericErrorContext, 858 "String decoding Entity Reference: %.30s\n", 859 str); 860 ent = xmlParseStringEntityRef(ctxt, &str); 861 if ((ent != NULL) && 862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 863 if (ent->content != NULL) { 864 COPY_BUF(0,buffer,nbchars,ent->content[0]); 865 } else { 866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 867 ctxt->sax->error(ctxt->userData, 868 "internal error entity has no content\n"); 869 } 870 } else if ((ent != NULL) && (ent->content != NULL)) { 871 xmlChar *rep; 872 873 ctxt->depth++; 874 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 875 0, 0, 0); 876 ctxt->depth--; 877 if (rep != NULL) { 878 current = rep; 879 while (*current != 0) { /* non input consuming loop */ 880 buffer[nbchars++] = *current++; 881 if (nbchars > 882 buffer_size - XML_PARSER_BUFFER_SIZE) { 883 growBuffer(buffer); 884 } 885 } 886 xmlFree(rep); 887 } 888 } else if (ent != NULL) { 889 int i = xmlStrlen(ent->name); 890 const xmlChar *cur = ent->name; 891 892 buffer[nbchars++] = '&'; 893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 894 growBuffer(buffer); 895 } 896 for (;i > 0;i--) 897 buffer[nbchars++] = *cur++; 898 buffer[nbchars++] = ';'; 899 } 900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 901 if (xmlParserDebugEntities) 902 xmlGenericError(xmlGenericErrorContext, 903 "String decoding PE Reference: %.30s\n", str); 904 ent = xmlParseStringPEReference(ctxt, &str); 905 if (ent != NULL) { 906 xmlChar *rep; 907 908 ctxt->depth++; 909 rep = xmlStringDecodeEntities(ctxt, ent->content, 
what, 910 0, 0, 0); 911 ctxt->depth--; 912 if (rep != NULL) { 913 current = rep; 914 while (*current != 0) { /* non input consuming loop */ 915 buffer[nbchars++] = *current++; 916 if (nbchars > 917 buffer_size - XML_PARSER_BUFFER_SIZE) { 918 growBuffer(buffer); 919 } 920 } 921 xmlFree(rep); 922 } 923 } 924 } else { 925 COPY_BUF(l,buffer,nbchars,c); 926 str += l; 927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 928 growBuffer(buffer); 929 } 930 } 931 c = CUR_SCHAR(str, l); 932 } 933 buffer[nbchars++] = 0; 934 return(buffer); 935} 936 937 938/************************************************************************ 939 * * 940 * Commodity functions to handle xmlChars * 941 * * 942 ************************************************************************/ 943 944/** 945 * xmlStrndup: 946 * @cur: the input xmlChar * 947 * @len: the len of @cur 948 * 949 * a strndup for array of xmlChar's 950 * 951 * Returns a new xmlChar * or NULL 952 */ 953xmlChar * 954xmlStrndup(const xmlChar *cur, int len) { 955 xmlChar *ret; 956 957 if ((cur == NULL) || (len < 0)) return(NULL); 958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 959 if (ret == NULL) { 960 xmlGenericError(xmlGenericErrorContext, 961 "malloc of %ld byte failed\n", 962 (len + 1) * (long)sizeof(xmlChar)); 963 return(NULL); 964 } 965 memcpy(ret, cur, len * sizeof(xmlChar)); 966 ret[len] = 0; 967 return(ret); 968} 969 970/** 971 * xmlStrdup: 972 * @cur: the input xmlChar * 973 * 974 * a strdup for array of xmlChar's. Since they are supposed to be 975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 976 * a termination mark of '0'. 
977 * 978 * Returns a new xmlChar * or NULL 979 */ 980xmlChar * 981xmlStrdup(const xmlChar *cur) { 982 const xmlChar *p = cur; 983 984 if (cur == NULL) return(NULL); 985 while (*p != 0) p++; /* non input consuming */ 986 return(xmlStrndup(cur, p - cur)); 987} 988 989/** 990 * xmlCharStrndup: 991 * @cur: the input char * 992 * @len: the len of @cur 993 * 994 * a strndup for char's to xmlChar's 995 * 996 * Returns a new xmlChar * or NULL 997 */ 998 999xmlChar * 1000xmlCharStrndup(const char *cur, int len) { 1001 int i; 1002 xmlChar *ret; 1003 1004 if ((cur == NULL) || (len < 0)) return(NULL); 1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1006 if (ret == NULL) { 1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1008 (len + 1) * (long)sizeof(xmlChar)); 1009 return(NULL); 1010 } 1011 for (i = 0;i < len;i++) 1012 ret[i] = (xmlChar) cur[i]; 1013 ret[len] = 0; 1014 return(ret); 1015} 1016 1017/** 1018 * xmlCharStrdup: 1019 * @cur: the input char * 1020 * @len: the len of @cur 1021 * 1022 * a strdup for char's to xmlChar's 1023 * 1024 * Returns a new xmlChar * or NULL 1025 */ 1026 1027xmlChar * 1028xmlCharStrdup(const char *cur) { 1029 const char *p = cur; 1030 1031 if (cur == NULL) return(NULL); 1032 while (*p != '\0') p++; /* non input consuming */ 1033 return(xmlCharStrndup(cur, p - cur)); 1034} 1035 1036/** 1037 * xmlStrcmp: 1038 * @str1: the first xmlChar * 1039 * @str2: the second xmlChar * 1040 * 1041 * a strcmp for xmlChar's 1042 * 1043 * Returns the integer result of the comparison 1044 */ 1045 1046int 1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1048 register int tmp; 1049 1050 if (str1 == str2) return(0); 1051 if (str1 == NULL) return(-1); 1052 if (str2 == NULL) return(1); 1053 do { 1054 tmp = *str1++ - *str2; 1055 if (tmp != 0) return(tmp); 1056 } while (*str2++ != 0); 1057 return 0; 1058} 1059 1060/** 1061 * xmlStrEqual: 1062 * @str1: the first xmlChar * 1063 * @str2: the second xmlChar * 1064 * 1065 * 
Check if both string are equal of have same content 1066 * Should be a bit more readable and faster than xmlStrEqual() 1067 * 1068 * Returns 1 if they are equal, 0 if they are different 1069 */ 1070 1071int 1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1073 if (str1 == str2) return(1); 1074 if (str1 == NULL) return(0); 1075 if (str2 == NULL) return(0); 1076 do { 1077 if (*str1++ != *str2) return(0); 1078 } while (*str2++); 1079 return(1); 1080} 1081 1082/** 1083 * xmlStrncmp: 1084 * @str1: the first xmlChar * 1085 * @str2: the second xmlChar * 1086 * @len: the max comparison length 1087 * 1088 * a strncmp for xmlChar's 1089 * 1090 * Returns the integer result of the comparison 1091 */ 1092 1093int 1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1095 register int tmp; 1096 1097 if (len <= 0) return(0); 1098 if (str1 == str2) return(0); 1099 if (str1 == NULL) return(-1); 1100 if (str2 == NULL) return(1); 1101 do { 1102 tmp = *str1++ - *str2; 1103 if (tmp != 0 || --len == 0) return(tmp); 1104 } while (*str2++ != 0); 1105 return 0; 1106} 1107 1108static xmlChar casemap[256] = { 1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1141}; 1142 1143/** 1144 * xmlStrcasecmp: 1145 * @str1: the first xmlChar * 1146 * @str2: the second xmlChar * 1147 * 1148 * a strcasecmp for xmlChar's 1149 * 1150 * Returns the integer result of the comparison 1151 */ 1152 1153int 1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1155 register int tmp; 1156 1157 if (str1 == str2) return(0); 1158 if (str1 == NULL) return(-1); 1159 if (str2 == NULL) return(1); 1160 do { 1161 tmp = casemap[*str1++] - casemap[*str2]; 1162 if (tmp != 0) return(tmp); 1163 } while (*str2++ != 0); 1164 return 0; 1165} 1166 1167/** 1168 * xmlStrncasecmp: 1169 * @str1: the first xmlChar * 1170 * @str2: the second xmlChar * 1171 * @len: the max comparison length 1172 * 1173 * a strncasecmp for xmlChar's 1174 * 1175 * Returns the integer result of the comparison 1176 */ 1177 1178int 1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1180 register int tmp; 1181 1182 if (len <= 0) return(0); 1183 if (str1 == str2) return(0); 1184 if (str1 == NULL) return(-1); 1185 if (str2 == NULL) return(1); 1186 do { 1187 tmp = casemap[*str1++] - casemap[*str2]; 1188 if (tmp != 0 || --len == 0) return(tmp); 1189 } while (*str2++ != 0); 1190 return 0; 1191} 1192 1193/** 1194 * xmlStrchr: 1195 * @str: the xmlChar * array 1196 * @val: the xmlChar to search 1197 * 1198 * a strchr for xmlChar's 1199 * 1200 * Returns the xmlChar * for the first 
occurence or NULL. 1201 */ 1202 1203const xmlChar * 1204xmlStrchr(const xmlChar *str, xmlChar val) { 1205 if (str == NULL) return(NULL); 1206 while (*str != 0) { /* non input consuming */ 1207 if (*str == val) return((xmlChar *) str); 1208 str++; 1209 } 1210 return(NULL); 1211} 1212 1213/** 1214 * xmlStrstr: 1215 * @str: the xmlChar * array (haystack) 1216 * @val: the xmlChar to search (needle) 1217 * 1218 * a strstr for xmlChar's 1219 * 1220 * Returns the xmlChar * for the first occurence or NULL. 1221 */ 1222 1223const xmlChar * 1224xmlStrstr(const xmlChar *str, xmlChar *val) { 1225 int n; 1226 1227 if (str == NULL) return(NULL); 1228 if (val == NULL) return(NULL); 1229 n = xmlStrlen(val); 1230 1231 if (n == 0) return(str); 1232 while (*str != 0) { /* non input consuming */ 1233 if (*str == *val) { 1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1235 } 1236 str++; 1237 } 1238 return(NULL); 1239} 1240 1241/** 1242 * xmlStrcasestr: 1243 * @str: the xmlChar * array (haystack) 1244 * @val: the xmlChar to search (needle) 1245 * 1246 * a case-ignoring strstr for xmlChar's 1247 * 1248 * Returns the xmlChar * for the first occurence or NULL. 1249 */ 1250 1251const xmlChar * 1252xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1253 int n; 1254 1255 if (str == NULL) return(NULL); 1256 if (val == NULL) return(NULL); 1257 n = xmlStrlen(val); 1258 1259 if (n == 0) return(str); 1260 while (*str != 0) { /* non input consuming */ 1261 if (casemap[*str] == casemap[*val]) 1262 if (!xmlStrncasecmp(str, val, n)) return(str); 1263 str++; 1264 } 1265 return(NULL); 1266} 1267 1268/** 1269 * xmlStrsub: 1270 * @str: the xmlChar * array (haystack) 1271 * @start: the index of the first char (zero based) 1272 * @len: the length of the substring 1273 * 1274 * Extract a substring of a given string 1275 * 1276 * Returns the xmlChar * for the first occurence or NULL. 
1277 */ 1278 1279xmlChar * 1280xmlStrsub(const xmlChar *str, int start, int len) { 1281 int i; 1282 1283 if (str == NULL) return(NULL); 1284 if (start < 0) return(NULL); 1285 if (len < 0) return(NULL); 1286 1287 for (i = 0;i < start;i++) { 1288 if (*str == 0) return(NULL); 1289 str++; 1290 } 1291 if (*str == 0) return(NULL); 1292 return(xmlStrndup(str, len)); 1293} 1294 1295/** 1296 * xmlStrlen: 1297 * @str: the xmlChar * array 1298 * 1299 * length of a xmlChar's string 1300 * 1301 * Returns the number of xmlChar contained in the ARRAY. 1302 */ 1303 1304int 1305xmlStrlen(const xmlChar *str) { 1306 int len = 0; 1307 1308 if (str == NULL) return(0); 1309 while (*str != 0) { /* non input consuming */ 1310 str++; 1311 len++; 1312 } 1313 return(len); 1314} 1315 1316/** 1317 * xmlStrncat: 1318 * @cur: the original xmlChar * array 1319 * @add: the xmlChar * array added 1320 * @len: the length of @add 1321 * 1322 * a strncat for array of xmlChar's, it will extend cur with the len 1323 * first bytes of @add. 1324 * 1325 * Returns a new xmlChar *, the original @cur is reallocated if needed 1326 * and should not be freed 1327 */ 1328 1329xmlChar * 1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1331 int size; 1332 xmlChar *ret; 1333 1334 if ((add == NULL) || (len == 0)) 1335 return(cur); 1336 if (cur == NULL) 1337 return(xmlStrndup(add, len)); 1338 1339 size = xmlStrlen(cur); 1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1341 if (ret == NULL) { 1342 xmlGenericError(xmlGenericErrorContext, 1343 "xmlStrncat: realloc of %ld byte failed\n", 1344 (size + len + 1) * (long)sizeof(xmlChar)); 1345 return(cur); 1346 } 1347 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1348 ret[size + len] = 0; 1349 return(ret); 1350} 1351 1352/** 1353 * xmlStrcat: 1354 * @cur: the original xmlChar * array 1355 * @add: the xmlChar * array added 1356 * 1357 * a strcat for array of xmlChar's. 
Since they are supposed to be 1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1359 * a termination mark of '0'. 1360 * 1361 * Returns a new xmlChar * containing the concatenated string. 1362 */ 1363xmlChar * 1364xmlStrcat(xmlChar *cur, const xmlChar *add) { 1365 const xmlChar *p = add; 1366 1367 if (add == NULL) return(cur); 1368 if (cur == NULL) 1369 return(xmlStrdup(add)); 1370 1371 while (*p != 0) p++; /* non input consuming */ 1372 return(xmlStrncat(cur, add, p - add)); 1373} 1374 1375/************************************************************************ 1376 * * 1377 * Commodity functions, cleanup needed ? * 1378 * * 1379 ************************************************************************/ 1380 1381/** 1382 * areBlanks: 1383 * @ctxt: an XML parser context 1384 * @str: a xmlChar * 1385 * @len: the size of @str 1386 * 1387 * Is this a sequence of blank chars that one can ignore ? 1388 * 1389 * Returns 1 if ignorable 0 otherwise. 1390 */ 1391 1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1393 int i, ret; 1394 xmlNodePtr lastChild; 1395 1396 if (ctxt->keepBlanks) 1397 return(0); 1398 1399 /* 1400 * Check for xml:space value. 
1401 */ 1402 if (*(ctxt->space) == 1) 1403 return(0); 1404 1405 /* 1406 * Check that the string is made of blanks 1407 */ 1408 for (i = 0;i < len;i++) 1409 if (!(IS_BLANK(str[i]))) return(0); 1410 1411 /* 1412 * Look if the element is mixed content in the Dtd if available 1413 */ 1414 if (ctxt->myDoc != NULL) { 1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1416 if (ret == 0) return(1); 1417 if (ret == 1) return(0); 1418 } 1419 1420 /* 1421 * Otherwise, heuristic :-\ 1422 */ 1423 if (RAW != '<') return(0); 1424 if (ctxt->node == NULL) return(0); 1425 if ((ctxt->node->children == NULL) && 1426 (RAW == '<') && (NXT(1) == '/')) return(0); 1427 1428 lastChild = xmlGetLastChild(ctxt->node); 1429 if (lastChild == NULL) { 1430 if (ctxt->node->content != NULL) return(0); 1431 } else if (xmlNodeIsText(lastChild)) 1432 return(0); 1433 else if ((ctxt->node->children != NULL) && 1434 (xmlNodeIsText(ctxt->node->children))) 1435 return(0); 1436 return(1); 1437} 1438 1439/* 1440 * Forward definition for recusive behaviour. 1441 */ 1442void xmlParsePEReference(xmlParserCtxtPtr ctxt); 1443void xmlParseReference(xmlParserCtxtPtr ctxt); 1444 1445/************************************************************************ 1446 * * 1447 * Extra stuff for namespace support * 1448 * Relates to http://www.w3.org/TR/WD-xml-names * 1449 * * 1450 ************************************************************************/ 1451 1452/** 1453 * xmlSplitQName: 1454 * @ctxt: an XML parser context 1455 * @name: an XML parser context 1456 * @prefix: a xmlChar ** 1457 * 1458 * parse an UTF8 encoded XML qualified name string 1459 * 1460 * [NS 5] QName ::= (Prefix ':')? LocalPart 1461 * 1462 * [NS 6] Prefix ::= NCName 1463 * 1464 * [NS 7] LocalPart ::= NCName 1465 * 1466 * Returns the local part, and prefix is updated 1467 * to get the Prefix if any. 
1468 */ 1469 1470xmlChar * 1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1472 xmlChar buf[XML_MAX_NAMELEN + 5]; 1473 xmlChar *buffer = NULL; 1474 int len = 0; 1475 int max = XML_MAX_NAMELEN; 1476 xmlChar *ret = NULL; 1477 const xmlChar *cur = name; 1478 int c; 1479 1480 *prefix = NULL; 1481 1482#ifndef XML_XML_NAMESPACE 1483 /* xml: prefix is not really a namespace */ 1484 if ((cur[0] == 'x') && (cur[1] == 'm') && 1485 (cur[2] == 'l') && (cur[3] == ':')) 1486 return(xmlStrdup(name)); 1487#endif 1488 1489 /* nasty but valid */ 1490 if (cur[0] == ':') 1491 return(xmlStrdup(name)); 1492 1493 c = *cur++; 1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1495 buf[len++] = c; 1496 c = *cur++; 1497 } 1498 if (len >= max) { 1499 /* 1500 * Okay someone managed to make a huge name, so he's ready to pay 1501 * for the processing speed. 1502 */ 1503 max = len * 2; 1504 1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1506 if (buffer == NULL) { 1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1508 ctxt->sax->error(ctxt->userData, 1509 "xmlSplitQName: out of memory\n"); 1510 return(NULL); 1511 } 1512 memcpy(buffer, buf, len); 1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1514 if (len + 10 > max) { 1515 max *= 2; 1516 buffer = (xmlChar *) xmlRealloc(buffer, 1517 max * sizeof(xmlChar)); 1518 if (buffer == NULL) { 1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1520 ctxt->sax->error(ctxt->userData, 1521 "xmlSplitQName: out of memory\n"); 1522 return(NULL); 1523 } 1524 } 1525 buffer[len++] = c; 1526 c = *cur++; 1527 } 1528 buffer[len] = 0; 1529 } 1530 1531 if (buffer == NULL) 1532 ret = xmlStrndup(buf, len); 1533 else { 1534 ret = buffer; 1535 buffer = NULL; 1536 max = XML_MAX_NAMELEN; 1537 } 1538 1539 1540 if (c == ':') { 1541 c = *cur++; 1542 if (c == 0) return(ret); 1543 *prefix = ret; 1544 len = 0; 1545 1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml 
*/ 1547 buf[len++] = c; 1548 c = *cur++; 1549 } 1550 if (len >= max) { 1551 /* 1552 * Okay someone managed to make a huge name, so he's ready to pay 1553 * for the processing speed. 1554 */ 1555 max = len * 2; 1556 1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1558 if (buffer == NULL) { 1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1560 ctxt->sax->error(ctxt->userData, 1561 "xmlSplitQName: out of memory\n"); 1562 return(NULL); 1563 } 1564 memcpy(buffer, buf, len); 1565 while (c != 0) { /* tested bigname2.xml */ 1566 if (len + 10 > max) { 1567 max *= 2; 1568 buffer = (xmlChar *) xmlRealloc(buffer, 1569 max * sizeof(xmlChar)); 1570 if (buffer == NULL) { 1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1572 ctxt->sax->error(ctxt->userData, 1573 "xmlSplitQName: out of memory\n"); 1574 return(NULL); 1575 } 1576 } 1577 buffer[len++] = c; 1578 c = *cur++; 1579 } 1580 buffer[len] = 0; 1581 } 1582 1583 if (buffer == NULL) 1584 ret = xmlStrndup(buf, len); 1585 else { 1586 ret = buffer; 1587 } 1588 } 1589 1590 return(ret); 1591} 1592 1593/************************************************************************ 1594 * * 1595 * The parser itself * 1596 * Relates to http://www.w3.org/TR/REC-xml * 1597 * * 1598 ************************************************************************/ 1599 1600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1601/** 1602 * xmlParseName: 1603 * @ctxt: an XML parser context 1604 * 1605 * parse an XML name. 1606 * 1607 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1608 * CombiningChar | Extender 1609 * 1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1611 * 1612 * [6] Names ::= Name (S Name)* 1613 * 1614 * Returns the Name parsed or NULL 1615 */ 1616 1617xmlChar * 1618xmlParseName(xmlParserCtxtPtr ctxt) { 1619 const xmlChar *in; 1620 xmlChar *ret; 1621 int count = 0; 1622 1623 GROW; 1624 1625 /* 1626 * Accelerator for simple ASCII names 1627 */ 1628 in = ctxt->input->cur; 1629 if (((*in >= 0x61) && (*in <= 0x7A)) || 1630 ((*in >= 0x41) && (*in <= 0x5A)) || 1631 (*in == '_') || (*in == ':')) { 1632 in++; 1633 while (((*in >= 0x61) && (*in <= 0x7A)) || 1634 ((*in >= 0x41) && (*in <= 0x5A)) || 1635 ((*in >= 0x30) && (*in <= 0x39)) || 1636 (*in == '_') || (*in == ':')) 1637 in++; 1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) { 1639 count = in - ctxt->input->cur; 1640 ret = xmlStrndup(ctxt->input->cur, count); 1641 ctxt->input->cur = in; 1642 return(ret); 1643 } 1644 } 1645 return(xmlParseNameComplex(ctxt)); 1646} 1647 1648xmlChar * 1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1650 xmlChar buf[XML_MAX_NAMELEN + 5]; 1651 int len = 0, l; 1652 int c; 1653 int count = 0; 1654 1655 /* 1656 * Handler for more complex cases 1657 */ 1658 GROW; 1659 c = CUR_CHAR(l); 1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1661 (!IS_LETTER(c) && (c != '_') && 1662 (c != ':'))) { 1663 return(NULL); 1664 } 1665 1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1668 (c == '.') || (c == '-') || 1669 (c == '_') || (c == ':') || 1670 (IS_COMBINING(c)) || 1671 (IS_EXTENDER(c)))) { 1672 if (count++ > 100) { 1673 count = 0; 1674 GROW; 1675 } 1676 COPY_BUF(l,buf,len,c); 1677 NEXTL(l); 1678 c = CUR_CHAR(l); 1679 if (len >= XML_MAX_NAMELEN) { 1680 /* 1681 * Okay someone managed to make a huge name, so he's ready to pay 1682 * for the processing speed. 
1683 */ 1684 xmlChar *buffer; 1685 int max = len * 2; 1686 1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1688 if (buffer == NULL) { 1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1690 ctxt->sax->error(ctxt->userData, 1691 "xmlParseNameComplex: out of memory\n"); 1692 return(NULL); 1693 } 1694 memcpy(buffer, buf, len); 1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1696 (c == '.') || (c == '-') || 1697 (c == '_') || (c == ':') || 1698 (IS_COMBINING(c)) || 1699 (IS_EXTENDER(c))) { 1700 if (count++ > 100) { 1701 count = 0; 1702 GROW; 1703 } 1704 if (len + 10 > max) { 1705 max *= 2; 1706 buffer = (xmlChar *) xmlRealloc(buffer, 1707 max * sizeof(xmlChar)); 1708 if (buffer == NULL) { 1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1710 ctxt->sax->error(ctxt->userData, 1711 "xmlParseNameComplex: out of memory\n"); 1712 return(NULL); 1713 } 1714 } 1715 COPY_BUF(l,buffer,len,c); 1716 NEXTL(l); 1717 c = CUR_CHAR(l); 1718 } 1719 buffer[len] = 0; 1720 return(buffer); 1721 } 1722 } 1723 return(xmlStrndup(buf, len)); 1724} 1725 1726/** 1727 * xmlParseStringName: 1728 * @ctxt: an XML parser context 1729 * @str: a pointer to the string pointer (IN/OUT) 1730 * 1731 * parse an XML name. 1732 * 1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1734 * CombiningChar | Extender 1735 * 1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1737 * 1738 * [6] Names ::= Name (S Name)* 1739 * 1740 * Returns the Name parsed or NULL. The str pointer 1741 * is updated to the current location in the string. 
1742 */ 1743 1744static xmlChar * 1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1746 xmlChar buf[XML_MAX_NAMELEN + 5]; 1747 const xmlChar *cur = *str; 1748 int len = 0, l; 1749 int c; 1750 1751 c = CUR_SCHAR(cur, l); 1752 if (!IS_LETTER(c) && (c != '_') && 1753 (c != ':')) { 1754 return(NULL); 1755 } 1756 1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1758 (c == '.') || (c == '-') || 1759 (c == '_') || (c == ':') || 1760 (IS_COMBINING(c)) || 1761 (IS_EXTENDER(c))) { 1762 COPY_BUF(l,buf,len,c); 1763 cur += l; 1764 c = CUR_SCHAR(cur, l); 1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1766 /* 1767 * Okay someone managed to make a huge name, so he's ready to pay 1768 * for the processing speed. 1769 */ 1770 xmlChar *buffer; 1771 int max = len * 2; 1772 1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1774 if (buffer == NULL) { 1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1776 ctxt->sax->error(ctxt->userData, 1777 "xmlParseStringName: out of memory\n"); 1778 return(NULL); 1779 } 1780 memcpy(buffer, buf, len); 1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1782 (c == '.') || (c == '-') || 1783 (c == '_') || (c == ':') || 1784 (IS_COMBINING(c)) || 1785 (IS_EXTENDER(c))) { 1786 if (len + 10 > max) { 1787 max *= 2; 1788 buffer = (xmlChar *) xmlRealloc(buffer, 1789 max * sizeof(xmlChar)); 1790 if (buffer == NULL) { 1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1792 ctxt->sax->error(ctxt->userData, 1793 "xmlParseStringName: out of memory\n"); 1794 return(NULL); 1795 } 1796 } 1797 COPY_BUF(l,buffer,len,c); 1798 cur += l; 1799 c = CUR_SCHAR(cur, l); 1800 } 1801 buffer[len] = 0; 1802 *str = cur; 1803 return(buffer); 1804 } 1805 } 1806 *str = cur; 1807 return(xmlStrndup(buf, len)); 1808} 1809 1810/** 1811 * xmlParseNmtoken: 1812 * @ctxt: an XML parser context 1813 * 1814 * parse an XML Nmtoken. 
1815 * 1816 * [7] Nmtoken ::= (NameChar)+ 1817 * 1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1819 * 1820 * Returns the Nmtoken parsed or NULL 1821 */ 1822 1823xmlChar * 1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1825 xmlChar buf[XML_MAX_NAMELEN + 5]; 1826 int len = 0, l; 1827 int c; 1828 int count = 0; 1829 1830 GROW; 1831 c = CUR_CHAR(l); 1832 1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1834 (c == '.') || (c == '-') || 1835 (c == '_') || (c == ':') || 1836 (IS_COMBINING(c)) || 1837 (IS_EXTENDER(c))) { 1838 if (count++ > 100) { 1839 count = 0; 1840 GROW; 1841 } 1842 COPY_BUF(l,buf,len,c); 1843 NEXTL(l); 1844 c = CUR_CHAR(l); 1845 if (len >= XML_MAX_NAMELEN) { 1846 /* 1847 * Okay someone managed to make a huge token, so he's ready to pay 1848 * for the processing speed. 1849 */ 1850 xmlChar *buffer; 1851 int max = len * 2; 1852 1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1854 if (buffer == NULL) { 1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1856 ctxt->sax->error(ctxt->userData, 1857 "xmlParseNmtoken: out of memory\n"); 1858 return(NULL); 1859 } 1860 memcpy(buffer, buf, len); 1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1862 (c == '.') || (c == '-') || 1863 (c == '_') || (c == ':') || 1864 (IS_COMBINING(c)) || 1865 (IS_EXTENDER(c))) { 1866 if (count++ > 100) { 1867 count = 0; 1868 GROW; 1869 } 1870 if (len + 10 > max) { 1871 max *= 2; 1872 buffer = (xmlChar *) xmlRealloc(buffer, 1873 max * sizeof(xmlChar)); 1874 if (buffer == NULL) { 1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1876 ctxt->sax->error(ctxt->userData, 1877 "xmlParseNameComplex: out of memory\n"); 1878 return(NULL); 1879 } 1880 } 1881 COPY_BUF(l,buffer,len,c); 1882 NEXTL(l); 1883 c = CUR_CHAR(l); 1884 } 1885 buffer[len] = 0; 1886 return(buffer); 1887 } 1888 } 1889 if (len == 0) 1890 return(NULL); 1891 return(xmlStrndup(buf, len)); 1892} 1893 1894/** 1895 * xmlParseEntityValue: 1896 * @ctxt: an XML 
parser context 1897 * @orig: if non-NULL store a copy of the original entity value 1898 * 1899 * parse a value for ENTITY declarations 1900 * 1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 1902 * "'" ([^%&'] | PEReference | Reference)* "'" 1903 * 1904 * Returns the EntityValue parsed with reference substitued or NULL 1905 */ 1906 1907xmlChar * 1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 1909 xmlChar *buf = NULL; 1910 int len = 0; 1911 int size = XML_PARSER_BUFFER_SIZE; 1912 int c, l; 1913 xmlChar stop; 1914 xmlChar *ret = NULL; 1915 const xmlChar *cur = NULL; 1916 xmlParserInputPtr input; 1917 1918 if (RAW == '"') stop = '"'; 1919 else if (RAW == '\'') stop = '\''; 1920 else { 1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 1924 ctxt->wellFormed = 0; 1925 ctxt->disableSAX = 1; 1926 return(NULL); 1927 } 1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 1929 if (buf == NULL) { 1930 xmlGenericError(xmlGenericErrorContext, 1931 "malloc of %d byte failed\n", size); 1932 return(NULL); 1933 } 1934 1935 /* 1936 * The content of the entity definition is copied in a buffer. 1937 */ 1938 1939 ctxt->instate = XML_PARSER_ENTITY_VALUE; 1940 input = ctxt->input; 1941 GROW; 1942 NEXT; 1943 c = CUR_CHAR(l); 1944 /* 1945 * NOTE: 4.4.5 Included in Literal 1946 * When a parameter entity reference appears in a literal entity 1947 * value, ... a single or double quote character in the replacement 1948 * text is always treated as a normal data character and will not 1949 * terminate the literal. 
1950 * In practice it means we stop the loop only when back at parsing 1951 * the initial entity and the quote is found 1952 */ 1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 1954 (ctxt->input != input))) { 1955 if (len + 5 >= size) { 1956 size *= 2; 1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 1958 if (buf == NULL) { 1959 xmlGenericError(xmlGenericErrorContext, 1960 "realloc of %d byte failed\n", size); 1961 return(NULL); 1962 } 1963 } 1964 COPY_BUF(l,buf,len,c); 1965 NEXTL(l); 1966 /* 1967 * Pop-up of finished entities. 1968 */ 1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 1970 xmlPopInput(ctxt); 1971 1972 GROW; 1973 c = CUR_CHAR(l); 1974 if (c == 0) { 1975 GROW; 1976 c = CUR_CHAR(l); 1977 } 1978 } 1979 buf[len] = 0; 1980 1981 /* 1982 * Raise problem w.r.t. '&' and '%' being used in non-entities 1983 * reference constructs. Note Charref will be handled in 1984 * xmlStringDecodeEntities() 1985 */ 1986 cur = buf; 1987 while (*cur != 0) { /* non input consuming */ 1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 1989 xmlChar *name; 1990 xmlChar tmp = *cur; 1991 1992 cur++; 1993 name = xmlParseStringName(ctxt, &cur); 1994 if ((name == NULL) || (*cur != ';')) { 1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1997 ctxt->sax->error(ctxt->userData, 1998 "EntityValue: '%c' forbidden except for entities references\n", 1999 tmp); 2000 ctxt->wellFormed = 0; 2001 ctxt->disableSAX = 1; 2002 } 2003 if ((ctxt->inSubset == 1) && (tmp == '%')) { 2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2006 ctxt->sax->error(ctxt->userData, 2007 "EntityValue: PEReferences forbidden in internal subset\n", 2008 tmp); 2009 ctxt->wellFormed = 0; 2010 ctxt->disableSAX = 1; 2011 } 2012 if (name != NULL) 2013 xmlFree(name); 2014 } 2015 cur++; 2016 } 2017 2018 /* 2019 * Then PEReference entities are substituted. 
2020 */ 2021 if (c != stop) { 2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2025 ctxt->wellFormed = 0; 2026 ctxt->disableSAX = 1; 2027 xmlFree(buf); 2028 } else { 2029 NEXT; 2030 /* 2031 * NOTE: 4.4.7 Bypassed 2032 * When a general entity reference appears in the EntityValue in 2033 * an entity declaration, it is bypassed and left as is. 2034 * so XML_SUBSTITUTE_REF is not set here. 2035 */ 2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2037 0, 0, 0); 2038 if (orig != NULL) 2039 *orig = buf; 2040 else 2041 xmlFree(buf); 2042 } 2043 2044 return(ret); 2045} 2046 2047/** 2048 * xmlParseAttValue: 2049 * @ctxt: an XML parser context 2050 * 2051 * parse a value for an attribute 2052 * Note: the parser won't do substitution of entities here, this 2053 * will be handled later in xmlStringGetNodeList 2054 * 2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2056 * "'" ([^<&'] | Reference)* "'" 2057 * 2058 * 3.3.3 Attribute-Value Normalization: 2059 * Before the value of an attribute is passed to the application or 2060 * checked for validity, the XML processor must normalize it as follows: 2061 * - a character reference is processed by appending the referenced 2062 * character to the attribute value 2063 * - an entity reference is processed by recursively processing the 2064 * replacement text of the entity 2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2066 * appending #x20 to the normalized value, except that only a single 2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2068 * parsed entity or the literal entity value of an internal parsed entity 2069 * - other characters are processed by appending them to the normalized value 2070 * If the declared value is not CDATA, then the XML processor must further 2071 * process the normalized attribute value by discarding any 
leading and 2072 * trailing space (#x20) characters, and by replacing sequences of space 2073 * (#x20) characters by a single space (#x20) character. 2074 * All attributes for which no declaration has been read should be treated 2075 * by a non-validating parser as if declared CDATA. 2076 * 2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2078 */ 2079 2080xmlChar * 2081xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2082 xmlChar limit = 0; 2083 xmlChar *buf = NULL; 2084 int len = 0; 2085 int buf_size = 0; 2086 int c, l; 2087 xmlChar *current = NULL; 2088 xmlEntityPtr ent; 2089 2090 2091 SHRINK; 2092 if (NXT(0) == '"') { 2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2094 limit = '"'; 2095 NEXT; 2096 } else if (NXT(0) == '\'') { 2097 limit = '\''; 2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2099 NEXT; 2100 } else { 2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2104 ctxt->wellFormed = 0; 2105 ctxt->disableSAX = 1; 2106 return(NULL); 2107 } 2108 2109 /* 2110 * allocate a translation buffer. 2111 */ 2112 buf_size = XML_PARSER_BUFFER_SIZE; 2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2114 if (buf == NULL) { 2115 perror("xmlParseAttValue: malloc failed"); 2116 return(NULL); 2117 } 2118 2119 /* 2120 * Ok loop until we reach one of the ending char or a size limit. 
2121 */ 2122 c = CUR_CHAR(l); 2123 while (((NXT(0) != limit) && /* checked */ 2124 (c != '<')) || (ctxt->token != 0)) { 2125 if (c == 0) break; 2126 if (ctxt->token == '&') { 2127 /* 2128 * The reparsing will be done in xmlStringGetNodeList() 2129 * called by the attribute() function in SAX.c 2130 */ 2131 static xmlChar buffer[6] = "&"; 2132 2133 if (len > buf_size - 10) { 2134 growBuffer(buf); 2135 } 2136 current = &buffer[0]; 2137 while (*current != 0) { /* non input consuming */ 2138 buf[len++] = *current++; 2139 } 2140 ctxt->token = 0; 2141 } else if (c == '&') { 2142 if (NXT(1) == '#') { 2143 int val = xmlParseCharRef(ctxt); 2144 if (val == '&') { 2145 /* 2146 * The reparsing will be done in xmlStringGetNodeList() 2147 * called by the attribute() function in SAX.c 2148 */ 2149 static xmlChar buffer[6] = "&"; 2150 2151 if (len > buf_size - 10) { 2152 growBuffer(buf); 2153 } 2154 current = &buffer[0]; 2155 while (*current != 0) { /* non input consuming */ 2156 buf[len++] = *current++; 2157 } 2158 } else { 2159 if (len > buf_size - 10) { 2160 growBuffer(buf); 2161 } 2162 len += xmlCopyChar(0, &buf[len], val); 2163 } 2164 } else { 2165 ent = xmlParseEntityRef(ctxt); 2166 if ((ent != NULL) && 2167 (ctxt->replaceEntities != 0)) { 2168 xmlChar *rep; 2169 2170 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2171 rep = xmlStringDecodeEntities(ctxt, ent->content, 2172 XML_SUBSTITUTE_REF, 0, 0, 0); 2173 if (rep != NULL) { 2174 current = rep; 2175 while (*current != 0) { /* non input consuming */ 2176 buf[len++] = *current++; 2177 if (len > buf_size - 10) { 2178 growBuffer(buf); 2179 } 2180 } 2181 xmlFree(rep); 2182 } 2183 } else { 2184 if (len > buf_size - 10) { 2185 growBuffer(buf); 2186 } 2187 if (ent->content != NULL) 2188 buf[len++] = ent->content[0]; 2189 } 2190 } else if (ent != NULL) { 2191 int i = xmlStrlen(ent->name); 2192 const xmlChar *cur = ent->name; 2193 2194 /* 2195 * This may look absurd but is needed to detect 2196 * entities problems 2197 */ 2198 if 
((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2199 (ent->content != NULL)) { 2200 xmlChar *rep; 2201 rep = xmlStringDecodeEntities(ctxt, ent->content, 2202 XML_SUBSTITUTE_REF, 0, 0, 0); 2203 if (rep != NULL) 2204 xmlFree(rep); 2205 } 2206 2207 /* 2208 * Just output the reference 2209 */ 2210 buf[len++] = '&'; 2211 if (len > buf_size - i - 10) { 2212 growBuffer(buf); 2213 } 2214 for (;i > 0;i--) 2215 buf[len++] = *cur++; 2216 buf[len++] = ';'; 2217 } 2218 } 2219 } else { 2220 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2221 COPY_BUF(l,buf,len,0x20); 2222 if (len > buf_size - 10) { 2223 growBuffer(buf); 2224 } 2225 } else { 2226 COPY_BUF(l,buf,len,c); 2227 if (len > buf_size - 10) { 2228 growBuffer(buf); 2229 } 2230 } 2231 NEXTL(l); 2232 } 2233 GROW; 2234 c = CUR_CHAR(l); 2235 } 2236 buf[len++] = 0; 2237 if (RAW == '<') { 2238 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2240 ctxt->sax->error(ctxt->userData, 2241 "Unescaped '<' not allowed in attributes values\n"); 2242 ctxt->wellFormed = 0; 2243 ctxt->disableSAX = 1; 2244 } else if (RAW != limit) { 2245 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2247 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2248 ctxt->wellFormed = 0; 2249 ctxt->disableSAX = 1; 2250 } else 2251 NEXT; 2252 return(buf); 2253} 2254 2255/** 2256 * xmlParseSystemLiteral: 2257 * @ctxt: an XML parser context 2258 * 2259 * parse an XML Literal 2260 * 2261 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2262 * 2263 * Returns the SystemLiteral parsed or NULL 2264 */ 2265 2266xmlChar * 2267xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2268 xmlChar *buf = NULL; 2269 int len = 0; 2270 int size = XML_PARSER_BUFFER_SIZE; 2271 int cur, l; 2272 xmlChar stop; 2273 int state = ctxt->instate; 2274 int count = 0; 2275 2276 SHRINK; 2277 if (RAW == '"') { 2278 NEXT; 2279 stop = '"'; 2280 } else if (RAW == 
'\'') { 2281 NEXT; 2282 stop = '\''; 2283 } else { 2284 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2286 ctxt->sax->error(ctxt->userData, 2287 "SystemLiteral \" or ' expected\n"); 2288 ctxt->wellFormed = 0; 2289 ctxt->disableSAX = 1; 2290 return(NULL); 2291 } 2292 2293 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2294 if (buf == NULL) { 2295 xmlGenericError(xmlGenericErrorContext, 2296 "malloc of %d byte failed\n", size); 2297 return(NULL); 2298 } 2299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2300 cur = CUR_CHAR(l); 2301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2302 if (len + 5 >= size) { 2303 size *= 2; 2304 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2305 if (buf == NULL) { 2306 xmlGenericError(xmlGenericErrorContext, 2307 "realloc of %d byte failed\n", size); 2308 ctxt->instate = (xmlParserInputState) state; 2309 return(NULL); 2310 } 2311 } 2312 count++; 2313 if (count > 50) { 2314 GROW; 2315 count = 0; 2316 } 2317 COPY_BUF(l,buf,len,cur); 2318 NEXTL(l); 2319 cur = CUR_CHAR(l); 2320 if (cur == 0) { 2321 GROW; 2322 SHRINK; 2323 cur = CUR_CHAR(l); 2324 } 2325 } 2326 buf[len] = 0; 2327 ctxt->instate = (xmlParserInputState) state; 2328 if (!IS_CHAR(cur)) { 2329 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2331 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2332 ctxt->wellFormed = 0; 2333 ctxt->disableSAX = 1; 2334 } else { 2335 NEXT; 2336 } 2337 return(buf); 2338} 2339 2340/** 2341 * xmlParsePubidLiteral: 2342 * @ctxt: an XML parser context 2343 * 2344 * parse an XML public literal 2345 * 2346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2347 * 2348 * Returns the PubidLiteral parsed or NULL. 
2349 */ 2350 2351xmlChar * 2352xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2353 xmlChar *buf = NULL; 2354 int len = 0; 2355 int size = XML_PARSER_BUFFER_SIZE; 2356 xmlChar cur; 2357 xmlChar stop; 2358 int count = 0; 2359 2360 SHRINK; 2361 if (RAW == '"') { 2362 NEXT; 2363 stop = '"'; 2364 } else if (RAW == '\'') { 2365 NEXT; 2366 stop = '\''; 2367 } else { 2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2370 ctxt->sax->error(ctxt->userData, 2371 "SystemLiteral \" or ' expected\n"); 2372 ctxt->wellFormed = 0; 2373 ctxt->disableSAX = 1; 2374 return(NULL); 2375 } 2376 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2377 if (buf == NULL) { 2378 xmlGenericError(xmlGenericErrorContext, 2379 "malloc of %d byte failed\n", size); 2380 return(NULL); 2381 } 2382 cur = CUR; 2383 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2384 if (len + 1 >= size) { 2385 size *= 2; 2386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2387 if (buf == NULL) { 2388 xmlGenericError(xmlGenericErrorContext, 2389 "realloc of %d byte failed\n", size); 2390 return(NULL); 2391 } 2392 } 2393 buf[len++] = cur; 2394 count++; 2395 if (count > 50) { 2396 GROW; 2397 count = 0; 2398 } 2399 NEXT; 2400 cur = CUR; 2401 if (cur == 0) { 2402 GROW; 2403 SHRINK; 2404 cur = CUR; 2405 } 2406 } 2407 buf[len] = 0; 2408 if (cur != stop) { 2409 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2411 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2412 ctxt->wellFormed = 0; 2413 ctxt->disableSAX = 1; 2414 } else { 2415 NEXT; 2416 } 2417 return(buf); 2418} 2419 2420void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2421/** 2422 * xmlParseCharData: 2423 * @ctxt: an XML parser context 2424 * @cdata: int indicating whether we are within a CDATA section 2425 * 2426 * parse a CharData section. 
2427 * if we are within a CDATA section ']]>' marks an end of section. 2428 * 2429 * The right angle bracket (>) may be represented using the string ">", 2430 * and must, for compatibility, be escaped using ">" or a character 2431 * reference when it appears in the string "]]>" in content, when that 2432 * string is not marking the end of a CDATA section. 2433 * 2434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2435 */ 2436 2437void 2438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2439 const xmlChar *in; 2440 int nbchar = 0; 2441 int line = ctxt->input->line; 2442 int col = ctxt->input->col; 2443 2444 SHRINK; 2445 GROW; 2446 /* 2447 * Accelerated common case where input don't need to be 2448 * modified before passing it to the handler. 2449 */ 2450 if ((ctxt->token == 0) && (!cdata)) { 2451 in = ctxt->input->cur; 2452 do { 2453 while (((*in >= 0x20) && (*in != '<') && 2454 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2455 in++; 2456 if (*in == 0xA) { 2457 ctxt->input->line++; 2458 continue; /* while */ 2459 } 2460 nbchar = in - ctxt->input->cur; 2461 if (nbchar > 0) { 2462 if (IS_BLANK(*ctxt->input->cur) && 2463 areBlanks(ctxt, ctxt->input->cur, nbchar)) { 2464 if (ctxt->sax->ignorableWhitespace != NULL) 2465 ctxt->sax->ignorableWhitespace(ctxt->userData, 2466 ctxt->input->cur, nbchar); 2467 } else { 2468 if (ctxt->sax->characters != NULL) 2469 ctxt->sax->characters(ctxt->userData, 2470 ctxt->input->cur, nbchar); 2471 } 2472 } 2473 ctxt->input->cur = in; 2474 if (*in == 0xD) { 2475 in++; 2476 if (*in == 0xA) { 2477 ctxt->input->cur = in; 2478 in++; 2479 ctxt->input->line++; 2480 continue; /* while */ 2481 } 2482 in--; 2483 } 2484 if (*in == '<') { 2485 return; 2486 } 2487 if (*in == '&') { 2488 return; 2489 } 2490 SHRINK; 2491 GROW; 2492 in = ctxt->input->cur; 2493 } while ((*in >= 0x20) && (*in <= 0x7F)); 2494 nbchar = 0; 2495 } 2496 ctxt->input->line = line; 2497 ctxt->input->col = col; 2498 xmlParseCharDataComplex(ctxt, cdata); 2499} 2500 
2501void 2502xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2503 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2504 int nbchar = 0; 2505 int cur, l; 2506 int count = 0; 2507 2508 SHRINK; 2509 GROW; 2510 cur = CUR_CHAR(l); 2511 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2512 ((cur != '&') || (ctxt->token == '&')) && 2513 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2514 if ((cur == ']') && (NXT(1) == ']') && 2515 (NXT(2) == '>')) { 2516 if (cdata) break; 2517 else { 2518 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2520 ctxt->sax->error(ctxt->userData, 2521 "Sequence ']]>' not allowed in content\n"); 2522 /* Should this be relaxed ??? I see a "must here */ 2523 ctxt->wellFormed = 0; 2524 ctxt->disableSAX = 1; 2525 } 2526 } 2527 COPY_BUF(l,buf,nbchar,cur); 2528 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2529 /* 2530 * Ok the segment is to be consumed as chars. 2531 */ 2532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2533 if (areBlanks(ctxt, buf, nbchar)) { 2534 if (ctxt->sax->ignorableWhitespace != NULL) 2535 ctxt->sax->ignorableWhitespace(ctxt->userData, 2536 buf, nbchar); 2537 } else { 2538 if (ctxt->sax->characters != NULL) 2539 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2540 } 2541 } 2542 nbchar = 0; 2543 } 2544 count++; 2545 if (count > 50) { 2546 GROW; 2547 count = 0; 2548 } 2549 NEXTL(l); 2550 cur = CUR_CHAR(l); 2551 } 2552 if (nbchar != 0) { 2553 /* 2554 * Ok the segment is to be consumed as chars. 
2555 */ 2556 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2557 if (areBlanks(ctxt, buf, nbchar)) { 2558 if (ctxt->sax->ignorableWhitespace != NULL) 2559 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2560 } else { 2561 if (ctxt->sax->characters != NULL) 2562 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2563 } 2564 } 2565 } 2566} 2567 2568/** 2569 * xmlParseExternalID: 2570 * @ctxt: an XML parser context 2571 * @publicID: a xmlChar** receiving PubidLiteral 2572 * @strict: indicate whether we should restrict parsing to only 2573 * production [75], see NOTE below 2574 * 2575 * Parse an External ID or a Public ID 2576 * 2577 * NOTE: Productions [75] and [83] interract badly since [75] can generate 2578 * 'PUBLIC' S PubidLiteral S SystemLiteral 2579 * 2580 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2581 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2582 * 2583 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2584 * 2585 * Returns the function returns SystemLiteral and in the second 2586 * case publicID receives PubidLiteral, is strict is off 2587 * it is possible to return NULL and have publicID set. 
2588 */ 2589 2590xmlChar * 2591xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2592 xmlChar *URI = NULL; 2593 2594 SHRINK; 2595 2596 *publicID = NULL; 2597 if ((RAW == 'S') && (NXT(1) == 'Y') && 2598 (NXT(2) == 'S') && (NXT(3) == 'T') && 2599 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2600 SKIP(6); 2601 if (!IS_BLANK(CUR)) { 2602 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2604 ctxt->sax->error(ctxt->userData, 2605 "Space required after 'SYSTEM'\n"); 2606 ctxt->wellFormed = 0; 2607 ctxt->disableSAX = 1; 2608 } 2609 SKIP_BLANKS; 2610 URI = xmlParseSystemLiteral(ctxt); 2611 if (URI == NULL) { 2612 ctxt->errNo = XML_ERR_URI_REQUIRED; 2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2614 ctxt->sax->error(ctxt->userData, 2615 "xmlParseExternalID: SYSTEM, no URI\n"); 2616 ctxt->wellFormed = 0; 2617 ctxt->disableSAX = 1; 2618 } 2619 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2620 (NXT(2) == 'B') && (NXT(3) == 'L') && 2621 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2622 SKIP(6); 2623 if (!IS_BLANK(CUR)) { 2624 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2626 ctxt->sax->error(ctxt->userData, 2627 "Space required after 'PUBLIC'\n"); 2628 ctxt->wellFormed = 0; 2629 ctxt->disableSAX = 1; 2630 } 2631 SKIP_BLANKS; 2632 *publicID = xmlParsePubidLiteral(ctxt); 2633 if (*publicID == NULL) { 2634 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2636 ctxt->sax->error(ctxt->userData, 2637 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2638 ctxt->wellFormed = 0; 2639 ctxt->disableSAX = 1; 2640 } 2641 if (strict) { 2642 /* 2643 * We don't handle [83] so "S SystemLiteral" is required. 
2644 */ 2645 if (!IS_BLANK(CUR)) { 2646 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2648 ctxt->sax->error(ctxt->userData, 2649 "Space required after the Public Identifier\n"); 2650 ctxt->wellFormed = 0; 2651 ctxt->disableSAX = 1; 2652 } 2653 } else { 2654 /* 2655 * We handle [83] so we return immediately, if 2656 * "S SystemLiteral" is not detected. From a purely parsing 2657 * point of view that's a nice mess. 2658 */ 2659 const xmlChar *ptr; 2660 GROW; 2661 2662 ptr = CUR_PTR; 2663 if (!IS_BLANK(*ptr)) return(NULL); 2664 2665 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2666 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2667 } 2668 SKIP_BLANKS; 2669 URI = xmlParseSystemLiteral(ctxt); 2670 if (URI == NULL) { 2671 ctxt->errNo = XML_ERR_URI_REQUIRED; 2672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2673 ctxt->sax->error(ctxt->userData, 2674 "xmlParseExternalID: PUBLIC, no URI\n"); 2675 ctxt->wellFormed = 0; 2676 ctxt->disableSAX = 1; 2677 } 2678 } 2679 return(URI); 2680} 2681 2682/** 2683 * xmlParseComment: 2684 * @ctxt: an XML parser context 2685 * 2686 * Skip an XML (SGML) comment <!-- .... --> 2687 * The spec says that "For compatibility, the string "--" (double-hyphen) 2688 * must not occur within comments. " 2689 * 2690 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2691 */ 2692void 2693xmlParseComment(xmlParserCtxtPtr ctxt) { 2694 xmlChar *buf = NULL; 2695 int len; 2696 int size = XML_PARSER_BUFFER_SIZE; 2697 int q, ql; 2698 int r, rl; 2699 int cur, l; 2700 xmlParserInputState state; 2701 xmlParserInputPtr input = ctxt->input; 2702 int count = 0; 2703 2704 /* 2705 * Check that there is a comment right here. 
2706 */ 2707 if ((RAW != '<') || (NXT(1) != '!') || 2708 (NXT(2) != '-') || (NXT(3) != '-')) return; 2709 2710 state = ctxt->instate; 2711 ctxt->instate = XML_PARSER_COMMENT; 2712 SHRINK; 2713 SKIP(4); 2714 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2715 if (buf == NULL) { 2716 xmlGenericError(xmlGenericErrorContext, 2717 "malloc of %d byte failed\n", size); 2718 ctxt->instate = state; 2719 return; 2720 } 2721 q = CUR_CHAR(ql); 2722 NEXTL(ql); 2723 r = CUR_CHAR(rl); 2724 NEXTL(rl); 2725 cur = CUR_CHAR(l); 2726 len = 0; 2727 while (IS_CHAR(cur) && /* checked */ 2728 ((cur != '>') || 2729 (r != '-') || (q != '-'))) { 2730 if ((r == '-') && (q == '-') && (len > 1)) { 2731 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2733 ctxt->sax->error(ctxt->userData, 2734 "Comment must not contain '--' (double-hyphen)`\n"); 2735 ctxt->wellFormed = 0; 2736 ctxt->disableSAX = 1; 2737 } 2738 if (len + 5 >= size) { 2739 size *= 2; 2740 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2741 if (buf == NULL) { 2742 xmlGenericError(xmlGenericErrorContext, 2743 "realloc of %d byte failed\n", size); 2744 ctxt->instate = state; 2745 return; 2746 } 2747 } 2748 COPY_BUF(ql,buf,len,q); 2749 q = r; 2750 ql = rl; 2751 r = cur; 2752 rl = l; 2753 2754 count++; 2755 if (count > 50) { 2756 GROW; 2757 count = 0; 2758 } 2759 NEXTL(l); 2760 cur = CUR_CHAR(l); 2761 if (cur == 0) { 2762 SHRINK; 2763 GROW; 2764 cur = CUR_CHAR(l); 2765 } 2766 } 2767 buf[len] = 0; 2768 if (!IS_CHAR(cur)) { 2769 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2771 ctxt->sax->error(ctxt->userData, 2772 "Comment not terminated \n<!--%.50s\n", buf); 2773 ctxt->wellFormed = 0; 2774 ctxt->disableSAX = 1; 2775 xmlFree(buf); 2776 } else { 2777 if (input != ctxt->input) { 2778 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2780 
ctxt->sax->error(ctxt->userData, 2781"Comment doesn't start and stop in the same entity\n"); 2782 ctxt->wellFormed = 0; 2783 ctxt->disableSAX = 1; 2784 } 2785 NEXT; 2786 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2787 (!ctxt->disableSAX)) 2788 ctxt->sax->comment(ctxt->userData, buf); 2789 xmlFree(buf); 2790 } 2791 ctxt->instate = state; 2792} 2793 2794/** 2795 * xmlParsePITarget: 2796 * @ctxt: an XML parser context 2797 * 2798 * parse the name of a PI 2799 * 2800 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2801 * 2802 * Returns the PITarget name or NULL 2803 */ 2804 2805xmlChar * 2806xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2807 xmlChar *name; 2808 2809 name = xmlParseName(ctxt); 2810 if ((name != NULL) && 2811 ((name[0] == 'x') || (name[0] == 'X')) && 2812 ((name[1] == 'm') || (name[1] == 'M')) && 2813 ((name[2] == 'l') || (name[2] == 'L'))) { 2814 int i; 2815 if ((name[0] == 'x') && (name[1] == 'm') && 2816 (name[2] == 'l') && (name[3] == 0)) { 2817 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2819 ctxt->sax->error(ctxt->userData, 2820 "XML declaration allowed only at the start of the document\n"); 2821 ctxt->wellFormed = 0; 2822 ctxt->disableSAX = 1; 2823 return(name); 2824 } else if (name[3] == 0) { 2825 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2827 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2828 ctxt->wellFormed = 0; 2829 ctxt->disableSAX = 1; 2830 return(name); 2831 } 2832 for (i = 0;;i++) { 2833 if (xmlW3CPIs[i] == NULL) break; 2834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2835 return(name); 2836 } 2837 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2838 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2839 ctxt->sax->warning(ctxt->userData, 2840 "xmlParsePItarget: invalid name prefix 'xml'\n"); 2841 } 2842 } 2843 return(name); 2844} 2845 2846/** 2847 * xmlParsePI: 2848 * @ctxt: 
an XML parser context 2849 * 2850 * parse an XML Processing Instruction. 2851 * 2852 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2853 * 2854 * The processing is transfered to SAX once parsed. 2855 */ 2856 2857void 2858xmlParsePI(xmlParserCtxtPtr ctxt) { 2859 xmlChar *buf = NULL; 2860 int len = 0; 2861 int size = XML_PARSER_BUFFER_SIZE; 2862 int cur, l; 2863 xmlChar *target; 2864 xmlParserInputState state; 2865 int count = 0; 2866 2867 if ((RAW == '<') && (NXT(1) == '?')) { 2868 xmlParserInputPtr input = ctxt->input; 2869 state = ctxt->instate; 2870 ctxt->instate = XML_PARSER_PI; 2871 /* 2872 * this is a Processing Instruction. 2873 */ 2874 SKIP(2); 2875 SHRINK; 2876 2877 /* 2878 * Parse the target name and check for special support like 2879 * namespace. 2880 */ 2881 target = xmlParsePITarget(ctxt); 2882 if (target != NULL) { 2883 if ((RAW == '?') && (NXT(1) == '>')) { 2884 if (input != ctxt->input) { 2885 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2887 ctxt->sax->error(ctxt->userData, 2888 "PI declaration doesn't start and stop in the same entity\n"); 2889 ctxt->wellFormed = 0; 2890 ctxt->disableSAX = 1; 2891 } 2892 SKIP(2); 2893 2894 /* 2895 * SAX: PI detected. 
2896 */ 2897 if ((ctxt->sax) && (!ctxt->disableSAX) && 2898 (ctxt->sax->processingInstruction != NULL)) 2899 ctxt->sax->processingInstruction(ctxt->userData, 2900 target, NULL); 2901 ctxt->instate = state; 2902 xmlFree(target); 2903 return; 2904 } 2905 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2906 if (buf == NULL) { 2907 xmlGenericError(xmlGenericErrorContext, 2908 "malloc of %d byte failed\n", size); 2909 ctxt->instate = state; 2910 return; 2911 } 2912 cur = CUR; 2913 if (!IS_BLANK(cur)) { 2914 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2916 ctxt->sax->error(ctxt->userData, 2917 "xmlParsePI: PI %s space expected\n", target); 2918 ctxt->wellFormed = 0; 2919 ctxt->disableSAX = 1; 2920 } 2921 SKIP_BLANKS; 2922 cur = CUR_CHAR(l); 2923 while (IS_CHAR(cur) && /* checked */ 2924 ((cur != '?') || (NXT(1) != '>'))) { 2925 if (len + 5 >= size) { 2926 size *= 2; 2927 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2928 if (buf == NULL) { 2929 xmlGenericError(xmlGenericErrorContext, 2930 "realloc of %d byte failed\n", size); 2931 ctxt->instate = state; 2932 return; 2933 } 2934 } 2935 count++; 2936 if (count > 50) { 2937 GROW; 2938 count = 0; 2939 } 2940 COPY_BUF(l,buf,len,cur); 2941 NEXTL(l); 2942 cur = CUR_CHAR(l); 2943 if (cur == 0) { 2944 SHRINK; 2945 GROW; 2946 cur = CUR_CHAR(l); 2947 } 2948 } 2949 buf[len] = 0; 2950 if (cur != '?') { 2951 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 2952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2953 ctxt->sax->error(ctxt->userData, 2954 "xmlParsePI: PI %s never end ...\n", target); 2955 ctxt->wellFormed = 0; 2956 ctxt->disableSAX = 1; 2957 } else { 2958 if (input != ctxt->input) { 2959 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2961 ctxt->sax->error(ctxt->userData, 2962 "PI declaration doesn't start and stop in the same entity\n"); 2963 ctxt->wellFormed = 0; 2964 ctxt->disableSAX = 1; 2965 } 2966 
SKIP(2); 2967 2968 /* 2969 * SAX: PI detected. 2970 */ 2971 if ((ctxt->sax) && (!ctxt->disableSAX) && 2972 (ctxt->sax->processingInstruction != NULL)) 2973 ctxt->sax->processingInstruction(ctxt->userData, 2974 target, buf); 2975 } 2976 xmlFree(buf); 2977 xmlFree(target); 2978 } else { 2979 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 2980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2981 ctxt->sax->error(ctxt->userData, 2982 "xmlParsePI : no target name\n"); 2983 ctxt->wellFormed = 0; 2984 ctxt->disableSAX = 1; 2985 } 2986 ctxt->instate = state; 2987 } 2988} 2989 2990/** 2991 * xmlParseNotationDecl: 2992 * @ctxt: an XML parser context 2993 * 2994 * parse a notation declaration 2995 * 2996 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 2997 * 2998 * Hence there is actually 3 choices: 2999 * 'PUBLIC' S PubidLiteral 3000 * 'PUBLIC' S PubidLiteral S SystemLiteral 3001 * and 'SYSTEM' S SystemLiteral 3002 * 3003 * See the NOTE on xmlParseExternalID(). 3004 */ 3005 3006void 3007xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3008 xmlChar *name; 3009 xmlChar *Pubid; 3010 xmlChar *Systemid; 3011 3012 if ((RAW == '<') && (NXT(1) == '!') && 3013 (NXT(2) == 'N') && (NXT(3) == 'O') && 3014 (NXT(4) == 'T') && (NXT(5) == 'A') && 3015 (NXT(6) == 'T') && (NXT(7) == 'I') && 3016 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3017 xmlParserInputPtr input = ctxt->input; 3018 SHRINK; 3019 SKIP(10); 3020 if (!IS_BLANK(CUR)) { 3021 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3023 ctxt->sax->error(ctxt->userData, 3024 "Space required after '<!NOTATION'\n"); 3025 ctxt->wellFormed = 0; 3026 ctxt->disableSAX = 1; 3027 return; 3028 } 3029 SKIP_BLANKS; 3030 3031 name = xmlParseNameComplex(ctxt); 3032 if (name == NULL) { 3033 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3035 ctxt->sax->error(ctxt->userData, 3036 "NOTATION: Name expected here\n"); 3037 
ctxt->wellFormed = 0; 3038 ctxt->disableSAX = 1; 3039 return; 3040 } 3041 if (!IS_BLANK(CUR)) { 3042 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3044 ctxt->sax->error(ctxt->userData, 3045 "Space required after the NOTATION name'\n"); 3046 ctxt->wellFormed = 0; 3047 ctxt->disableSAX = 1; 3048 return; 3049 } 3050 SKIP_BLANKS; 3051 3052 /* 3053 * Parse the IDs. 3054 */ 3055 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3056 SKIP_BLANKS; 3057 3058 if (RAW == '>') { 3059 if (input != ctxt->input) { 3060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3062 ctxt->sax->error(ctxt->userData, 3063"Notation declaration doesn't start and stop in the same entity\n"); 3064 ctxt->wellFormed = 0; 3065 ctxt->disableSAX = 1; 3066 } 3067 NEXT; 3068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3069 (ctxt->sax->notationDecl != NULL)) 3070 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3071 } else { 3072 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3074 ctxt->sax->error(ctxt->userData, 3075 "'>' required to close NOTATION declaration\n"); 3076 ctxt->wellFormed = 0; 3077 ctxt->disableSAX = 1; 3078 } 3079 xmlFree(name); 3080 if (Systemid != NULL) xmlFree(Systemid); 3081 if (Pubid != NULL) xmlFree(Pubid); 3082 } 3083} 3084 3085/** 3086 * xmlParseEntityDecl: 3087 * @ctxt: an XML parser context 3088 * 3089 * parse <!ENTITY declarations 3090 * 3091 * [70] EntityDecl ::= GEDecl | PEDecl 3092 * 3093 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3094 * 3095 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3096 * 3097 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3098 * 3099 * [74] PEDef ::= EntityValue | ExternalID 3100 * 3101 * [76] NDataDecl ::= S 'NDATA' S Name 3102 * 3103 * [ VC: Notation Declared ] 3104 * The Name must match the declared name of a notation. 
3105 */ 3106 3107void 3108xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3109 xmlChar *name = NULL; 3110 xmlChar *value = NULL; 3111 xmlChar *URI = NULL, *literal = NULL; 3112 xmlChar *ndata = NULL; 3113 int isParameter = 0; 3114 xmlChar *orig = NULL; 3115 3116 GROW; 3117 if ((RAW == '<') && (NXT(1) == '!') && 3118 (NXT(2) == 'E') && (NXT(3) == 'N') && 3119 (NXT(4) == 'T') && (NXT(5) == 'I') && 3120 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3121 xmlParserInputPtr input = ctxt->input; 3122 ctxt->instate = XML_PARSER_ENTITY_DECL; 3123 SHRINK; 3124 SKIP(8); 3125 if (!IS_BLANK(CUR)) { 3126 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3128 ctxt->sax->error(ctxt->userData, 3129 "Space required after '<!ENTITY'\n"); 3130 ctxt->wellFormed = 0; 3131 ctxt->disableSAX = 1; 3132 } 3133 SKIP_BLANKS; 3134 3135 if (RAW == '%') { 3136 NEXT; 3137 if (!IS_BLANK(CUR)) { 3138 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3140 ctxt->sax->error(ctxt->userData, 3141 "Space required after '%'\n"); 3142 ctxt->wellFormed = 0; 3143 ctxt->disableSAX = 1; 3144 } 3145 SKIP_BLANKS; 3146 isParameter = 1; 3147 } 3148 3149 name = xmlParseNameComplex(ctxt); 3150 if (name == NULL) { 3151 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3153 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3154 ctxt->wellFormed = 0; 3155 ctxt->disableSAX = 1; 3156 return; 3157 } 3158 if (!IS_BLANK(CUR)) { 3159 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3161 ctxt->sax->error(ctxt->userData, 3162 "Space required after the entity name\n"); 3163 ctxt->wellFormed = 0; 3164 ctxt->disableSAX = 1; 3165 } 3166 SKIP_BLANKS; 3167 3168 /* 3169 * handle the various case of definitions... 
3170 */ 3171 if (isParameter) { 3172 if ((RAW == '"') || (RAW == '\'')) { 3173 value = xmlParseEntityValue(ctxt, &orig); 3174 if (value) { 3175 if ((ctxt->sax != NULL) && 3176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3177 ctxt->sax->entityDecl(ctxt->userData, name, 3178 XML_INTERNAL_PARAMETER_ENTITY, 3179 NULL, NULL, value); 3180 } 3181 } else { 3182 URI = xmlParseExternalID(ctxt, &literal, 1); 3183 if ((URI == NULL) && (literal == NULL)) { 3184 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3186 ctxt->sax->error(ctxt->userData, 3187 "Entity value required\n"); 3188 ctxt->wellFormed = 0; 3189 ctxt->disableSAX = 1; 3190 } 3191 if (URI) { 3192 xmlURIPtr uri; 3193 3194 uri = xmlParseURI((const char *) URI); 3195 if (uri == NULL) { 3196 ctxt->errNo = XML_ERR_INVALID_URI; 3197 if ((ctxt->sax != NULL) && 3198 (!ctxt->disableSAX) && 3199 (ctxt->sax->error != NULL)) 3200 ctxt->sax->error(ctxt->userData, 3201 "Invalid URI: %s\n", URI); 3202 ctxt->wellFormed = 0; 3203 } else { 3204 if (uri->fragment != NULL) { 3205 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3206 if ((ctxt->sax != NULL) && 3207 (!ctxt->disableSAX) && 3208 (ctxt->sax->error != NULL)) 3209 ctxt->sax->error(ctxt->userData, 3210 "Fragment not allowed: %s\n", URI); 3211 ctxt->wellFormed = 0; 3212 } else { 3213 if ((ctxt->sax != NULL) && 3214 (!ctxt->disableSAX) && 3215 (ctxt->sax->entityDecl != NULL)) 3216 ctxt->sax->entityDecl(ctxt->userData, name, 3217 XML_EXTERNAL_PARAMETER_ENTITY, 3218 literal, URI, NULL); 3219 } 3220 xmlFreeURI(uri); 3221 } 3222 } 3223 } 3224 } else { 3225 if ((RAW == '"') || (RAW == '\'')) { 3226 value = xmlParseEntityValue(ctxt, &orig); 3227 if ((ctxt->sax != NULL) && 3228 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3229 ctxt->sax->entityDecl(ctxt->userData, name, 3230 XML_INTERNAL_GENERAL_ENTITY, 3231 NULL, NULL, value); 3232 } else { 3233 URI = xmlParseExternalID(ctxt, &literal, 1); 3234 if ((URI == NULL) && (literal 
== NULL)) { 3235 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3237 ctxt->sax->error(ctxt->userData, 3238 "Entity value required\n"); 3239 ctxt->wellFormed = 0; 3240 ctxt->disableSAX = 1; 3241 } 3242 if (URI) { 3243 xmlURIPtr uri; 3244 3245 uri = xmlParseURI((const char *)URI); 3246 if (uri == NULL) { 3247 ctxt->errNo = XML_ERR_INVALID_URI; 3248 if ((ctxt->sax != NULL) && 3249 (!ctxt->disableSAX) && 3250 (ctxt->sax->error != NULL)) 3251 ctxt->sax->error(ctxt->userData, 3252 "Invalid URI: %s\n", URI); 3253 ctxt->wellFormed = 0; 3254 } else { 3255 if (uri->fragment != NULL) { 3256 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3257 if ((ctxt->sax != NULL) && 3258 (!ctxt->disableSAX) && 3259 (ctxt->sax->error != NULL)) 3260 ctxt->sax->error(ctxt->userData, 3261 "Fragment not allowed: %s\n", URI); 3262 ctxt->wellFormed = 0; 3263 } 3264 xmlFreeURI(uri); 3265 } 3266 } 3267 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3268 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3270 ctxt->sax->error(ctxt->userData, 3271 "Space required before 'NDATA'\n"); 3272 ctxt->wellFormed = 0; 3273 ctxt->disableSAX = 1; 3274 } 3275 SKIP_BLANKS; 3276 if ((RAW == 'N') && (NXT(1) == 'D') && 3277 (NXT(2) == 'A') && (NXT(3) == 'T') && 3278 (NXT(4) == 'A')) { 3279 SKIP(5); 3280 if (!IS_BLANK(CUR)) { 3281 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3283 ctxt->sax->error(ctxt->userData, 3284 "Space required after 'NDATA'\n"); 3285 ctxt->wellFormed = 0; 3286 ctxt->disableSAX = 1; 3287 } 3288 SKIP_BLANKS; 3289 ndata = xmlParseNameComplex(ctxt); 3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3291 (ctxt->sax->unparsedEntityDecl != NULL)) 3292 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3293 literal, URI, ndata); 3294 } else { 3295 if ((ctxt->sax != NULL) && 3296 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3297 
ctxt->sax->entityDecl(ctxt->userData, name, 3298 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3299 literal, URI, NULL); 3300 } 3301 } 3302 } 3303 SKIP_BLANKS; 3304 if (RAW != '>') { 3305 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3307 ctxt->sax->error(ctxt->userData, 3308 "xmlParseEntityDecl: entity %s not terminated\n", name); 3309 ctxt->wellFormed = 0; 3310 ctxt->disableSAX = 1; 3311 } else { 3312 if (input != ctxt->input) { 3313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3315 ctxt->sax->error(ctxt->userData, 3316"Entity declaration doesn't start and stop in the same entity\n"); 3317 ctxt->wellFormed = 0; 3318 ctxt->disableSAX = 1; 3319 } 3320 NEXT; 3321 } 3322 if (orig != NULL) { 3323 /* 3324 * Ugly mechanism to save the raw entity value. 3325 */ 3326 xmlEntityPtr cur = NULL; 3327 3328 if (isParameter) { 3329 if ((ctxt->sax != NULL) && 3330 (ctxt->sax->getParameterEntity != NULL)) 3331 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3332 } else { 3333 if ((ctxt->sax != NULL) && 3334 (ctxt->sax->getEntity != NULL)) 3335 cur = ctxt->sax->getEntity(ctxt->userData, name); 3336 } 3337 if (cur != NULL) { 3338 if (cur->orig != NULL) 3339 xmlFree(orig); 3340 else 3341 cur->orig = orig; 3342 } else 3343 xmlFree(orig); 3344 } 3345 if (name != NULL) xmlFree(name); 3346 if (value != NULL) xmlFree(value); 3347 if (URI != NULL) xmlFree(URI); 3348 if (literal != NULL) xmlFree(literal); 3349 if (ndata != NULL) xmlFree(ndata); 3350 } 3351} 3352 3353/** 3354 * xmlParseDefaultDecl: 3355 * @ctxt: an XML parser context 3356 * @value: Receive a possible fixed default value for the attribute 3357 * 3358 * Parse an attribute default declaration 3359 * 3360 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 3361 * 3362 * [ VC: Required Attribute ] 3363 * if the default declaration is the keyword #REQUIRED, then the 3364 * attribute must be specified for all elements of the type in the 3365 * attribute-list declaration. 3366 * 3367 * [ VC: Attribute Default Legal ] 3368 * The declared default value must meet the lexical constraints of 3369 * the declared attribute type c.f. xmlValidateAttributeDecl() 3370 * 3371 * [ VC: Fixed Attribute Default ] 3372 * if an attribute has a default value declared with the #FIXED 3373 * keyword, instances of that attribute must match the default value. 3374 * 3375 * [ WFC: No < in Attribute Values ] 3376 * handled in xmlParseAttValue() 3377 * 3378 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3379 * or XML_ATTRIBUTE_FIXED. 3380 */ 3381 3382int 3383xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3384 int val; 3385 xmlChar *ret; 3386 3387 *value = NULL; 3388 if ((RAW == '#') && (NXT(1) == 'R') && 3389 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3390 (NXT(4) == 'U') && (NXT(5) == 'I') && 3391 (NXT(6) == 'R') && (NXT(7) == 'E') && 3392 (NXT(8) == 'D')) { 3393 SKIP(9); 3394 return(XML_ATTRIBUTE_REQUIRED); 3395 } 3396 if ((RAW == '#') && (NXT(1) == 'I') && 3397 (NXT(2) == 'M') && (NXT(3) == 'P') && 3398 (NXT(4) == 'L') && (NXT(5) == 'I') && 3399 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3400 SKIP(8); 3401 return(XML_ATTRIBUTE_IMPLIED); 3402 } 3403 val = XML_ATTRIBUTE_NONE; 3404 if ((RAW == '#') && (NXT(1) == 'F') && 3405 (NXT(2) == 'I') && (NXT(3) == 'X') && 3406 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3407 SKIP(6); 3408 val = XML_ATTRIBUTE_FIXED; 3409 if (!IS_BLANK(CUR)) { 3410 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3412 ctxt->sax->error(ctxt->userData, 3413 "Space required after '#FIXED'\n"); 3414 ctxt->wellFormed = 0; 3415 ctxt->disableSAX = 1; 3416 } 3417 SKIP_BLANKS; 3418 } 3419 ret = xmlParseAttValue(ctxt); 3420 ctxt->instate = 
XML_PARSER_DTD; 3421 if (ret == NULL) { 3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3423 ctxt->sax->error(ctxt->userData, 3424 "Attribute default value declaration error\n"); 3425 ctxt->wellFormed = 0; 3426 ctxt->disableSAX = 1; 3427 } else 3428 *value = ret; 3429 return(val); 3430} 3431 3432/** 3433 * xmlParseNotationType: 3434 * @ctxt: an XML parser context 3435 * 3436 * parse an Notation attribute type. 3437 * 3438 * Note: the leading 'NOTATION' S part has already being parsed... 3439 * 3440 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3441 * 3442 * [ VC: Notation Attributes ] 3443 * Values of this type must match one of the notation names included 3444 * in the declaration; all notation names in the declaration must be declared. 3445 * 3446 * Returns: the notation attribute tree built while parsing 3447 */ 3448 3449xmlEnumerationPtr 3450xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3451 xmlChar *name; 3452 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3453 3454 if (RAW != '(') { 3455 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3457 ctxt->sax->error(ctxt->userData, 3458 "'(' required to start 'NOTATION'\n"); 3459 ctxt->wellFormed = 0; 3460 ctxt->disableSAX = 1; 3461 return(NULL); 3462 } 3463 SHRINK; 3464 do { 3465 NEXT; 3466 SKIP_BLANKS; 3467 name = xmlParseNameComplex(ctxt); 3468 if (name == NULL) { 3469 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3471 ctxt->sax->error(ctxt->userData, 3472 "Name expected in NOTATION declaration\n"); 3473 ctxt->wellFormed = 0; 3474 ctxt->disableSAX = 1; 3475 return(ret); 3476 } 3477 cur = xmlCreateEnumeration(name); 3478 xmlFree(name); 3479 if (cur == NULL) return(ret); 3480 if (last == NULL) ret = last = cur; 3481 else { 3482 last->next = cur; 3483 last = cur; 3484 } 3485 SKIP_BLANKS; 3486 } while (RAW == '|'); 3487 if (RAW != ')') { 3488 ctxt->errNo = 
XML_ERR_NOTATION_NOT_FINISHED; 3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3490 ctxt->sax->error(ctxt->userData, 3491 "')' required to finish NOTATION declaration\n"); 3492 ctxt->wellFormed = 0; 3493 ctxt->disableSAX = 1; 3494 if ((last != NULL) && (last != ret)) 3495 xmlFreeEnumeration(last); 3496 return(ret); 3497 } 3498 NEXT; 3499 return(ret); 3500} 3501 3502/** 3503 * xmlParseEnumerationType: 3504 * @ctxt: an XML parser context 3505 * 3506 * parse an Enumeration attribute type. 3507 * 3508 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3509 * 3510 * [ VC: Enumeration ] 3511 * Values of this type must match one of the Nmtoken tokens in 3512 * the declaration 3513 * 3514 * Returns: the enumeration attribute tree built while parsing 3515 */ 3516 3517xmlEnumerationPtr 3518xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3519 xmlChar *name; 3520 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3521 3522 if (RAW != '(') { 3523 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3525 ctxt->sax->error(ctxt->userData, 3526 "'(' required to start ATTLIST enumeration\n"); 3527 ctxt->wellFormed = 0; 3528 ctxt->disableSAX = 1; 3529 return(NULL); 3530 } 3531 SHRINK; 3532 do { 3533 NEXT; 3534 SKIP_BLANKS; 3535 name = xmlParseNmtoken(ctxt); 3536 if (name == NULL) { 3537 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3539 ctxt->sax->error(ctxt->userData, 3540 "NmToken expected in ATTLIST enumeration\n"); 3541 ctxt->wellFormed = 0; 3542 ctxt->disableSAX = 1; 3543 return(ret); 3544 } 3545 cur = xmlCreateEnumeration(name); 3546 xmlFree(name); 3547 if (cur == NULL) return(ret); 3548 if (last == NULL) ret = last = cur; 3549 else { 3550 last->next = cur; 3551 last = cur; 3552 } 3553 SKIP_BLANKS; 3554 } while (RAW == '|'); 3555 if (RAW != ')') { 3556 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 3558 ctxt->sax->error(ctxt->userData, 3559 "')' required to finish ATTLIST enumeration\n"); 3560 ctxt->wellFormed = 0; 3561 ctxt->disableSAX = 1; 3562 return(ret); 3563 } 3564 NEXT; 3565 return(ret); 3566} 3567 3568/** 3569 * xmlParseEnumeratedType: 3570 * @ctxt: an XML parser context 3571 * @tree: the enumeration tree built while parsing 3572 * 3573 * parse an Enumerated attribute type. 3574 * 3575 * [57] EnumeratedType ::= NotationType | Enumeration 3576 * 3577 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3578 * 3579 * 3580 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3581 */ 3582 3583int 3584xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3585 if ((RAW == 'N') && (NXT(1) == 'O') && 3586 (NXT(2) == 'T') && (NXT(3) == 'A') && 3587 (NXT(4) == 'T') && (NXT(5) == 'I') && 3588 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3589 SKIP(8); 3590 if (!IS_BLANK(CUR)) { 3591 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3593 ctxt->sax->error(ctxt->userData, 3594 "Space required after 'NOTATION'\n"); 3595 ctxt->wellFormed = 0; 3596 ctxt->disableSAX = 1; 3597 return(0); 3598 } 3599 SKIP_BLANKS; 3600 *tree = xmlParseNotationType(ctxt); 3601 if (*tree == NULL) return(0); 3602 return(XML_ATTRIBUTE_NOTATION); 3603 } 3604 *tree = xmlParseEnumerationType(ctxt); 3605 if (*tree == NULL) return(0); 3606 return(XML_ATTRIBUTE_ENUMERATION); 3607} 3608 3609/** 3610 * xmlParseAttributeType: 3611 * @ctxt: an XML parser context 3612 * @tree: the enumeration tree built while parsing 3613 * 3614 * parse the Attribute list def for an element 3615 * 3616 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3617 * 3618 * [55] StringType ::= 'CDATA' 3619 * 3620 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3621 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3622 * 3623 * Validity constraints for attribute values syntax are checked in 3624 * 
xmlValidateAttributeValue() 3625 * 3626 * [ VC: ID ] 3627 * Values of type ID must match the Name production. A name must not 3628 * appear more than once in an XML document as a value of this type; 3629 * i.e., ID values must uniquely identify the elements which bear them. 3630 * 3631 * [ VC: One ID per Element Type ] 3632 * No element type may have more than one ID attribute specified. 3633 * 3634 * [ VC: ID Attribute Default ] 3635 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 3636 * 3637 * [ VC: IDREF ] 3638 * Values of type IDREF must match the Name production, and values 3639 * of type IDREFS must match Names; each IDREF Name must match the value 3640 * of an ID attribute on some element in the XML document; i.e. IDREF 3641 * values must match the value of some ID attribute. 3642 * 3643 * [ VC: Entity Name ] 3644 * Values of type ENTITY must match the Name production, values 3645 * of type ENTITIES must match Names; each Entity Name must match the 3646 * name of an unparsed entity declared in the DTD. 3647 * 3648 * [ VC: Name Token ] 3649 * Values of type NMTOKEN must match the Nmtoken production; values 3650 * of type NMTOKENS must match Nmtokens. 
3651 * 3652 * Returns the attribute type 3653 */ 3654int 3655xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3656 SHRINK; 3657 if ((RAW == 'C') && (NXT(1) == 'D') && 3658 (NXT(2) == 'A') && (NXT(3) == 'T') && 3659 (NXT(4) == 'A')) { 3660 SKIP(5); 3661 return(XML_ATTRIBUTE_CDATA); 3662 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3663 (NXT(2) == 'R') && (NXT(3) == 'E') && 3664 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3665 SKIP(6); 3666 return(XML_ATTRIBUTE_IDREFS); 3667 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3668 (NXT(2) == 'R') && (NXT(3) == 'E') && 3669 (NXT(4) == 'F')) { 3670 SKIP(5); 3671 return(XML_ATTRIBUTE_IDREF); 3672 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3673 SKIP(2); 3674 return(XML_ATTRIBUTE_ID); 3675 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3676 (NXT(2) == 'T') && (NXT(3) == 'I') && 3677 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3678 SKIP(6); 3679 return(XML_ATTRIBUTE_ENTITY); 3680 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3681 (NXT(2) == 'T') && (NXT(3) == 'I') && 3682 (NXT(4) == 'T') && (NXT(5) == 'I') && 3683 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3684 SKIP(8); 3685 return(XML_ATTRIBUTE_ENTITIES); 3686 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3687 (NXT(2) == 'T') && (NXT(3) == 'O') && 3688 (NXT(4) == 'K') && (NXT(5) == 'E') && 3689 (NXT(6) == 'N') && (NXT(7) == 'S')) { 3690 SKIP(8); 3691 return(XML_ATTRIBUTE_NMTOKENS); 3692 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3693 (NXT(2) == 'T') && (NXT(3) == 'O') && 3694 (NXT(4) == 'K') && (NXT(5) == 'E') && 3695 (NXT(6) == 'N')) { 3696 SKIP(7); 3697 return(XML_ATTRIBUTE_NMTOKEN); 3698 } 3699 return(xmlParseEnumeratedType(ctxt, tree)); 3700} 3701 3702/** 3703 * xmlParseAttributeListDecl: 3704 * @ctxt: an XML parser context 3705 * 3706 * : parse the Attribute list def for an element 3707 * 3708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 3709 * 3710 * [53] AttDef ::= S Name S AttType S DefaultDecl 3711 * 3712 */ 3713void 3714xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3715 xmlChar *elemName; 3716 xmlChar *attrName; 3717 xmlEnumerationPtr tree; 3718 3719 if ((RAW == '<') && (NXT(1) == '!') && 3720 (NXT(2) == 'A') && (NXT(3) == 'T') && 3721 (NXT(4) == 'T') && (NXT(5) == 'L') && 3722 (NXT(6) == 'I') && (NXT(7) == 'S') && 3723 (NXT(8) == 'T')) { 3724 xmlParserInputPtr input = ctxt->input; 3725 3726 SKIP(9); 3727 if (!IS_BLANK(CUR)) { 3728 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3730 ctxt->sax->error(ctxt->userData, 3731 "Space required after '<!ATTLIST'\n"); 3732 ctxt->wellFormed = 0; 3733 ctxt->disableSAX = 1; 3734 } 3735 SKIP_BLANKS; 3736 elemName = xmlParseNameComplex(ctxt); 3737 if (elemName == NULL) { 3738 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3740 ctxt->sax->error(ctxt->userData, 3741 "ATTLIST: no name for Element\n"); 3742 ctxt->wellFormed = 0; 3743 ctxt->disableSAX = 1; 3744 return; 3745 } 3746 SKIP_BLANKS; 3747 GROW; 3748 while (RAW != '>') { 3749 const xmlChar *check = CUR_PTR; 3750 int type; 3751 int def; 3752 xmlChar *defaultValue = NULL; 3753 3754 GROW; 3755 tree = NULL; 3756 attrName = xmlParseNameComplex(ctxt); 3757 if (attrName == NULL) { 3758 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3760 ctxt->sax->error(ctxt->userData, 3761 "ATTLIST: no name for Attribute\n"); 3762 ctxt->wellFormed = 0; 3763 ctxt->disableSAX = 1; 3764 break; 3765 } 3766 GROW; 3767 if (!IS_BLANK(CUR)) { 3768 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3770 ctxt->sax->error(ctxt->userData, 3771 "Space required after the attribute name\n"); 3772 ctxt->wellFormed = 0; 3773 ctxt->disableSAX = 1; 3774 if (attrName != NULL) 3775 xmlFree(attrName); 3776 if (defaultValue != NULL) 3777 
xmlFree(defaultValue); 3778 break; 3779 } 3780 SKIP_BLANKS; 3781 3782 type = xmlParseAttributeType(ctxt, &tree); 3783 if (type <= 0) { 3784 if (attrName != NULL) 3785 xmlFree(attrName); 3786 if (defaultValue != NULL) 3787 xmlFree(defaultValue); 3788 break; 3789 } 3790 3791 GROW; 3792 if (!IS_BLANK(CUR)) { 3793 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3795 ctxt->sax->error(ctxt->userData, 3796 "Space required after the attribute type\n"); 3797 ctxt->wellFormed = 0; 3798 ctxt->disableSAX = 1; 3799 if (attrName != NULL) 3800 xmlFree(attrName); 3801 if (defaultValue != NULL) 3802 xmlFree(defaultValue); 3803 if (tree != NULL) 3804 xmlFreeEnumeration(tree); 3805 break; 3806 } 3807 SKIP_BLANKS; 3808 3809 def = xmlParseDefaultDecl(ctxt, &defaultValue); 3810 if (def <= 0) { 3811 if (attrName != NULL) 3812 xmlFree(attrName); 3813 if (defaultValue != NULL) 3814 xmlFree(defaultValue); 3815 if (tree != NULL) 3816 xmlFreeEnumeration(tree); 3817 break; 3818 } 3819 3820 GROW; 3821 if (RAW != '>') { 3822 if (!IS_BLANK(CUR)) { 3823 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3825 ctxt->sax->error(ctxt->userData, 3826 "Space required after the attribute default value\n"); 3827 ctxt->wellFormed = 0; 3828 ctxt->disableSAX = 1; 3829 if (attrName != NULL) 3830 xmlFree(attrName); 3831 if (defaultValue != NULL) 3832 xmlFree(defaultValue); 3833 if (tree != NULL) 3834 xmlFreeEnumeration(tree); 3835 break; 3836 } 3837 SKIP_BLANKS; 3838 } 3839 if (check == CUR_PTR) { 3840 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 3841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3842 ctxt->sax->error(ctxt->userData, 3843 "xmlParseAttributeListDecl: detected internal error\n"); 3844 if (attrName != NULL) 3845 xmlFree(attrName); 3846 if (defaultValue != NULL) 3847 xmlFree(defaultValue); 3848 if (tree != NULL) 3849 xmlFreeEnumeration(tree); 3850 break; 3851 } 3852 if ((ctxt->sax != NULL) && 
(!ctxt->disableSAX) && 3853 (ctxt->sax->attributeDecl != NULL)) 3854 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 3855 type, def, defaultValue, tree); 3856 if (attrName != NULL) 3857 xmlFree(attrName); 3858 if (defaultValue != NULL) 3859 xmlFree(defaultValue); 3860 GROW; 3861 } 3862 if (RAW == '>') { 3863 if (input != ctxt->input) { 3864 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3866 ctxt->sax->error(ctxt->userData, 3867"Attribute list declaration doesn't start and stop in the same entity\n"); 3868 ctxt->wellFormed = 0; 3869 ctxt->disableSAX = 1; 3870 } 3871 NEXT; 3872 } 3873 3874 xmlFree(elemName); 3875 } 3876} 3877 3878/** 3879 * xmlParseElementMixedContentDecl: 3880 * @ctxt: an XML parser context 3881 * 3882 * parse the declaration for a Mixed Element content 3883 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3884 * 3885 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 3886 * '(' S? '#PCDATA' S? ')' 3887 * 3888 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 3889 * 3890 * [ VC: No Duplicate Types ] 3891 * The same name must not appear more than once in a single 3892 * mixed-content declaration. 
3893 * 3894 * returns: the list of the xmlElementContentPtr describing the element choices 3895 */ 3896xmlElementContentPtr 3897xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 3898 xmlElementContentPtr ret = NULL, cur = NULL, n; 3899 xmlChar *elem = NULL; 3900 3901 GROW; 3902 if ((RAW == '#') && (NXT(1) == 'P') && 3903 (NXT(2) == 'C') && (NXT(3) == 'D') && 3904 (NXT(4) == 'A') && (NXT(5) == 'T') && 3905 (NXT(6) == 'A')) { 3906 SKIP(7); 3907 SKIP_BLANKS; 3908 SHRINK; 3909 if (RAW == ')') { 3910 ctxt->entity = ctxt->input; 3911 NEXT; 3912 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3913 if (RAW == '*') { 3914 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3915 NEXT; 3916 } 3917 return(ret); 3918 } 3919 if ((RAW == '(') || (RAW == '|')) { 3920 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3921 if (ret == NULL) return(NULL); 3922 } 3923 while (RAW == '|') { 3924 NEXT; 3925 if (elem == NULL) { 3926 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3927 if (ret == NULL) return(NULL); 3928 ret->c1 = cur; 3929 if (cur != NULL) 3930 cur->parent = ret; 3931 cur = ret; 3932 } else { 3933 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3934 if (n == NULL) return(NULL); 3935 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 3936 if (n->c1 != NULL) 3937 n->c1->parent = n; 3938 cur->c2 = n; 3939 if (n != NULL) 3940 n->parent = cur; 3941 cur = n; 3942 xmlFree(elem); 3943 } 3944 SKIP_BLANKS; 3945 elem = xmlParseNameComplex(ctxt); 3946 if (elem == NULL) { 3947 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3949 ctxt->sax->error(ctxt->userData, 3950 "xmlParseElementMixedContentDecl : Name expected\n"); 3951 ctxt->wellFormed = 0; 3952 ctxt->disableSAX = 1; 3953 xmlFreeElementContent(cur); 3954 return(NULL); 3955 } 3956 SKIP_BLANKS; 3957 GROW; 3958 } 3959 if ((RAW == ')') && (NXT(1) == '*')) { 3960 if (elem != NULL) { 3961 cur->c2 = xmlNewElementContent(elem, 3962 
XML_ELEMENT_CONTENT_ELEMENT); 3963 if (cur->c2 != NULL) 3964 cur->c2->parent = cur; 3965 xmlFree(elem); 3966 } 3967 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3968 ctxt->entity = ctxt->input; 3969 SKIP(2); 3970 } else { 3971 if (elem != NULL) xmlFree(elem); 3972 xmlFreeElementContent(ret); 3973 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 3974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3975 ctxt->sax->error(ctxt->userData, 3976 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 3977 ctxt->wellFormed = 0; 3978 ctxt->disableSAX = 1; 3979 return(NULL); 3980 } 3981 3982 } else { 3983 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 3984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3985 ctxt->sax->error(ctxt->userData, 3986 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 3987 ctxt->wellFormed = 0; 3988 ctxt->disableSAX = 1; 3989 } 3990 return(ret); 3991} 3992 3993/** 3994 * xmlParseElementChildrenContentDecl: 3995 * @ctxt: an XML parser context 3996 * 3997 * parse the declaration for a Mixed Element content 3998 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3999 * 4000 * 4001 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4002 * 4003 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4004 * 4005 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4006 * 4007 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4008 * 4009 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4010 * TODO Parameter-entity replacement text must be properly nested 4011 * with parenthetized groups. That is to say, if either of the 4012 * opening or closing parentheses in a choice, seq, or Mixed 4013 * construct is contained in the replacement text for a parameter 4014 * entity, both must be contained in the same replacement text. 
For 4015 * interoperability, if a parameter-entity reference appears in a 4016 * choice, seq, or Mixed construct, its replacement text should not 4017 * be empty, and neither the first nor last non-blank character of 4018 * the replacement text should be a connector (| or ,). 4019 * 4020 * returns: the tree of xmlElementContentPtr describing the element 4021 * hierarchy. 4022 */ 4023xmlElementContentPtr 4024#ifdef VMS 4025xmlParseElementChildrenContentD 4026#else 4027xmlParseElementChildrenContentDecl 4028#endif 4029(xmlParserCtxtPtr ctxt) { 4030 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4031 xmlChar *elem; 4032 xmlChar type = 0; 4033 4034 SKIP_BLANKS; 4035 GROW; 4036 if (RAW == '(') { 4037 /* Recurse on first child */ 4038 NEXT; 4039 SKIP_BLANKS; 4040 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4041 SKIP_BLANKS; 4042 GROW; 4043 } else { 4044 elem = xmlParseNameComplex(ctxt); 4045 if (elem == NULL) { 4046 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4048 ctxt->sax->error(ctxt->userData, 4049 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4050 ctxt->wellFormed = 0; 4051 ctxt->disableSAX = 1; 4052 return(NULL); 4053 } 4054 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4055 GROW; 4056 if (RAW == '?') { 4057 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4058 NEXT; 4059 } else if (RAW == '*') { 4060 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4061 NEXT; 4062 } else if (RAW == '+') { 4063 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4064 NEXT; 4065 } else { 4066 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4067 } 4068 xmlFree(elem); 4069 GROW; 4070 } 4071 SKIP_BLANKS; 4072 SHRINK; 4073 while (RAW != ')') { 4074 /* 4075 * Each loop we parse one separator and one element. 
4076 */ 4077 if (RAW == ',') { 4078 if (type == 0) type = CUR; 4079 4080 /* 4081 * Detect "Name | Name , Name" error 4082 */ 4083 else if (type != CUR) { 4084 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4086 ctxt->sax->error(ctxt->userData, 4087 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4088 type); 4089 ctxt->wellFormed = 0; 4090 ctxt->disableSAX = 1; 4091 if ((op != NULL) && (op != ret)) 4092 xmlFreeElementContent(op); 4093 if ((last != NULL) && (last != ret) && 4094 (last != ret->c1) && (last != ret->c2)) 4095 xmlFreeElementContent(last); 4096 if (ret != NULL) 4097 xmlFreeElementContent(ret); 4098 return(NULL); 4099 } 4100 NEXT; 4101 4102 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4103 if (op == NULL) { 4104 xmlFreeElementContent(ret); 4105 return(NULL); 4106 } 4107 if (last == NULL) { 4108 op->c1 = ret; 4109 if (ret != NULL) 4110 ret->parent = op; 4111 ret = cur = op; 4112 } else { 4113 cur->c2 = op; 4114 if (op != NULL) 4115 op->parent = cur; 4116 op->c1 = last; 4117 if (last != NULL) 4118 last->parent = op; 4119 cur =op; 4120 last = NULL; 4121 } 4122 } else if (RAW == '|') { 4123 if (type == 0) type = CUR; 4124 4125 /* 4126 * Detect "Name , Name | Name" error 4127 */ 4128 else if (type != CUR) { 4129 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4131 ctxt->sax->error(ctxt->userData, 4132 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4133 type); 4134 ctxt->wellFormed = 0; 4135 ctxt->disableSAX = 1; 4136 if ((op != NULL) && (op != ret) && (op != last)) 4137 xmlFreeElementContent(op); 4138 if ((last != NULL) && (last != ret) && 4139 (last != ret->c1) && (last != ret->c2)) 4140 xmlFreeElementContent(last); 4141 if (ret != NULL) 4142 xmlFreeElementContent(ret); 4143 return(NULL); 4144 } 4145 NEXT; 4146 4147 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4148 if (op == NULL) { 4149 if ((op != NULL) && (op 
!= ret)) 4150 xmlFreeElementContent(op); 4151 if ((last != NULL) && (last != ret) && 4152 (last != ret->c1) && (last != ret->c2)) 4153 xmlFreeElementContent(last); 4154 if (ret != NULL) 4155 xmlFreeElementContent(ret); 4156 return(NULL); 4157 } 4158 if (last == NULL) { 4159 op->c1 = ret; 4160 if (ret != NULL) 4161 ret->parent = op; 4162 ret = cur = op; 4163 } else { 4164 cur->c2 = op; 4165 if (op != NULL) 4166 op->parent = cur; 4167 op->c1 = last; 4168 if (last != NULL) 4169 last->parent = op; 4170 cur =op; 4171 last = NULL; 4172 } 4173 } else { 4174 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4176 ctxt->sax->error(ctxt->userData, 4177 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4178 ctxt->wellFormed = 0; 4179 ctxt->disableSAX = 1; 4180 if ((op != NULL) && (op != ret)) 4181 xmlFreeElementContent(op); 4182 if ((last != NULL) && (last != ret) && 4183 (last != ret->c1) && (last != ret->c2)) 4184 xmlFreeElementContent(last); 4185 if (ret != NULL) 4186 xmlFreeElementContent(ret); 4187 return(NULL); 4188 } 4189 GROW; 4190 SKIP_BLANKS; 4191 GROW; 4192 if (RAW == '(') { 4193 /* Recurse on second child */ 4194 NEXT; 4195 SKIP_BLANKS; 4196 last = xmlParseElementChildrenContentDecl(ctxt); 4197 SKIP_BLANKS; 4198 } else { 4199 elem = xmlParseNameComplex(ctxt); 4200 if (elem == NULL) { 4201 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4203 ctxt->sax->error(ctxt->userData, 4204 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4205 ctxt->wellFormed = 0; 4206 ctxt->disableSAX = 1; 4207 if ((op != NULL) && (op != ret)) 4208 xmlFreeElementContent(op); 4209 if ((last != NULL) && (last != ret) && 4210 (last != ret->c1) && (last != ret->c2)) 4211 xmlFreeElementContent(last); 4212 if (ret != NULL) 4213 xmlFreeElementContent(ret); 4214 return(NULL); 4215 } 4216 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 
4217 xmlFree(elem); 4218 if (RAW == '?') { 4219 last->ocur = XML_ELEMENT_CONTENT_OPT; 4220 NEXT; 4221 } else if (RAW == '*') { 4222 last->ocur = XML_ELEMENT_CONTENT_MULT; 4223 NEXT; 4224 } else if (RAW == '+') { 4225 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4226 NEXT; 4227 } else { 4228 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4229 } 4230 } 4231 SKIP_BLANKS; 4232 GROW; 4233 } 4234 if ((cur != NULL) && (last != NULL)) { 4235 cur->c2 = last; 4236 if (last != NULL) 4237 last->parent = cur; 4238 } 4239 ctxt->entity = ctxt->input; 4240 NEXT; 4241 if (RAW == '?') { 4242 if (ret != NULL) 4243 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4244 NEXT; 4245 } else if (RAW == '*') { 4246 if (ret != NULL) 4247 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4248 NEXT; 4249 } else if (RAW == '+') { 4250 if (ret != NULL) 4251 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4252 NEXT; 4253 } 4254 return(ret); 4255} 4256 4257/** 4258 * xmlParseElementContentDecl: 4259 * @ctxt: an XML parser context 4260 * @name: the name of the element being defined. 
4261 * @result: the Element Content pointer will be stored here if any 4262 * 4263 * parse the declaration for an Element content either Mixed or Children, 4264 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4265 * 4266 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4267 * 4268 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4269 */ 4270 4271int 4272xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4273 xmlElementContentPtr *result) { 4274 4275 xmlElementContentPtr tree = NULL; 4276 xmlParserInputPtr input = ctxt->input; 4277 int res; 4278 4279 *result = NULL; 4280 4281 if (RAW != '(') { 4282 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4284 ctxt->sax->error(ctxt->userData, 4285 "xmlParseElementContentDecl : %s '(' expected\n", name); 4286 ctxt->wellFormed = 0; 4287 ctxt->disableSAX = 1; 4288 return(-1); 4289 } 4290 NEXT; 4291 GROW; 4292 SKIP_BLANKS; 4293 if ((RAW == '#') && (NXT(1) == 'P') && 4294 (NXT(2) == 'C') && (NXT(3) == 'D') && 4295 (NXT(4) == 'A') && (NXT(5) == 'T') && 4296 (NXT(6) == 'A')) { 4297 tree = xmlParseElementMixedContentDecl(ctxt); 4298 res = XML_ELEMENT_TYPE_MIXED; 4299 } else { 4300 tree = xmlParseElementChildrenContentDecl(ctxt); 4301 res = XML_ELEMENT_TYPE_ELEMENT; 4302 } 4303 if ((ctxt->entity != NULL) && (input != ctxt->entity)) { 4304 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4306 ctxt->sax->error(ctxt->userData, 4307"Element content declaration doesn't start and stop in the same entity\n"); 4308 ctxt->wellFormed = 0; 4309 ctxt->disableSAX = 1; 4310 } 4311 SKIP_BLANKS; 4312 *result = tree; 4313 return(res); 4314} 4315 4316/** 4317 * xmlParseElementDecl: 4318 * @ctxt: an XML parser context 4319 * 4320 * parse an Element declaration. 4321 * 4322 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4323 * 4324 * [ VC: Unique Element Type Declaration ] 4325 * No element type may be declared more than once 4326 * 4327 * Returns the type of the element, or -1 in case of error 4328 */ 4329int 4330xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4331 xmlChar *name; 4332 int ret = -1; 4333 xmlElementContentPtr content = NULL; 4334 4335 GROW; 4336 if ((RAW == '<') && (NXT(1) == '!') && 4337 (NXT(2) == 'E') && (NXT(3) == 'L') && 4338 (NXT(4) == 'E') && (NXT(5) == 'M') && 4339 (NXT(6) == 'E') && (NXT(7) == 'N') && 4340 (NXT(8) == 'T')) { 4341 xmlParserInputPtr input = ctxt->input; 4342 4343 SKIP(9); 4344 if (!IS_BLANK(CUR)) { 4345 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4347 ctxt->sax->error(ctxt->userData, 4348 "Space required after 'ELEMENT'\n"); 4349 ctxt->wellFormed = 0; 4350 ctxt->disableSAX = 1; 4351 } 4352 SKIP_BLANKS; 4353 name = xmlParseNameComplex(ctxt); 4354 if (name == NULL) { 4355 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4357 ctxt->sax->error(ctxt->userData, 4358 "xmlParseElementDecl: no name for Element\n"); 4359 ctxt->wellFormed = 0; 4360 ctxt->disableSAX = 1; 4361 return(-1); 4362 } 4363 while ((RAW == 0) && (ctxt->inputNr > 1)) 4364 xmlPopInput(ctxt); 4365 if (!IS_BLANK(CUR)) { 4366 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4368 ctxt->sax->error(ctxt->userData, 4369 "Space required after the element name\n"); 4370 ctxt->wellFormed = 0; 4371 ctxt->disableSAX = 1; 4372 } 4373 SKIP_BLANKS; 4374 if ((RAW == 'E') && (NXT(1) == 'M') && 4375 (NXT(2) == 'P') && (NXT(3) == 'T') && 4376 (NXT(4) == 'Y')) { 4377 SKIP(5); 4378 /* 4379 * Element must always be empty. 4380 */ 4381 ret = XML_ELEMENT_TYPE_EMPTY; 4382 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4383 (NXT(2) == 'Y')) { 4384 SKIP(3); 4385 /* 4386 * Element is a generic container. 
4387 */ 4388 ret = XML_ELEMENT_TYPE_ANY; 4389 } else if (RAW == '(') { 4390 ret = xmlParseElementContentDecl(ctxt, name, &content); 4391 } else { 4392 /* 4393 * [ WFC: PEs in Internal Subset ] error handling. 4394 */ 4395 if ((RAW == '%') && (ctxt->external == 0) && 4396 (ctxt->inputNr == 1)) { 4397 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4399 ctxt->sax->error(ctxt->userData, 4400 "PEReference: forbidden within markup decl in internal subset\n"); 4401 } else { 4402 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4404 ctxt->sax->error(ctxt->userData, 4405 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4406 } 4407 ctxt->wellFormed = 0; 4408 ctxt->disableSAX = 1; 4409 if (name != NULL) xmlFree(name); 4410 return(-1); 4411 } 4412 4413 SKIP_BLANKS; 4414 /* 4415 * Pop-up of finished entities. 4416 */ 4417 while ((RAW == 0) && (ctxt->inputNr > 1)) 4418 xmlPopInput(ctxt); 4419 SKIP_BLANKS; 4420 4421 if (RAW != '>') { 4422 ctxt->errNo = XML_ERR_GT_REQUIRED; 4423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4424 ctxt->sax->error(ctxt->userData, 4425 "xmlParseElementDecl: expected '>' at the end\n"); 4426 ctxt->wellFormed = 0; 4427 ctxt->disableSAX = 1; 4428 } else { 4429 if (input != ctxt->input) { 4430 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4432 ctxt->sax->error(ctxt->userData, 4433"Element declaration doesn't start and stop in the same entity\n"); 4434 ctxt->wellFormed = 0; 4435 ctxt->disableSAX = 1; 4436 } 4437 4438 NEXT; 4439 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4440 (ctxt->sax->elementDecl != NULL)) 4441 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4442 content); 4443 } 4444 if (content != NULL) { 4445 xmlFreeElementContent(content); 4446 } 4447 if (name != NULL) { 4448 xmlFree(name); 4449 } 4450 } 4451 return(ret); 4452} 4453 4454/** 4455 * 
xmlParseMarkupDecl: 4456 * @ctxt: an XML parser context 4457 * 4458 * parse Markup declarations 4459 * 4460 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4461 * NotationDecl | PI | Comment 4462 * 4463 * [ VC: Proper Declaration/PE Nesting ] 4464 * Parameter-entity replacement text must be properly nested with 4465 * markup declarations. That is to say, if either the first character 4466 * or the last character of a markup declaration (markupdecl above) is 4467 * contained in the replacement text for a parameter-entity reference, 4468 * both must be contained in the same replacement text. 4469 * 4470 * [ WFC: PEs in Internal Subset ] 4471 * In the internal DTD subset, parameter-entity references can occur 4472 * only where markup declarations can occur, not within markup declarations. 4473 * (This does not apply to references that occur in external parameter 4474 * entities or to the external subset.) 4475 */ 4476void 4477xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4478 GROW; 4479 xmlParseElementDecl(ctxt); 4480 xmlParseAttributeListDecl(ctxt); 4481 xmlParseEntityDecl(ctxt); 4482 xmlParseNotationDecl(ctxt); 4483 xmlParsePI(ctxt); 4484 xmlParseComment(ctxt); 4485 /* 4486 * This is only for internal subset. On external entities, 4487 * the replacement is done before parsing stage 4488 */ 4489 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4490 xmlParsePEReference(ctxt); 4491 ctxt->instate = XML_PARSER_DTD; 4492} 4493 4494/** 4495 * xmlParseTextDecl: 4496 * @ctxt: an XML parser context 4497 * 4498 * parse an XML declaration header for external entities 4499 * 4500 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4501 * 4502 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4503 */ 4504 4505void 4506xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4507 xmlChar *version; 4508 4509 /* 4510 * We know that '<?xml' is here. 
4511 */ 4512 if ((RAW == '<') && (NXT(1) == '?') && 4513 (NXT(2) == 'x') && (NXT(3) == 'm') && 4514 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4515 SKIP(5); 4516 } else { 4517 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4519 ctxt->sax->error(ctxt->userData, 4520 "Text declaration '<?xml' required\n"); 4521 ctxt->wellFormed = 0; 4522 ctxt->disableSAX = 1; 4523 4524 return; 4525 } 4526 4527 if (!IS_BLANK(CUR)) { 4528 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4530 ctxt->sax->error(ctxt->userData, 4531 "Space needed after '<?xml'\n"); 4532 ctxt->wellFormed = 0; 4533 ctxt->disableSAX = 1; 4534 } 4535 SKIP_BLANKS; 4536 4537 /* 4538 * We may have the VersionInfo here. 4539 */ 4540 version = xmlParseVersionInfo(ctxt); 4541 if (version == NULL) 4542 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4543 ctxt->input->version = version; 4544 4545 /* 4546 * We must have the encoding declaration 4547 */ 4548 if (!IS_BLANK(CUR)) { 4549 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4551 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4552 ctxt->wellFormed = 0; 4553 ctxt->disableSAX = 1; 4554 } 4555 xmlParseEncodingDecl(ctxt); 4556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4557 /* 4558 * The XML REC instructs us to stop parsing right here 4559 */ 4560 return; 4561 } 4562 4563 SKIP_BLANKS; 4564 if ((RAW == '?') && (NXT(1) == '>')) { 4565 SKIP(2); 4566 } else if (RAW == '>') { 4567 /* Deprecated old WD ... 
*/ 4568 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4570 ctxt->sax->error(ctxt->userData, 4571 "XML declaration must end-up with '?>'\n"); 4572 ctxt->wellFormed = 0; 4573 ctxt->disableSAX = 1; 4574 NEXT; 4575 } else { 4576 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4578 ctxt->sax->error(ctxt->userData, 4579 "parsing XML declaration: '?>' expected\n"); 4580 ctxt->wellFormed = 0; 4581 ctxt->disableSAX = 1; 4582 MOVETO_ENDTAG(CUR_PTR); 4583 NEXT; 4584 } 4585} 4586 4587/* 4588 * xmlParseConditionalSections 4589 * @ctxt: an XML parser context 4590 * 4591 * [61] conditionalSect ::= includeSect | ignoreSect 4592 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4593 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4594 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4595 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4596 */ 4597 4598static void 4599xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4600 SKIP(3); 4601 SKIP_BLANKS; 4602 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4603 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4604 (NXT(6) == 'E')) { 4605 SKIP(7); 4606 SKIP_BLANKS; 4607 if (RAW != '[') { 4608 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4610 ctxt->sax->error(ctxt->userData, 4611 "XML conditional section '[' expected\n"); 4612 ctxt->wellFormed = 0; 4613 ctxt->disableSAX = 1; 4614 } else { 4615 NEXT; 4616 } 4617 if (xmlParserDebugEntities) { 4618 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4619 xmlGenericError(xmlGenericErrorContext, 4620 "%s(%d): ", ctxt->input->filename, 4621 ctxt->input->line); 4622 xmlGenericError(xmlGenericErrorContext, 4623 "Entering INCLUDE Conditional Section\n"); 4624 } 4625 4626 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4627 
(NXT(2) != '>'))) { 4628 const xmlChar *check = CUR_PTR; 4629 int cons = ctxt->input->consumed; 4630 int tok = ctxt->token; 4631 4632 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4633 xmlParseConditionalSections(ctxt); 4634 } else if (IS_BLANK(CUR)) { 4635 NEXT; 4636 } else if (RAW == '%') { 4637 xmlParsePEReference(ctxt); 4638 } else 4639 xmlParseMarkupDecl(ctxt); 4640 4641 /* 4642 * Pop-up of finished entities. 4643 */ 4644 while ((RAW == 0) && (ctxt->inputNr > 1)) 4645 xmlPopInput(ctxt); 4646 4647 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4648 (tok == ctxt->token)) { 4649 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4651 ctxt->sax->error(ctxt->userData, 4652 "Content error in the external subset\n"); 4653 ctxt->wellFormed = 0; 4654 ctxt->disableSAX = 1; 4655 break; 4656 } 4657 } 4658 if (xmlParserDebugEntities) { 4659 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4660 xmlGenericError(xmlGenericErrorContext, 4661 "%s(%d): ", ctxt->input->filename, 4662 ctxt->input->line); 4663 xmlGenericError(xmlGenericErrorContext, 4664 "Leaving INCLUDE Conditional Section\n"); 4665 } 4666 4667 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4668 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4669 int state; 4670 int instate; 4671 int depth = 0; 4672 4673 SKIP(6); 4674 SKIP_BLANKS; 4675 if (RAW != '[') { 4676 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4678 ctxt->sax->error(ctxt->userData, 4679 "XML conditional section '[' expected\n"); 4680 ctxt->wellFormed = 0; 4681 ctxt->disableSAX = 1; 4682 } else { 4683 NEXT; 4684 } 4685 if (xmlParserDebugEntities) { 4686 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4687 xmlGenericError(xmlGenericErrorContext, 4688 "%s(%d): ", ctxt->input->filename, 4689 ctxt->input->line); 4690 xmlGenericError(xmlGenericErrorContext, 4691 "Entering IGNORE Conditional 
Section\n"); 4692 } 4693 4694 /* 4695 * Parse up to the end of the conditionnal section 4696 * But disable SAX event generating DTD building in the meantime 4697 */ 4698 state = ctxt->disableSAX; 4699 instate = ctxt->instate; 4700 ctxt->disableSAX = 1; 4701 ctxt->instate = XML_PARSER_IGNORE; 4702 4703 while (depth >= 0) { 4704 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4705 depth++; 4706 SKIP(3); 4707 continue; 4708 } 4709 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4710 if (--depth >= 0) SKIP(3); 4711 continue; 4712 } 4713 NEXT; 4714 continue; 4715 } 4716 4717 ctxt->disableSAX = state; 4718 ctxt->instate = instate; 4719 4720 if (xmlParserDebugEntities) { 4721 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4722 xmlGenericError(xmlGenericErrorContext, 4723 "%s(%d): ", ctxt->input->filename, 4724 ctxt->input->line); 4725 xmlGenericError(xmlGenericErrorContext, 4726 "Leaving IGNORE Conditional Section\n"); 4727 } 4728 4729 } else { 4730 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4732 ctxt->sax->error(ctxt->userData, 4733 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4734 ctxt->wellFormed = 0; 4735 ctxt->disableSAX = 1; 4736 } 4737 4738 if (RAW == 0) 4739 SHRINK; 4740 4741 if (RAW == 0) { 4742 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4744 ctxt->sax->error(ctxt->userData, 4745 "XML conditional section not closed\n"); 4746 ctxt->wellFormed = 0; 4747 ctxt->disableSAX = 1; 4748 } else { 4749 SKIP(3); 4750 } 4751} 4752 4753/** 4754 * xmlParseExternalSubset: 4755 * @ctxt: an XML parser context 4756 * @ExternalID: the external identifier 4757 * @SystemID: the system identifier (or URL) 4758 * 4759 * parse Markup declarations from an external subset 4760 * 4761 * [30] extSubset ::= textDecl? 
extSubsetDecl 4762 * 4763 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4764 */ 4765void 4766xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4767 const xmlChar *SystemID) { 4768 GROW; 4769 if ((RAW == '<') && (NXT(1) == '?') && 4770 (NXT(2) == 'x') && (NXT(3) == 'm') && 4771 (NXT(4) == 'l')) { 4772 xmlParseTextDecl(ctxt); 4773 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4774 /* 4775 * The XML REC instructs us to stop parsing right here 4776 */ 4777 ctxt->instate = XML_PARSER_EOF; 4778 return; 4779 } 4780 } 4781 if (ctxt->myDoc == NULL) { 4782 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4783 } 4784 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4785 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 4786 4787 ctxt->instate = XML_PARSER_DTD; 4788 ctxt->external = 1; 4789 while (((RAW == '<') && (NXT(1) == '?')) || 4790 ((RAW == '<') && (NXT(1) == '!')) || 4791 IS_BLANK(CUR)) { 4792 const xmlChar *check = CUR_PTR; 4793 int cons = ctxt->input->consumed; 4794 int tok = ctxt->token; 4795 4796 GROW; 4797 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4798 xmlParseConditionalSections(ctxt); 4799 } else if (IS_BLANK(CUR)) { 4800 NEXT; 4801 } else if (RAW == '%') { 4802 xmlParsePEReference(ctxt); 4803 } else 4804 xmlParseMarkupDecl(ctxt); 4805 4806 /* 4807 * Pop-up of finished entities. 
4808 */ 4809 while ((RAW == 0) && (ctxt->inputNr > 1)) 4810 xmlPopInput(ctxt); 4811 4812 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4813 (tok == ctxt->token)) { 4814 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4816 ctxt->sax->error(ctxt->userData, 4817 "Content error in the external subset\n"); 4818 ctxt->wellFormed = 0; 4819 ctxt->disableSAX = 1; 4820 break; 4821 } 4822 } 4823 4824 if (RAW != 0) { 4825 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4827 ctxt->sax->error(ctxt->userData, 4828 "Extra content at the end of the document\n"); 4829 ctxt->wellFormed = 0; 4830 ctxt->disableSAX = 1; 4831 } 4832 4833} 4834 4835/** 4836 * xmlParseReference: 4837 * @ctxt: an XML parser context 4838 * 4839 * parse and handle entity references in content, depending on the SAX 4840 * interface, this may end-up in a call to character() if this is a 4841 * CharRef, a predefined entity, if there is no reference() callback. 4842 * or if the parser was asked to switch to that mode. 4843 * 4844 * [67] Reference ::= EntityRef | CharRef 4845 */ 4846void 4847xmlParseReference(xmlParserCtxtPtr ctxt) { 4848 xmlEntityPtr ent; 4849 xmlChar *val; 4850 if (RAW != '&') return; 4851 4852 if (NXT(1) == '#') { 4853 int i = 0; 4854 xmlChar out[10]; 4855 int hex = NXT(2); 4856 int value = xmlParseCharRef(ctxt); 4857 4858 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 4859 /* 4860 * So we are using non-UTF-8 buffers 4861 * Check that the char fit on 8bits, if not 4862 * generate a CharRef. 
4863 */ 4864 if (value <= 0xFF) { 4865 out[0] = value; 4866 out[1] = 0; 4867 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4868 (!ctxt->disableSAX)) 4869 ctxt->sax->characters(ctxt->userData, out, 1); 4870 } else { 4871 if ((hex == 'x') || (hex == 'X')) 4872 sprintf((char *)out, "#x%X", value); 4873 else 4874 sprintf((char *)out, "#%d", value); 4875 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4876 (!ctxt->disableSAX)) 4877 ctxt->sax->reference(ctxt->userData, out); 4878 } 4879 } else { 4880 /* 4881 * Just encode the value in UTF-8 4882 */ 4883 COPY_BUF(0 ,out, i, value); 4884 out[i] = 0; 4885 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4886 (!ctxt->disableSAX)) 4887 ctxt->sax->characters(ctxt->userData, out, i); 4888 } 4889 } else { 4890 ent = xmlParseEntityRef(ctxt); 4891 if (ent == NULL) return; 4892 if ((ent->name != NULL) && 4893 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 4894 xmlNodePtr list = NULL; 4895 int ret; 4896 4897 4898 /* 4899 * The first reference to the entity trigger a parsing phase 4900 * where the ent->children is filled with the result from 4901 * the parsing. 4902 */ 4903 if (ent->children == NULL) { 4904 xmlChar *value; 4905 value = ent->content; 4906 4907 /* 4908 * Check that this entity is well formed 4909 */ 4910 if ((value != NULL) && 4911 (value[1] == 0) && (value[0] == '<') && 4912 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 4913 /* 4914 * DONE: get definite answer on this !!! 4915 * Lots of entity decls are used to declare a single 4916 * char 4917 * <!ENTITY lt "<"> 4918 * Which seems to be valid since 4919 * 2.4: The ampersand character (&) and the left angle 4920 * bracket (<) may appear in their literal form only 4921 * when used ... They are also legal within the literal 4922 * entity value of an internal entity declaration;i 4923 * see "4.3.2 Well-Formed Parsed Entities". 4924 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
4925 * Looking at the OASIS test suite and James Clark 4926 * tests, this is broken. However the XML REC uses 4927 * it. Is the XML REC not well-formed ???? 4928 * This is a hack to avoid this problem 4929 * 4930 * ANSWER: since lt gt amp .. are already defined, 4931 * this is a redefinition and hence the fact that the 4932 * contentis not well balanced is not a Wf error, this 4933 * is lousy but acceptable. 4934 */ 4935 list = xmlNewDocText(ctxt->myDoc, value); 4936 if (list != NULL) { 4937 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4938 (ent->children == NULL)) { 4939 ent->children = list; 4940 ent->last = list; 4941 list->parent = (xmlNodePtr) ent; 4942 } else { 4943 xmlFreeNodeList(list); 4944 } 4945 } else if (list != NULL) { 4946 xmlFreeNodeList(list); 4947 } 4948 } else { 4949 /* 4950 * 4.3.2: An internal general parsed entity is well-formed 4951 * if its replacement text matches the production labeled 4952 * content. 4953 */ 4954 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 4955 ctxt->depth++; 4956 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 4957 ctxt->sax, NULL, ctxt->depth, 4958 value, &list); 4959 ctxt->depth--; 4960 } else if (ent->etype == 4961 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 4962 ctxt->depth++; 4963 ret = xmlParseExternalEntity(ctxt->myDoc, 4964 ctxt->sax, NULL, ctxt->depth, 4965 ent->URI, ent->ExternalID, &list); 4966 ctxt->depth--; 4967 } else { 4968 ret = -1; 4969 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4970 ctxt->sax->error(ctxt->userData, 4971 "Internal: invalid entity type\n"); 4972 } 4973 if (ret == XML_ERR_ENTITY_LOOP) { 4974 ctxt->errNo = XML_ERR_ENTITY_LOOP; 4975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4976 ctxt->sax->error(ctxt->userData, 4977 "Detected entity reference loop\n"); 4978 ctxt->wellFormed = 0; 4979 ctxt->disableSAX = 1; 4980 } else if ((ret == 0) && (list != NULL)) { 4981 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4982 (ent->children == NULL)) { 4983 ent->children = 
list; 4984 while (list != NULL) { 4985 list->parent = (xmlNodePtr) ent; 4986 if (list->next == NULL) 4987 ent->last = list; 4988 list = list->next; 4989 } 4990 } else { 4991 xmlFreeNodeList(list); 4992 } 4993 } else if (ret > 0) { 4994 ctxt->errNo = ret; 4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4996 ctxt->sax->error(ctxt->userData, 4997 "Entity value required\n"); 4998 ctxt->wellFormed = 0; 4999 ctxt->disableSAX = 1; 5000 } else if (list != NULL) { 5001 xmlFreeNodeList(list); 5002 } 5003 } 5004 } 5005 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5006 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5007 /* 5008 * Create a node. 5009 */ 5010 ctxt->sax->reference(ctxt->userData, ent->name); 5011 return; 5012 } else if (ctxt->replaceEntities) { 5013 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5014 /* 5015 * Seems we are generating the DOM content, do 5016 * a simple tree copy 5017 */ 5018 xmlNodePtr new; 5019 new = xmlCopyNodeList(ent->children); 5020 5021 xmlAddChildList(ctxt->node, new); 5022 /* 5023 * This is to avoid a nasty side effect, see 5024 * characters() in SAX.c 5025 */ 5026 ctxt->nodemem = 0; 5027 ctxt->nodelen = 0; 5028 return; 5029 } else { 5030 /* 5031 * Probably running in SAX mode 5032 */ 5033 xmlParserInputPtr input; 5034 5035 input = xmlNewEntityInputStream(ctxt, ent); 5036 xmlPushInput(ctxt, input); 5037 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5038 (RAW == '<') && (NXT(1) == '?') && 5039 (NXT(2) == 'x') && (NXT(3) == 'm') && 5040 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5041 xmlParseTextDecl(ctxt); 5042 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5043 /* 5044 * The XML REC instructs us to stop parsing right here 5045 */ 5046 ctxt->instate = XML_PARSER_EOF; 5047 return; 5048 } 5049 if (input->standalone == 1) { 5050 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5052 ctxt->sax->error(ctxt->userData, 5053 "external 
parsed entities cannot be standalone\n"); 5054 ctxt->wellFormed = 0; 5055 ctxt->disableSAX = 1; 5056 } 5057 } 5058 return; 5059 } 5060 } 5061 } else { 5062 val = ent->content; 5063 if (val == NULL) return; 5064 /* 5065 * inline the entity. 5066 */ 5067 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5068 (!ctxt->disableSAX)) 5069 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5070 } 5071 } 5072} 5073 5074/** 5075 * xmlParseEntityRef: 5076 * @ctxt: an XML parser context 5077 * 5078 * parse ENTITY references declarations 5079 * 5080 * [68] EntityRef ::= '&' Name ';' 5081 * 5082 * [ WFC: Entity Declared ] 5083 * In a document without any DTD, a document with only an internal DTD 5084 * subset which contains no parameter entity references, or a document 5085 * with "standalone='yes'", the Name given in the entity reference 5086 * must match that in an entity declaration, except that well-formed 5087 * documents need not declare any of the following entities: amp, lt, 5088 * gt, apos, quot. The declaration of a parameter entity must precede 5089 * any reference to it. Similarly, the declaration of a general entity 5090 * must precede any reference to it which appears in a default value in an 5091 * attribute-list declaration. Note that if entities are declared in the 5092 * external subset or in external parameter entities, a non-validating 5093 * processor is not obligated to read and process their declarations; 5094 * for such documents, the rule that an entity must be declared is a 5095 * well-formedness constraint only if standalone='yes'. 5096 * 5097 * [ WFC: Parsed Entity ] 5098 * An entity reference must not contain the name of an unparsed entity 5099 * 5100 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5101 */ 5102xmlEntityPtr 5103xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5104 xmlChar *name; 5105 xmlEntityPtr ent = NULL; 5106 5107 GROW; 5108 5109 if (RAW == '&') { 5110 NEXT; 5111 name = xmlParseName(ctxt); 5112 if (name == NULL) { 5113 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5115 ctxt->sax->error(ctxt->userData, 5116 "xmlParseEntityRef: no name\n"); 5117 ctxt->wellFormed = 0; 5118 ctxt->disableSAX = 1; 5119 } else { 5120 if (RAW == ';') { 5121 NEXT; 5122 /* 5123 * Ask first SAX for entity resolution, otherwise try the 5124 * predefined set. 5125 */ 5126 if (ctxt->sax != NULL) { 5127 if (ctxt->sax->getEntity != NULL) 5128 ent = ctxt->sax->getEntity(ctxt->userData, name); 5129 if (ent == NULL) 5130 ent = xmlGetPredefinedEntity(name); 5131 } 5132 /* 5133 * [ WFC: Entity Declared ] 5134 * In a document without any DTD, a document with only an 5135 * internal DTD subset which contains no parameter entity 5136 * references, or a document with "standalone='yes'", the 5137 * Name given in the entity reference must match that in an 5138 * entity declaration, except that well-formed documents 5139 * need not declare any of the following entities: amp, lt, 5140 * gt, apos, quot. 5141 * The declaration of a parameter entity must precede any 5142 * reference to it. 5143 * Similarly, the declaration of a general entity must 5144 * precede any reference to it which appears in a default 5145 * value in an attribute-list declaration. Note that if 5146 * entities are declared in the external subset or in 5147 * external parameter entities, a non-validating processor 5148 * is not obligated to read and process their declarations; 5149 * for such documents, the rule that an entity must be 5150 * declared is a well-formedness constraint only if 5151 * standalone='yes'. 
5152 */ 5153 if (ent == NULL) { 5154 if ((ctxt->standalone == 1) || 5155 ((ctxt->hasExternalSubset == 0) && 5156 (ctxt->hasPErefs == 0))) { 5157 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5159 ctxt->sax->error(ctxt->userData, 5160 "Entity '%s' not defined\n", name); 5161 ctxt->wellFormed = 0; 5162 ctxt->disableSAX = 1; 5163 } else { 5164 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5165 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5166 ctxt->sax->warning(ctxt->userData, 5167 "Entity '%s' not defined\n", name); 5168 } 5169 } 5170 5171 /* 5172 * [ WFC: Parsed Entity ] 5173 * An entity reference must not contain the name of an 5174 * unparsed entity 5175 */ 5176 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5177 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5179 ctxt->sax->error(ctxt->userData, 5180 "Entity reference to unparsed entity %s\n", name); 5181 ctxt->wellFormed = 0; 5182 ctxt->disableSAX = 1; 5183 } 5184 5185 /* 5186 * [ WFC: No External Entity References ] 5187 * Attribute values cannot contain direct or indirect 5188 * entity references to external entities. 5189 */ 5190 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5191 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5192 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5194 ctxt->sax->error(ctxt->userData, 5195 "Attribute references external entity '%s'\n", name); 5196 ctxt->wellFormed = 0; 5197 ctxt->disableSAX = 1; 5198 } 5199 /* 5200 * [ WFC: No < in Attribute Values ] 5201 * The replacement text of any entity referred to directly or 5202 * indirectly in an attribute value (other than "<") must 5203 * not contain a <. 
5204 */ 5205 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5206 (ent != NULL) && 5207 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5208 (ent->content != NULL) && 5209 (xmlStrchr(ent->content, '<'))) { 5210 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5212 ctxt->sax->error(ctxt->userData, 5213 "'<' in entity '%s' is not allowed in attributes values\n", name); 5214 ctxt->wellFormed = 0; 5215 ctxt->disableSAX = 1; 5216 } 5217 5218 /* 5219 * Internal check, no parameter entities here ... 5220 */ 5221 else { 5222 switch (ent->etype) { 5223 case XML_INTERNAL_PARAMETER_ENTITY: 5224 case XML_EXTERNAL_PARAMETER_ENTITY: 5225 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5227 ctxt->sax->error(ctxt->userData, 5228 "Attempt to reference the parameter entity '%s'\n", name); 5229 ctxt->wellFormed = 0; 5230 ctxt->disableSAX = 1; 5231 break; 5232 default: 5233 break; 5234 } 5235 } 5236 5237 /* 5238 * [ WFC: No Recursion ] 5239 * A parsed entity must not contain a recursive reference 5240 * to itself, either directly or indirectly. 5241 * Done somewhere else 5242 */ 5243 5244 } else { 5245 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5247 ctxt->sax->error(ctxt->userData, 5248 "xmlParseEntityRef: expecting ';'\n"); 5249 ctxt->wellFormed = 0; 5250 ctxt->disableSAX = 1; 5251 } 5252 xmlFree(name); 5253 } 5254 } 5255 return(ent); 5256} 5257 5258/** 5259 * xmlParseStringEntityRef: 5260 * @ctxt: an XML parser context 5261 * @str: a pointer to an index in the string 5262 * 5263 * parse ENTITY references declarations, but this version parses it from 5264 * a string value. 
5265 * 5266 * [68] EntityRef ::= '&' Name ';' 5267 * 5268 * [ WFC: Entity Declared ] 5269 * In a document without any DTD, a document with only an internal DTD 5270 * subset which contains no parameter entity references, or a document 5271 * with "standalone='yes'", the Name given in the entity reference 5272 * must match that in an entity declaration, except that well-formed 5273 * documents need not declare any of the following entities: amp, lt, 5274 * gt, apos, quot. The declaration of a parameter entity must precede 5275 * any reference to it. Similarly, the declaration of a general entity 5276 * must precede any reference to it which appears in a default value in an 5277 * attribute-list declaration. Note that if entities are declared in the 5278 * external subset or in external parameter entities, a non-validating 5279 * processor is not obligated to read and process their declarations; 5280 * for such documents, the rule that an entity must be declared is a 5281 * well-formedness constraint only if standalone='yes'. 5282 * 5283 * [ WFC: Parsed Entity ] 5284 * An entity reference must not contain the name of an unparsed entity 5285 * 5286 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5287 * is updated to the current location in the string. 
5288 */ 5289xmlEntityPtr 5290xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5291 xmlChar *name; 5292 const xmlChar *ptr; 5293 xmlChar cur; 5294 xmlEntityPtr ent = NULL; 5295 5296 if ((str == NULL) || (*str == NULL)) 5297 return(NULL); 5298 ptr = *str; 5299 cur = *ptr; 5300 if (cur == '&') { 5301 ptr++; 5302 cur = *ptr; 5303 name = xmlParseStringName(ctxt, &ptr); 5304 if (name == NULL) { 5305 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5307 ctxt->sax->error(ctxt->userData, 5308 "xmlParseEntityRef: no name\n"); 5309 ctxt->wellFormed = 0; 5310 ctxt->disableSAX = 1; 5311 } else { 5312 if (*ptr == ';') { 5313 ptr++; 5314 /* 5315 * Ask first SAX for entity resolution, otherwise try the 5316 * predefined set. 5317 */ 5318 if (ctxt->sax != NULL) { 5319 if (ctxt->sax->getEntity != NULL) 5320 ent = ctxt->sax->getEntity(ctxt->userData, name); 5321 if (ent == NULL) 5322 ent = xmlGetPredefinedEntity(name); 5323 } 5324 /* 5325 * [ WFC: Entity Declared ] 5326 * In a document without any DTD, a document with only an 5327 * internal DTD subset which contains no parameter entity 5328 * references, or a document with "standalone='yes'", the 5329 * Name given in the entity reference must match that in an 5330 * entity declaration, except that well-formed documents 5331 * need not declare any of the following entities: amp, lt, 5332 * gt, apos, quot. 5333 * The declaration of a parameter entity must precede any 5334 * reference to it. 5335 * Similarly, the declaration of a general entity must 5336 * precede any reference to it which appears in a default 5337 * value in an attribute-list declaration. 
Note that if 5338 * entities are declared in the external subset or in 5339 * external parameter entities, a non-validating processor 5340 * is not obligated to read and process their declarations; 5341 * for such documents, the rule that an entity must be 5342 * declared is a well-formedness constraint only if 5343 * standalone='yes'. 5344 */ 5345 if (ent == NULL) { 5346 if ((ctxt->standalone == 1) || 5347 ((ctxt->hasExternalSubset == 0) && 5348 (ctxt->hasPErefs == 0))) { 5349 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5351 ctxt->sax->error(ctxt->userData, 5352 "Entity '%s' not defined\n", name); 5353 ctxt->wellFormed = 0; 5354 ctxt->disableSAX = 1; 5355 } else { 5356 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5357 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5358 ctxt->sax->warning(ctxt->userData, 5359 "Entity '%s' not defined\n", name); 5360 } 5361 } 5362 5363 /* 5364 * [ WFC: Parsed Entity ] 5365 * An entity reference must not contain the name of an 5366 * unparsed entity 5367 */ 5368 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5369 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5371 ctxt->sax->error(ctxt->userData, 5372 "Entity reference to unparsed entity %s\n", name); 5373 ctxt->wellFormed = 0; 5374 ctxt->disableSAX = 1; 5375 } 5376 5377 /* 5378 * [ WFC: No External Entity References ] 5379 * Attribute values cannot contain direct or indirect 5380 * entity references to external entities. 
5381 */ 5382 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5383 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5384 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5386 ctxt->sax->error(ctxt->userData, 5387 "Attribute references external entity '%s'\n", name); 5388 ctxt->wellFormed = 0; 5389 ctxt->disableSAX = 1; 5390 } 5391 /* 5392 * [ WFC: No < in Attribute Values ] 5393 * The replacement text of any entity referred to directly or 5394 * indirectly in an attribute value (other than "<") must 5395 * not contain a <. 5396 */ 5397 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5398 (ent != NULL) && 5399 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5400 (ent->content != NULL) && 5401 (xmlStrchr(ent->content, '<'))) { 5402 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5404 ctxt->sax->error(ctxt->userData, 5405 "'<' in entity '%s' is not allowed in attributes values\n", name); 5406 ctxt->wellFormed = 0; 5407 ctxt->disableSAX = 1; 5408 } 5409 5410 /* 5411 * Internal check, no parameter entities here ... 5412 */ 5413 else { 5414 switch (ent->etype) { 5415 case XML_INTERNAL_PARAMETER_ENTITY: 5416 case XML_EXTERNAL_PARAMETER_ENTITY: 5417 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5419 ctxt->sax->error(ctxt->userData, 5420 "Attempt to reference the parameter entity '%s'\n", name); 5421 ctxt->wellFormed = 0; 5422 ctxt->disableSAX = 1; 5423 break; 5424 default: 5425 break; 5426 } 5427 } 5428 5429 /* 5430 * [ WFC: No Recursion ] 5431 * A parsed entity must not contain a recursive reference 5432 * to itself, either directly or indirectly. 
5433 * Done somewhwere else 5434 */ 5435 5436 } else { 5437 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5439 ctxt->sax->error(ctxt->userData, 5440 "xmlParseEntityRef: expecting ';'\n"); 5441 ctxt->wellFormed = 0; 5442 ctxt->disableSAX = 1; 5443 } 5444 xmlFree(name); 5445 } 5446 } 5447 *str = ptr; 5448 return(ent); 5449} 5450 5451/** 5452 * xmlParsePEReference: 5453 * @ctxt: an XML parser context 5454 * 5455 * parse PEReference declarations 5456 * The entity content is handled directly by pushing it's content as 5457 * a new input stream. 5458 * 5459 * [69] PEReference ::= '%' Name ';' 5460 * 5461 * [ WFC: No Recursion ] 5462 * A parsed entity must not contain a recursive 5463 * reference to itself, either directly or indirectly. 5464 * 5465 * [ WFC: Entity Declared ] 5466 * In a document without any DTD, a document with only an internal DTD 5467 * subset which contains no parameter entity references, or a document 5468 * with "standalone='yes'", ... ... The declaration of a parameter 5469 * entity must precede any reference to it... 5470 * 5471 * [ VC: Entity Declared ] 5472 * In a document with an external subset or external parameter entities 5473 * with "standalone='no'", ... ... The declaration of a parameter entity 5474 * must precede any reference to it... 5475 * 5476 * [ WFC: In DTD ] 5477 * Parameter-entity references may only appear in the DTD. 5478 * NOTE: misleading but this is handled. 
5479 */ 5480void 5481xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5482 xmlChar *name; 5483 xmlEntityPtr entity = NULL; 5484 xmlParserInputPtr input; 5485 5486 if (RAW == '%') { 5487 NEXT; 5488 name = xmlParseNameComplex(ctxt); 5489 if (name == NULL) { 5490 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5492 ctxt->sax->error(ctxt->userData, 5493 "xmlParsePEReference: no name\n"); 5494 ctxt->wellFormed = 0; 5495 ctxt->disableSAX = 1; 5496 } else { 5497 if (RAW == ';') { 5498 NEXT; 5499 if ((ctxt->sax != NULL) && 5500 (ctxt->sax->getParameterEntity != NULL)) 5501 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5502 name); 5503 if (entity == NULL) { 5504 /* 5505 * [ WFC: Entity Declared ] 5506 * In a document without any DTD, a document with only an 5507 * internal DTD subset which contains no parameter entity 5508 * references, or a document with "standalone='yes'", ... 5509 * ... The declaration of a parameter entity must precede 5510 * any reference to it... 5511 */ 5512 if ((ctxt->standalone == 1) || 5513 ((ctxt->hasExternalSubset == 0) && 5514 (ctxt->hasPErefs == 0))) { 5515 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5516 if ((!ctxt->disableSAX) && 5517 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5518 ctxt->sax->error(ctxt->userData, 5519 "PEReference: %%%s; not found\n", name); 5520 ctxt->wellFormed = 0; 5521 ctxt->disableSAX = 1; 5522 } else { 5523 /* 5524 * [ VC: Entity Declared ] 5525 * In a document with an external subset or external 5526 * parameter entities with "standalone='no'", ... 5527 * ... The declaration of a parameter entity must precede 5528 * any reference to it... 
5529 */ 5530 if ((!ctxt->disableSAX) && 5531 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5532 ctxt->sax->warning(ctxt->userData, 5533 "PEReference: %%%s; not found\n", name); 5534 ctxt->valid = 0; 5535 } 5536 } else { 5537 /* 5538 * Internal checking in case the entity quest barfed 5539 */ 5540 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5541 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5542 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5543 ctxt->sax->warning(ctxt->userData, 5544 "Internal: %%%s; is not a parameter entity\n", name); 5545 } else { 5546 /* 5547 * TODO !!! 5548 * handle the extra spaces added before and after 5549 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5550 */ 5551 input = xmlNewEntityInputStream(ctxt, entity); 5552 xmlPushInput(ctxt, input); 5553 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5554 (RAW == '<') && (NXT(1) == '?') && 5555 (NXT(2) == 'x') && (NXT(3) == 'm') && 5556 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5557 xmlParseTextDecl(ctxt); 5558 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5559 /* 5560 * The XML REC instructs us to stop parsing 5561 * right here 5562 */ 5563 ctxt->instate = XML_PARSER_EOF; 5564 xmlFree(name); 5565 return; 5566 } 5567 } 5568 if (ctxt->token == 0) 5569 ctxt->token = ' '; 5570 } 5571 } 5572 ctxt->hasPErefs = 1; 5573 } else { 5574 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5576 ctxt->sax->error(ctxt->userData, 5577 "xmlParsePEReference: expecting ';'\n"); 5578 ctxt->wellFormed = 0; 5579 ctxt->disableSAX = 1; 5580 } 5581 xmlFree(name); 5582 } 5583 } 5584} 5585 5586/** 5587 * xmlParseStringPEReference: 5588 * @ctxt: an XML parser context 5589 * @str: a pointer to an index in the string 5590 * 5591 * parse PEReference declarations 5592 * 5593 * [69] PEReference ::= '%' Name ';' 5594 * 5595 * [ WFC: No Recursion ] 5596 * A parsed entity must not contain a recursive 5597 * reference to itself, 
either directly or indirectly. 5598 * 5599 * [ WFC: Entity Declared ] 5600 * In a document without any DTD, a document with only an internal DTD 5601 * subset which contains no parameter entity references, or a document 5602 * with "standalone='yes'", ... ... The declaration of a parameter 5603 * entity must precede any reference to it... 5604 * 5605 * [ VC: Entity Declared ] 5606 * In a document with an external subset or external parameter entities 5607 * with "standalone='no'", ... ... The declaration of a parameter entity 5608 * must precede any reference to it... 5609 * 5610 * [ WFC: In DTD ] 5611 * Parameter-entity references may only appear in the DTD. 5612 * NOTE: misleading but this is handled. 5613 * 5614 * Returns the string of the entity content. 5615 * str is updated to the current value of the index 5616 */ 5617xmlEntityPtr 5618xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5619 const xmlChar *ptr; 5620 xmlChar cur; 5621 xmlChar *name; 5622 xmlEntityPtr entity = NULL; 5623 5624 if ((str == NULL) || (*str == NULL)) return(NULL); 5625 ptr = *str; 5626 cur = *ptr; 5627 if (cur == '%') { 5628 ptr++; 5629 cur = *ptr; 5630 name = xmlParseStringName(ctxt, &ptr); 5631 if (name == NULL) { 5632 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5634 ctxt->sax->error(ctxt->userData, 5635 "xmlParseStringPEReference: no name\n"); 5636 ctxt->wellFormed = 0; 5637 ctxt->disableSAX = 1; 5638 } else { 5639 cur = *ptr; 5640 if (cur == ';') { 5641 ptr++; 5642 cur = *ptr; 5643 if ((ctxt->sax != NULL) && 5644 (ctxt->sax->getParameterEntity != NULL)) 5645 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5646 name); 5647 if (entity == NULL) { 5648 /* 5649 * [ WFC: Entity Declared ] 5650 * In a document without any DTD, a document with only an 5651 * internal DTD subset which contains no parameter entity 5652 * references, or a document with "standalone='yes'", ... 5653 * ... 
The declaration of a parameter entity must precede 5654 * any reference to it... 5655 */ 5656 if ((ctxt->standalone == 1) || 5657 ((ctxt->hasExternalSubset == 0) && 5658 (ctxt->hasPErefs == 0))) { 5659 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5661 ctxt->sax->error(ctxt->userData, 5662 "PEReference: %%%s; not found\n", name); 5663 ctxt->wellFormed = 0; 5664 ctxt->disableSAX = 1; 5665 } else { 5666 /* 5667 * [ VC: Entity Declared ] 5668 * In a document with an external subset or external 5669 * parameter entities with "standalone='no'", ... 5670 * ... The declaration of a parameter entity must 5671 * precede any reference to it... 5672 */ 5673 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5674 ctxt->sax->warning(ctxt->userData, 5675 "PEReference: %%%s; not found\n", name); 5676 ctxt->valid = 0; 5677 } 5678 } else { 5679 /* 5680 * Internal checking in case the entity quest barfed 5681 */ 5682 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5683 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5684 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5685 ctxt->sax->warning(ctxt->userData, 5686 "Internal: %%%s; is not a parameter entity\n", name); 5687 } 5688 } 5689 ctxt->hasPErefs = 1; 5690 } else { 5691 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5693 ctxt->sax->error(ctxt->userData, 5694 "xmlParseStringPEReference: expecting ';'\n"); 5695 ctxt->wellFormed = 0; 5696 ctxt->disableSAX = 1; 5697 } 5698 xmlFree(name); 5699 } 5700 } 5701 *str = ptr; 5702 return(entity); 5703} 5704 5705/** 5706 * xmlParseDocTypeDecl: 5707 * @ctxt: an XML parser context 5708 * 5709 * parse a DOCTYPE declaration 5710 * 5711 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5712 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 5713 * 5714 * [ VC: Root Element Type ] 5715 * The Name in the document type declaration must match the element 5716 * type of the root element. 5717 */ 5718 5719void 5720xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5721 xmlChar *name = NULL; 5722 xmlChar *ExternalID = NULL; 5723 xmlChar *URI = NULL; 5724 5725 /* 5726 * We know that '<!DOCTYPE' has been detected. 5727 */ 5728 SKIP(9); 5729 5730 SKIP_BLANKS; 5731 5732 /* 5733 * Parse the DOCTYPE name. 5734 */ 5735 name = xmlParseName(ctxt); 5736 if (name == NULL) { 5737 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5739 ctxt->sax->error(ctxt->userData, 5740 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5741 ctxt->wellFormed = 0; 5742 ctxt->disableSAX = 1; 5743 } 5744 ctxt->intSubName = name; 5745 5746 SKIP_BLANKS; 5747 5748 /* 5749 * Check for SystemID and ExternalID 5750 */ 5751 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 5752 5753 if ((URI != NULL) || (ExternalID != NULL)) { 5754 ctxt->hasExternalSubset = 1; 5755 } 5756 ctxt->extSubURI = URI; 5757 ctxt->extSubSystem = ExternalID; 5758 5759 SKIP_BLANKS; 5760 5761 /* 5762 * Create and update the internal subset. 5763 */ 5764 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 5765 (!ctxt->disableSAX)) 5766 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 5767 5768 /* 5769 * Is there any internal subset declarations ? 5770 * they are handled separately in xmlParseInternalSubset() 5771 */ 5772 if (RAW == '[') 5773 return; 5774 5775 /* 5776 * We should be at the end of the DOCTYPE declaration. 
5777 */ 5778 if (RAW != '>') { 5779 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5781 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5782 ctxt->wellFormed = 0; 5783 ctxt->disableSAX = 1; 5784 } 5785 NEXT; 5786} 5787 5788/** 5789 * xmlParseInternalsubset: 5790 * @ctxt: an XML parser context 5791 * 5792 * parse the internal subset declaration 5793 * 5794 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 5795 */ 5796 5797static void 5798xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 5799 /* 5800 * Is there any DTD definition ? 5801 */ 5802 if (RAW == '[') { 5803 ctxt->instate = XML_PARSER_DTD; 5804 NEXT; 5805 /* 5806 * Parse the succession of Markup declarations and 5807 * PEReferences. 5808 * Subsequence (markupdecl | PEReference | S)* 5809 */ 5810 while (RAW != ']') { 5811 const xmlChar *check = CUR_PTR; 5812 int cons = ctxt->input->consumed; 5813 5814 SKIP_BLANKS; 5815 xmlParseMarkupDecl(ctxt); 5816 xmlParsePEReference(ctxt); 5817 5818 /* 5819 * Pop-up of finished entities. 5820 */ 5821 while ((RAW == 0) && (ctxt->inputNr > 1)) 5822 xmlPopInput(ctxt); 5823 5824 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5825 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5827 ctxt->sax->error(ctxt->userData, 5828 "xmlParseInternalSubset: error detected in Markup declaration\n"); 5829 ctxt->wellFormed = 0; 5830 ctxt->disableSAX = 1; 5831 break; 5832 } 5833 } 5834 if (RAW == ']') { 5835 NEXT; 5836 SKIP_BLANKS; 5837 } 5838 } 5839 5840 /* 5841 * We should be at the end of the DOCTYPE declaration. 
5842 */ 5843 if (RAW != '>') { 5844 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5846 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5847 ctxt->wellFormed = 0; 5848 ctxt->disableSAX = 1; 5849 } 5850 NEXT; 5851} 5852 5853/** 5854 * xmlParseAttribute: 5855 * @ctxt: an XML parser context 5856 * @value: a xmlChar ** used to store the value of the attribute 5857 * 5858 * parse an attribute 5859 * 5860 * [41] Attribute ::= Name Eq AttValue 5861 * 5862 * [ WFC: No External Entity References ] 5863 * Attribute values cannot contain direct or indirect entity references 5864 * to external entities. 5865 * 5866 * [ WFC: No < in Attribute Values ] 5867 * The replacement text of any entity referred to directly or indirectly in 5868 * an attribute value (other than "<") must not contain a <. 5869 * 5870 * [ VC: Attribute Value Type ] 5871 * The attribute must have been declared; the value must be of the type 5872 * declared for it. 5873 * 5874 * [25] Eq ::= S? '=' S? 5875 * 5876 * With namespace: 5877 * 5878 * [NS 11] Attribute ::= QName Eq AttValue 5879 * 5880 * Also the case QName == xmlns:??? is handled independently as a namespace 5881 * definition. 5882 * 5883 * Returns the attribute name, and the value in *value. 
5884 */ 5885 5886xmlChar * 5887xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 5888 xmlChar *name, *val; 5889 5890 *value = NULL; 5891 name = xmlParseName(ctxt); 5892 if (name == NULL) { 5893 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5895 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 5896 ctxt->wellFormed = 0; 5897 ctxt->disableSAX = 1; 5898 return(NULL); 5899 } 5900 5901 /* 5902 * read the value 5903 */ 5904 SKIP_BLANKS; 5905 if (RAW == '=') { 5906 NEXT; 5907 SKIP_BLANKS; 5908 val = xmlParseAttValue(ctxt); 5909 ctxt->instate = XML_PARSER_CONTENT; 5910 } else { 5911 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5913 ctxt->sax->error(ctxt->userData, 5914 "Specification mandate value for attribute %s\n", name); 5915 ctxt->wellFormed = 0; 5916 ctxt->disableSAX = 1; 5917 xmlFree(name); 5918 return(NULL); 5919 } 5920 5921 /* 5922 * Check that xml:lang conforms to the specification 5923 * No more registered as an error, just generate a warning now 5924 * since this was deprecated in XML second edition 5925 */ 5926 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 5927 if (!xmlCheckLanguageID(val)) { 5928 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5929 ctxt->sax->warning(ctxt->userData, 5930 "Malformed value for xml:lang : %s\n", val); 5931 } 5932 } 5933 5934 /* 5935 * Check that xml:space conforms to the specification 5936 */ 5937 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 5938 if (xmlStrEqual(val, BAD_CAST "default")) 5939 *(ctxt->space) = 0; 5940 else if (xmlStrEqual(val, BAD_CAST "preserve")) 5941 *(ctxt->space) = 1; 5942 else { 5943 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5945 ctxt->sax->error(ctxt->userData, 5946"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 5947 val); 5948 
ctxt->wellFormed = 0; 5949 ctxt->disableSAX = 1; 5950 } 5951 } 5952 5953 *value = val; 5954 return(name); 5955} 5956 5957/** 5958 * xmlParseStartTag: 5959 * @ctxt: an XML parser context 5960 * 5961 * parse a start of tag either for rule element or 5962 * EmptyElement. In both case we don't parse the tag closing chars. 5963 * 5964 * [40] STag ::= '<' Name (S Attribute)* S? '>' 5965 * 5966 * [ WFC: Unique Att Spec ] 5967 * No attribute name may appear more than once in the same start-tag or 5968 * empty-element tag. 5969 * 5970 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 5971 * 5972 * [ WFC: Unique Att Spec ] 5973 * No attribute name may appear more than once in the same start-tag or 5974 * empty-element tag. 5975 * 5976 * With namespace: 5977 * 5978 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 5979 * 5980 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 5981 * 5982 * Returns the element name parsed 5983 */ 5984 5985xmlChar * 5986xmlParseStartTag(xmlParserCtxtPtr ctxt) { 5987 xmlChar *name; 5988 xmlChar *attname; 5989 xmlChar *attvalue; 5990 const xmlChar **atts = NULL; 5991 int nbatts = 0; 5992 int maxatts = 0; 5993 int i; 5994 5995 if (RAW != '<') return(NULL); 5996 NEXT1; 5997 5998 name = xmlParseName(ctxt); 5999 if (name == NULL) { 6000 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6002 ctxt->sax->error(ctxt->userData, 6003 "xmlParseStartTag: invalid element name\n"); 6004 ctxt->wellFormed = 0; 6005 ctxt->disableSAX = 1; 6006 return(NULL); 6007 } 6008 6009 /* 6010 * Now parse the attributes, it ends up with the ending 6011 * 6012 * (S Attribute)* S? 
6013 */ 6014 SKIP_BLANKS; 6015 GROW; 6016 6017 while ((RAW != '>') && 6018 ((RAW != '/') || (NXT(1) != '>')) && 6019 (IS_CHAR(RAW))) { 6020 const xmlChar *q = CUR_PTR; 6021 int cons = ctxt->input->consumed; 6022 6023 attname = xmlParseAttribute(ctxt, &attvalue); 6024 if ((attname != NULL) && (attvalue != NULL)) { 6025 /* 6026 * [ WFC: Unique Att Spec ] 6027 * No attribute name may appear more than once in the same 6028 * start-tag or empty-element tag. 6029 */ 6030 for (i = 0; i < nbatts;i += 2) { 6031 if (xmlStrEqual(atts[i], attname)) { 6032 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6034 ctxt->sax->error(ctxt->userData, 6035 "Attribute %s redefined\n", 6036 attname); 6037 ctxt->wellFormed = 0; 6038 ctxt->disableSAX = 1; 6039 xmlFree(attname); 6040 xmlFree(attvalue); 6041 goto failed; 6042 } 6043 } 6044 6045 /* 6046 * Add the pair to atts 6047 */ 6048 if (atts == NULL) { 6049 maxatts = 10; 6050 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6051 if (atts == NULL) { 6052 xmlGenericError(xmlGenericErrorContext, 6053 "malloc of %ld byte failed\n", 6054 maxatts * (long)sizeof(xmlChar *)); 6055 return(NULL); 6056 } 6057 } else if (nbatts + 4 > maxatts) { 6058 maxatts *= 2; 6059 atts = (const xmlChar **) xmlRealloc((void *) atts, 6060 maxatts * sizeof(xmlChar *)); 6061 if (atts == NULL) { 6062 xmlGenericError(xmlGenericErrorContext, 6063 "realloc of %ld byte failed\n", 6064 maxatts * (long)sizeof(xmlChar *)); 6065 return(NULL); 6066 } 6067 } 6068 atts[nbatts++] = attname; 6069 atts[nbatts++] = attvalue; 6070 atts[nbatts] = NULL; 6071 atts[nbatts + 1] = NULL; 6072 } else { 6073 if (attname != NULL) 6074 xmlFree(attname); 6075 if (attvalue != NULL) 6076 xmlFree(attvalue); 6077 } 6078 6079failed: 6080 6081 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6082 break; 6083 if (!IS_BLANK(RAW)) { 6084 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6085 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6086 ctxt->sax->error(ctxt->userData, 6087 "attributes construct error\n"); 6088 ctxt->wellFormed = 0; 6089 ctxt->disableSAX = 1; 6090 } 6091 SKIP_BLANKS; 6092 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6093 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6095 ctxt->sax->error(ctxt->userData, 6096 "xmlParseStartTag: problem parsing attributes\n"); 6097 ctxt->wellFormed = 0; 6098 ctxt->disableSAX = 1; 6099 break; 6100 } 6101 GROW; 6102 } 6103 6104 /* 6105 * SAX: Start of Element ! 6106 */ 6107 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6108 (!ctxt->disableSAX)) 6109 ctxt->sax->startElement(ctxt->userData, name, atts); 6110 6111 if (atts != NULL) { 6112 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6113 xmlFree((void *) atts); 6114 } 6115 return(name); 6116} 6117 6118/** 6119 * xmlParseEndTag: 6120 * @ctxt: an XML parser context 6121 * 6122 * parse an end of tag 6123 * 6124 * [42] ETag ::= '</' Name S? '>' 6125 * 6126 * With namespace 6127 * 6128 * [NS 9] ETag ::= '</' QName S? '>' 6129 */ 6130 6131void 6132xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6133 xmlChar *name; 6134 xmlChar *oldname; 6135 6136 GROW; 6137 if ((RAW != '<') || (NXT(1) != '/')) { 6138 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6140 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6141 ctxt->wellFormed = 0; 6142 ctxt->disableSAX = 1; 6143 return; 6144 } 6145 SKIP(2); 6146 6147 name = xmlParseName(ctxt); 6148 6149 /* 6150 * We should definitely be at the ending "S? 
'>'" part 6151 */ 6152 GROW; 6153 SKIP_BLANKS; 6154 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6155 ctxt->errNo = XML_ERR_GT_REQUIRED; 6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6157 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6158 ctxt->wellFormed = 0; 6159 ctxt->disableSAX = 1; 6160 } else 6161 NEXT1; 6162 6163 /* 6164 * [ WFC: Element Type Match ] 6165 * The Name in an element's end-tag must match the element type in the 6166 * start-tag. 6167 * 6168 */ 6169 if ((name == NULL) || (ctxt->name == NULL) || 6170 (!xmlStrEqual(name, ctxt->name))) { 6171 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6173 if ((name != NULL) && (ctxt->name != NULL)) { 6174 ctxt->sax->error(ctxt->userData, 6175 "Opening and ending tag mismatch: %s and %s\n", 6176 ctxt->name, name); 6177 } else if (ctxt->name != NULL) { 6178 ctxt->sax->error(ctxt->userData, 6179 "Ending tag eror for: %s\n", ctxt->name); 6180 } else { 6181 ctxt->sax->error(ctxt->userData, 6182 "Ending tag error: internal error ???\n"); 6183 } 6184 6185 } 6186 ctxt->wellFormed = 0; 6187 ctxt->disableSAX = 1; 6188 } 6189 6190 /* 6191 * SAX: End of Tag 6192 */ 6193 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6194 (!ctxt->disableSAX)) 6195 ctxt->sax->endElement(ctxt->userData, name); 6196 6197 if (name != NULL) 6198 xmlFree(name); 6199 oldname = namePop(ctxt); 6200 spacePop(ctxt); 6201 if (oldname != NULL) { 6202#ifdef DEBUG_STACK 6203 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6204#endif 6205 xmlFree(oldname); 6206 } 6207 return; 6208} 6209 6210/** 6211 * xmlParseCDSect: 6212 * @ctxt: an XML parser context 6213 * 6214 * Parse escaped pure raw content. 
6215 * 6216 * [18] CDSect ::= CDStart CData CDEnd 6217 * 6218 * [19] CDStart ::= '<![CDATA[' 6219 * 6220 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6221 * 6222 * [21] CDEnd ::= ']]>' 6223 */ 6224void 6225xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6226 xmlChar *buf = NULL; 6227 int len = 0; 6228 int size = XML_PARSER_BUFFER_SIZE; 6229 int r, rl; 6230 int s, sl; 6231 int cur, l; 6232 int count = 0; 6233 6234 if ((NXT(0) == '<') && (NXT(1) == '!') && 6235 (NXT(2) == '[') && (NXT(3) == 'C') && 6236 (NXT(4) == 'D') && (NXT(5) == 'A') && 6237 (NXT(6) == 'T') && (NXT(7) == 'A') && 6238 (NXT(8) == '[')) { 6239 SKIP(9); 6240 } else 6241 return; 6242 6243 ctxt->instate = XML_PARSER_CDATA_SECTION; 6244 r = CUR_CHAR(rl); 6245 if (!IS_CHAR(r)) { 6246 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6248 ctxt->sax->error(ctxt->userData, 6249 "CData section not finished\n"); 6250 ctxt->wellFormed = 0; 6251 ctxt->disableSAX = 1; 6252 ctxt->instate = XML_PARSER_CONTENT; 6253 return; 6254 } 6255 NEXTL(rl); 6256 s = CUR_CHAR(sl); 6257 if (!IS_CHAR(s)) { 6258 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6260 ctxt->sax->error(ctxt->userData, 6261 "CData section not finished\n"); 6262 ctxt->wellFormed = 0; 6263 ctxt->disableSAX = 1; 6264 ctxt->instate = XML_PARSER_CONTENT; 6265 return; 6266 } 6267 NEXTL(sl); 6268 cur = CUR_CHAR(l); 6269 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6270 if (buf == NULL) { 6271 xmlGenericError(xmlGenericErrorContext, 6272 "malloc of %d byte failed\n", size); 6273 return; 6274 } 6275 while (IS_CHAR(cur) && 6276 ((r != ']') || (s != ']') || (cur != '>'))) { 6277 if (len + 5 >= size) { 6278 size *= 2; 6279 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6280 if (buf == NULL) { 6281 xmlGenericError(xmlGenericErrorContext, 6282 "realloc of %d byte failed\n", size); 6283 return; 6284 } 6285 } 6286 COPY_BUF(rl,buf,len,r); 6287 r = s; 
6288 rl = sl; 6289 s = cur; 6290 sl = l; 6291 count++; 6292 if (count > 50) { 6293 GROW; 6294 count = 0; 6295 } 6296 NEXTL(l); 6297 cur = CUR_CHAR(l); 6298 } 6299 buf[len] = 0; 6300 ctxt->instate = XML_PARSER_CONTENT; 6301 if (cur != '>') { 6302 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6304 ctxt->sax->error(ctxt->userData, 6305 "CData section not finished\n%.50s\n", buf); 6306 ctxt->wellFormed = 0; 6307 ctxt->disableSAX = 1; 6308 xmlFree(buf); 6309 return; 6310 } 6311 NEXTL(l); 6312 6313 /* 6314 * Ok the buffer is to be consumed as cdata. 6315 */ 6316 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6317 if (ctxt->sax->cdataBlock != NULL) 6318 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6319 } 6320 xmlFree(buf); 6321} 6322 6323/** 6324 * xmlParseContent: 6325 * @ctxt: an XML parser context 6326 * 6327 * Parse a content: 6328 * 6329 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6330 */ 6331 6332void 6333xmlParseContent(xmlParserCtxtPtr ctxt) { 6334 GROW; 6335 while (((RAW != 0) || (ctxt->token != 0)) && 6336 ((RAW != '<') || (NXT(1) != '/'))) { 6337 const xmlChar *test = CUR_PTR; 6338 int cons = ctxt->input->consumed; 6339 int tok = ctxt->token; 6340 const xmlChar *cur = ctxt->input->cur; 6341 6342 /* 6343 * Handle possible processed charrefs. 6344 */ 6345 if (ctxt->token != 0) { 6346 xmlParseCharData(ctxt, 0); 6347 } 6348 /* 6349 * First case : a Processing Instruction. 
6350 */ 6351 else if ((*cur == '<') && (cur[1] == '?')) { 6352 xmlParsePI(ctxt); 6353 } 6354 6355 /* 6356 * Second case : a CDSection 6357 */ 6358 else if ((*cur == '<') && (NXT(1) == '!') && 6359 (NXT(2) == '[') && (NXT(3) == 'C') && 6360 (NXT(4) == 'D') && (NXT(5) == 'A') && 6361 (NXT(6) == 'T') && (NXT(7) == 'A') && 6362 (NXT(8) == '[')) { 6363 xmlParseCDSect(ctxt); 6364 } 6365 6366 /* 6367 * Third case : a comment 6368 */ 6369 else if ((*cur == '<') && (NXT(1) == '!') && 6370 (NXT(2) == '-') && (NXT(3) == '-')) { 6371 xmlParseComment(ctxt); 6372 ctxt->instate = XML_PARSER_CONTENT; 6373 } 6374 6375 /* 6376 * Fourth case : a sub-element. 6377 */ 6378 else if (*cur == '<') { 6379 xmlParseElement(ctxt); 6380 } 6381 6382 /* 6383 * Fifth case : a reference. If if has not been resolved, 6384 * parsing returns it's Name, create the node 6385 */ 6386 6387 else if (*cur == '&') { 6388 xmlParseReference(ctxt); 6389 } 6390 6391 /* 6392 * Last case, text. Note that References are handled directly. 6393 */ 6394 else { 6395 xmlParseCharData(ctxt, 0); 6396 } 6397 6398 GROW; 6399 /* 6400 * Pop-up of finished entities. 6401 */ 6402 while ((RAW == 0) && (ctxt->inputNr > 1)) 6403 xmlPopInput(ctxt); 6404 SHRINK; 6405 6406 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6407 (tok == ctxt->token)) { 6408 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6410 ctxt->sax->error(ctxt->userData, 6411 "detected an error in element content\n"); 6412 ctxt->wellFormed = 0; 6413 ctxt->disableSAX = 1; 6414 ctxt->instate = XML_PARSER_EOF; 6415 break; 6416 } 6417 } 6418} 6419 6420/** 6421 * xmlParseElement: 6422 * @ctxt: an XML parser context 6423 * 6424 * parse an XML element, this is highly recursive 6425 * 6426 * [39] element ::= EmptyElemTag | STag content ETag 6427 * 6428 * [ WFC: Element Type Match ] 6429 * The Name in an element's end-tag must match the element type in the 6430 * start-tag. 
6431 * 6432 * [ VC: Element Valid ] 6433 * An element is valid if there is a declaration matching elementdecl 6434 * where the Name matches the element type and one of the following holds: 6435 * - The declaration matches EMPTY and the element has no content. 6436 * - The declaration matches children and the sequence of child elements 6437 * belongs to the language generated by the regular expression in the 6438 * content model, with optional white space (characters matching the 6439 * nonterminal S) between each pair of child elements. 6440 * - The declaration matches Mixed and the content consists of character 6441 * data and child elements whose types match names in the content model. 6442 * - The declaration matches ANY, and the types of any child elements have 6443 * been declared. 6444 */ 6445 6446void 6447xmlParseElement(xmlParserCtxtPtr ctxt) { 6448 const xmlChar *openTag = CUR_PTR; 6449 xmlChar *name; 6450 xmlChar *oldname; 6451 xmlParserNodeInfo node_info; 6452 xmlNodePtr ret; 6453 6454 /* Capture start position */ 6455 if (ctxt->record_info) { 6456 node_info.begin_pos = ctxt->input->consumed + 6457 (CUR_PTR - ctxt->input->base); 6458 node_info.begin_line = ctxt->input->line; 6459 } 6460 6461 if (ctxt->spaceNr == 0) 6462 spacePush(ctxt, -1); 6463 else 6464 spacePush(ctxt, *ctxt->space); 6465 6466 name = xmlParseStartTag(ctxt); 6467 if (name == NULL) { 6468 spacePop(ctxt); 6469 return; 6470 } 6471 namePush(ctxt, name); 6472 ret = ctxt->node; 6473 6474 /* 6475 * [ VC: Root Element Type ] 6476 * The Name in the document type declaration must match the element 6477 * type of the root element. 6478 */ 6479 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6480 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6481 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6482 6483 /* 6484 * Check for an Empty Element. 
6485 */ 6486 if ((RAW == '/') && (NXT(1) == '>')) { 6487 SKIP(2); 6488 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6489 (!ctxt->disableSAX)) 6490 ctxt->sax->endElement(ctxt->userData, name); 6491 oldname = namePop(ctxt); 6492 spacePop(ctxt); 6493 if (oldname != NULL) { 6494#ifdef DEBUG_STACK 6495 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6496#endif 6497 xmlFree(oldname); 6498 } 6499 if ( ret != NULL && ctxt->record_info ) { 6500 node_info.end_pos = ctxt->input->consumed + 6501 (CUR_PTR - ctxt->input->base); 6502 node_info.end_line = ctxt->input->line; 6503 node_info.node = ret; 6504 xmlParserAddNodeInfo(ctxt, &node_info); 6505 } 6506 return; 6507 } 6508 if (RAW == '>') { 6509 NEXT1; 6510 } else { 6511 ctxt->errNo = XML_ERR_GT_REQUIRED; 6512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6513 ctxt->sax->error(ctxt->userData, 6514 "Couldn't find end of Start Tag\n%.30s\n", 6515 openTag); 6516 ctxt->wellFormed = 0; 6517 ctxt->disableSAX = 1; 6518 6519 /* 6520 * end of parsing of this node. 
6521 */ 6522 nodePop(ctxt); 6523 oldname = namePop(ctxt); 6524 spacePop(ctxt); 6525 if (oldname != NULL) { 6526#ifdef DEBUG_STACK 6527 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6528#endif 6529 xmlFree(oldname); 6530 } 6531 6532 /* 6533 * Capture end position and add node 6534 */ 6535 if ( ret != NULL && ctxt->record_info ) { 6536 node_info.end_pos = ctxt->input->consumed + 6537 (CUR_PTR - ctxt->input->base); 6538 node_info.end_line = ctxt->input->line; 6539 node_info.node = ret; 6540 xmlParserAddNodeInfo(ctxt, &node_info); 6541 } 6542 return; 6543 } 6544 6545 /* 6546 * Parse the content of the element: 6547 */ 6548 xmlParseContent(ctxt); 6549 if (!IS_CHAR(RAW)) { 6550 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6552 ctxt->sax->error(ctxt->userData, 6553 "Premature end of data in tag %.30s\n", openTag); 6554 ctxt->wellFormed = 0; 6555 ctxt->disableSAX = 1; 6556 6557 /* 6558 * end of parsing of this node. 6559 */ 6560 nodePop(ctxt); 6561 oldname = namePop(ctxt); 6562 spacePop(ctxt); 6563 if (oldname != NULL) { 6564#ifdef DEBUG_STACK 6565 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6566#endif 6567 xmlFree(oldname); 6568 } 6569 return; 6570 } 6571 6572 /* 6573 * parse the end of tag: '</' should be here. 6574 */ 6575 xmlParseEndTag(ctxt); 6576 6577 /* 6578 * Capture end position and add node 6579 */ 6580 if ( ret != NULL && ctxt->record_info ) { 6581 node_info.end_pos = ctxt->input->consumed + 6582 (CUR_PTR - ctxt->input->base); 6583 node_info.end_line = ctxt->input->line; 6584 node_info.node = ret; 6585 xmlParserAddNodeInfo(ctxt, &node_info); 6586 } 6587} 6588 6589/** 6590 * xmlParseVersionNum: 6591 * @ctxt: an XML parser context 6592 * 6593 * parse the XML version value. 
6594 * 6595 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6596 * 6597 * Returns the string giving the XML version number, or NULL 6598 */ 6599xmlChar * 6600xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6601 xmlChar *buf = NULL; 6602 int len = 0; 6603 int size = 10; 6604 xmlChar cur; 6605 6606 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6607 if (buf == NULL) { 6608 xmlGenericError(xmlGenericErrorContext, 6609 "malloc of %d byte failed\n", size); 6610 return(NULL); 6611 } 6612 cur = CUR; 6613 while (((cur >= 'a') && (cur <= 'z')) || 6614 ((cur >= 'A') && (cur <= 'Z')) || 6615 ((cur >= '0') && (cur <= '9')) || 6616 (cur == '_') || (cur == '.') || 6617 (cur == ':') || (cur == '-')) { 6618 if (len + 1 >= size) { 6619 size *= 2; 6620 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6621 if (buf == NULL) { 6622 xmlGenericError(xmlGenericErrorContext, 6623 "realloc of %d byte failed\n", size); 6624 return(NULL); 6625 } 6626 } 6627 buf[len++] = cur; 6628 NEXT; 6629 cur=CUR; 6630 } 6631 buf[len] = 0; 6632 return(buf); 6633} 6634 6635/** 6636 * xmlParseVersionInfo: 6637 * @ctxt: an XML parser context 6638 * 6639 * parse the XML version. 6640 * 6641 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6642 * 6643 * [25] Eq ::= S? '=' S? 6644 * 6645 * Returns the version string, e.g. 
"1.0" 6646 */ 6647 6648xmlChar * 6649xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6650 xmlChar *version = NULL; 6651 const xmlChar *q; 6652 6653 if ((RAW == 'v') && (NXT(1) == 'e') && 6654 (NXT(2) == 'r') && (NXT(3) == 's') && 6655 (NXT(4) == 'i') && (NXT(5) == 'o') && 6656 (NXT(6) == 'n')) { 6657 SKIP(7); 6658 SKIP_BLANKS; 6659 if (RAW != '=') { 6660 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6662 ctxt->sax->error(ctxt->userData, 6663 "xmlParseVersionInfo : expected '='\n"); 6664 ctxt->wellFormed = 0; 6665 ctxt->disableSAX = 1; 6666 return(NULL); 6667 } 6668 NEXT; 6669 SKIP_BLANKS; 6670 if (RAW == '"') { 6671 NEXT; 6672 q = CUR_PTR; 6673 version = xmlParseVersionNum(ctxt); 6674 if (RAW != '"') { 6675 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6677 ctxt->sax->error(ctxt->userData, 6678 "String not closed\n%.50s\n", q); 6679 ctxt->wellFormed = 0; 6680 ctxt->disableSAX = 1; 6681 } else 6682 NEXT; 6683 } else if (RAW == '\''){ 6684 NEXT; 6685 q = CUR_PTR; 6686 version = xmlParseVersionNum(ctxt); 6687 if (RAW != '\'') { 6688 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6690 ctxt->sax->error(ctxt->userData, 6691 "String not closed\n%.50s\n", q); 6692 ctxt->wellFormed = 0; 6693 ctxt->disableSAX = 1; 6694 } else 6695 NEXT; 6696 } else { 6697 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6699 ctxt->sax->error(ctxt->userData, 6700 "xmlParseVersionInfo : expected ' or \"\n"); 6701 ctxt->wellFormed = 0; 6702 ctxt->disableSAX = 1; 6703 } 6704 } 6705 return(version); 6706} 6707 6708/** 6709 * xmlParseEncName: 6710 * @ctxt: an XML parser context 6711 * 6712 * parse the XML encoding name 6713 * 6714 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6715 * 6716 * Returns the encoding name value or NULL 6717 */ 6718xmlChar * 
6719xmlParseEncName(xmlParserCtxtPtr ctxt) { 6720 xmlChar *buf = NULL; 6721 int len = 0; 6722 int size = 10; 6723 xmlChar cur; 6724 6725 cur = CUR; 6726 if (((cur >= 'a') && (cur <= 'z')) || 6727 ((cur >= 'A') && (cur <= 'Z'))) { 6728 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6729 if (buf == NULL) { 6730 xmlGenericError(xmlGenericErrorContext, 6731 "malloc of %d byte failed\n", size); 6732 return(NULL); 6733 } 6734 6735 buf[len++] = cur; 6736 NEXT; 6737 cur = CUR; 6738 while (((cur >= 'a') && (cur <= 'z')) || 6739 ((cur >= 'A') && (cur <= 'Z')) || 6740 ((cur >= '0') && (cur <= '9')) || 6741 (cur == '.') || (cur == '_') || 6742 (cur == '-')) { 6743 if (len + 1 >= size) { 6744 size *= 2; 6745 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6746 if (buf == NULL) { 6747 xmlGenericError(xmlGenericErrorContext, 6748 "realloc of %d byte failed\n", size); 6749 return(NULL); 6750 } 6751 } 6752 buf[len++] = cur; 6753 NEXT; 6754 cur = CUR; 6755 if (cur == 0) { 6756 SHRINK; 6757 GROW; 6758 cur = CUR; 6759 } 6760 } 6761 buf[len] = 0; 6762 } else { 6763 ctxt->errNo = XML_ERR_ENCODING_NAME; 6764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6765 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 6766 ctxt->wellFormed = 0; 6767 ctxt->disableSAX = 1; 6768 } 6769 return(buf); 6770} 6771 6772/** 6773 * xmlParseEncodingDecl: 6774 * @ctxt: an XML parser context 6775 * 6776 * parse the XML encoding declaration 6777 * 6778 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 6779 * 6780 * this setups the conversion filters. 
6781 * 6782 * Returns the encoding value or NULL 6783 */ 6784 6785xmlChar * 6786xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 6787 xmlChar *encoding = NULL; 6788 const xmlChar *q; 6789 6790 SKIP_BLANKS; 6791 if ((RAW == 'e') && (NXT(1) == 'n') && 6792 (NXT(2) == 'c') && (NXT(3) == 'o') && 6793 (NXT(4) == 'd') && (NXT(5) == 'i') && 6794 (NXT(6) == 'n') && (NXT(7) == 'g')) { 6795 SKIP(8); 6796 SKIP_BLANKS; 6797 if (RAW != '=') { 6798 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6800 ctxt->sax->error(ctxt->userData, 6801 "xmlParseEncodingDecl : expected '='\n"); 6802 ctxt->wellFormed = 0; 6803 ctxt->disableSAX = 1; 6804 return(NULL); 6805 } 6806 NEXT; 6807 SKIP_BLANKS; 6808 if (RAW == '"') { 6809 NEXT; 6810 q = CUR_PTR; 6811 encoding = xmlParseEncName(ctxt); 6812 if (RAW != '"') { 6813 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6815 ctxt->sax->error(ctxt->userData, 6816 "String not closed\n%.50s\n", q); 6817 ctxt->wellFormed = 0; 6818 ctxt->disableSAX = 1; 6819 } else 6820 NEXT; 6821 } else if (RAW == '\''){ 6822 NEXT; 6823 q = CUR_PTR; 6824 encoding = xmlParseEncName(ctxt); 6825 if (RAW != '\'') { 6826 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6828 ctxt->sax->error(ctxt->userData, 6829 "String not closed\n%.50s\n", q); 6830 ctxt->wellFormed = 0; 6831 ctxt->disableSAX = 1; 6832 } else 6833 NEXT; 6834 } else if (RAW == '"'){ 6835 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6837 ctxt->sax->error(ctxt->userData, 6838 "xmlParseEncodingDecl : expected ' or \"\n"); 6839 ctxt->wellFormed = 0; 6840 ctxt->disableSAX = 1; 6841 } 6842 if (encoding != NULL) { 6843 xmlCharEncoding enc; 6844 xmlCharEncodingHandlerPtr handler; 6845 6846 if (ctxt->input->encoding != NULL) 6847 xmlFree((xmlChar *) ctxt->input->encoding); 6848 ctxt->input->encoding = encoding; 
6849 6850 enc = xmlParseCharEncoding((const char *) encoding); 6851 /* 6852 * registered set of known encodings 6853 */ 6854 if (enc != XML_CHAR_ENCODING_ERROR) { 6855 xmlSwitchEncoding(ctxt, enc); 6856 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6857 xmlFree(encoding); 6858 return(NULL); 6859 } 6860 } else { 6861 /* 6862 * fallback for unknown encodings 6863 */ 6864 handler = xmlFindCharEncodingHandler((const char *) encoding); 6865 if (handler != NULL) { 6866 xmlSwitchToEncoding(ctxt, handler); 6867 } else { 6868 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 6869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6870 ctxt->sax->error(ctxt->userData, 6871 "Unsupported encoding %s\n", encoding); 6872 return(NULL); 6873 } 6874 } 6875 } 6876 } 6877 return(encoding); 6878} 6879 6880/** 6881 * xmlParseSDDecl: 6882 * @ctxt: an XML parser context 6883 * 6884 * parse the XML standalone declaration 6885 * 6886 * [32] SDDecl ::= S 'standalone' Eq 6887 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 6888 * 6889 * [ VC: Standalone Document Declaration ] 6890 * TODO The standalone document declaration must have the value "no" 6891 * if any external markup declarations contain declarations of: 6892 * - attributes with default values, if elements to which these 6893 * attributes apply appear in the document without specifications 6894 * of values for these attributes, or 6895 * - entities (other than amp, lt, gt, apos, quot), if references 6896 * to those entities appear in the document, or 6897 * - attributes with values subject to normalization, where the 6898 * attribute appears in the document with a value which will change 6899 * as a result of normalization, or 6900 * - element types with element content, if white space occurs directly 6901 * within any instance of those types. 
6902 * 6903 * Returns 1 if standalone, 0 otherwise 6904 */ 6905 6906int 6907xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 6908 int standalone = -1; 6909 6910 SKIP_BLANKS; 6911 if ((RAW == 's') && (NXT(1) == 't') && 6912 (NXT(2) == 'a') && (NXT(3) == 'n') && 6913 (NXT(4) == 'd') && (NXT(5) == 'a') && 6914 (NXT(6) == 'l') && (NXT(7) == 'o') && 6915 (NXT(8) == 'n') && (NXT(9) == 'e')) { 6916 SKIP(10); 6917 SKIP_BLANKS; 6918 if (RAW != '=') { 6919 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6921 ctxt->sax->error(ctxt->userData, 6922 "XML standalone declaration : expected '='\n"); 6923 ctxt->wellFormed = 0; 6924 ctxt->disableSAX = 1; 6925 return(standalone); 6926 } 6927 NEXT; 6928 SKIP_BLANKS; 6929 if (RAW == '\''){ 6930 NEXT; 6931 if ((RAW == 'n') && (NXT(1) == 'o')) { 6932 standalone = 0; 6933 SKIP(2); 6934 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6935 (NXT(2) == 's')) { 6936 standalone = 1; 6937 SKIP(3); 6938 } else { 6939 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6941 ctxt->sax->error(ctxt->userData, 6942 "standalone accepts only 'yes' or 'no'\n"); 6943 ctxt->wellFormed = 0; 6944 ctxt->disableSAX = 1; 6945 } 6946 if (RAW != '\'') { 6947 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6949 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6950 ctxt->wellFormed = 0; 6951 ctxt->disableSAX = 1; 6952 } else 6953 NEXT; 6954 } else if (RAW == '"'){ 6955 NEXT; 6956 if ((RAW == 'n') && (NXT(1) == 'o')) { 6957 standalone = 0; 6958 SKIP(2); 6959 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6960 (NXT(2) == 's')) { 6961 standalone = 1; 6962 SKIP(3); 6963 } else { 6964 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6966 ctxt->sax->error(ctxt->userData, 6967 "standalone accepts only 'yes' or 'no'\n"); 6968 ctxt->wellFormed = 0; 6969 ctxt->disableSAX = 1; 6970 
} 6971 if (RAW != '"') { 6972 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6974 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6975 ctxt->wellFormed = 0; 6976 ctxt->disableSAX = 1; 6977 } else 6978 NEXT; 6979 } else { 6980 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6982 ctxt->sax->error(ctxt->userData, 6983 "Standalone value not found\n"); 6984 ctxt->wellFormed = 0; 6985 ctxt->disableSAX = 1; 6986 } 6987 } 6988 return(standalone); 6989} 6990 6991/** 6992 * xmlParseXMLDecl: 6993 * @ctxt: an XML parser context 6994 * 6995 * parse an XML declaration header 6996 * 6997 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 6998 */ 6999 7000void 7001xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7002 xmlChar *version; 7003 7004 /* 7005 * We know that '<?xml' is here. 7006 */ 7007 SKIP(5); 7008 7009 if (!IS_BLANK(RAW)) { 7010 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7012 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7013 ctxt->wellFormed = 0; 7014 ctxt->disableSAX = 1; 7015 } 7016 SKIP_BLANKS; 7017 7018 /* 7019 * We should have the VersionInfo here. 
7020 */ 7021 version = xmlParseVersionInfo(ctxt); 7022 if (version == NULL) 7023 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7024 ctxt->version = xmlStrdup(version); 7025 xmlFree(version); 7026 7027 /* 7028 * We may have the encoding declaration 7029 */ 7030 if (!IS_BLANK(RAW)) { 7031 if ((RAW == '?') && (NXT(1) == '>')) { 7032 SKIP(2); 7033 return; 7034 } 7035 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7037 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7038 ctxt->wellFormed = 0; 7039 ctxt->disableSAX = 1; 7040 } 7041 xmlParseEncodingDecl(ctxt); 7042 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7043 /* 7044 * The XML REC instructs us to stop parsing right here 7045 */ 7046 return; 7047 } 7048 7049 /* 7050 * We may have the standalone status. 7051 */ 7052 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7053 if ((RAW == '?') && (NXT(1) == '>')) { 7054 SKIP(2); 7055 return; 7056 } 7057 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7059 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7060 ctxt->wellFormed = 0; 7061 ctxt->disableSAX = 1; 7062 } 7063 SKIP_BLANKS; 7064 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7065 7066 SKIP_BLANKS; 7067 if ((RAW == '?') && (NXT(1) == '>')) { 7068 SKIP(2); 7069 } else if (RAW == '>') { 7070 /* Deprecated old WD ... 
*/ 7071 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7073 ctxt->sax->error(ctxt->userData, 7074 "XML declaration must end-up with '?>'\n"); 7075 ctxt->wellFormed = 0; 7076 ctxt->disableSAX = 1; 7077 NEXT; 7078 } else { 7079 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7081 ctxt->sax->error(ctxt->userData, 7082 "parsing XML declaration: '?>' expected\n"); 7083 ctxt->wellFormed = 0; 7084 ctxt->disableSAX = 1; 7085 MOVETO_ENDTAG(CUR_PTR); 7086 NEXT; 7087 } 7088} 7089 7090/** 7091 * xmlParseMisc: 7092 * @ctxt: an XML parser context 7093 * 7094 * parse an XML Misc* optionnal field. 7095 * 7096 * [27] Misc ::= Comment | PI | S 7097 */ 7098 7099void 7100xmlParseMisc(xmlParserCtxtPtr ctxt) { 7101 while (((RAW == '<') && (NXT(1) == '?')) || 7102 ((RAW == '<') && (NXT(1) == '!') && 7103 (NXT(2) == '-') && (NXT(3) == '-')) || 7104 IS_BLANK(CUR)) { 7105 if ((RAW == '<') && (NXT(1) == '?')) { 7106 xmlParsePI(ctxt); 7107 } else if (IS_BLANK(CUR)) { 7108 NEXT; 7109 } else 7110 xmlParseComment(ctxt); 7111 } 7112} 7113 7114/** 7115 * xmlParseDocument: 7116 * @ctxt: an XML parser context 7117 * 7118 * parse an XML document (and build a tree if using the standard SAX 7119 * interface). 7120 * 7121 * [1] document ::= prolog element Misc* 7122 * 7123 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7124 * 7125 * Returns 0, -1 in case of error. the parser context is augmented 7126 * as a result of the parsing. 7127 */ 7128 7129int 7130xmlParseDocument(xmlParserCtxtPtr ctxt) { 7131 xmlChar start[4]; 7132 xmlCharEncoding enc; 7133 7134 xmlInitParser(); 7135 7136 GROW; 7137 7138 /* 7139 * SAX: beginning of the document processing. 
7140 */ 7141 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7142 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7143 7144 /* 7145 * Get the 4 first bytes and decode the charset 7146 * if enc != XML_CHAR_ENCODING_NONE 7147 * plug some encoding conversion routines. 7148 */ 7149 start[0] = RAW; 7150 start[1] = NXT(1); 7151 start[2] = NXT(2); 7152 start[3] = NXT(3); 7153 enc = xmlDetectCharEncoding(start, 4); 7154 if (enc != XML_CHAR_ENCODING_NONE) { 7155 xmlSwitchEncoding(ctxt, enc); 7156 } 7157 7158 7159 if (CUR == 0) { 7160 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7162 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7163 ctxt->wellFormed = 0; 7164 ctxt->disableSAX = 1; 7165 } 7166 7167 /* 7168 * Check for the XMLDecl in the Prolog. 7169 */ 7170 GROW; 7171 if ((RAW == '<') && (NXT(1) == '?') && 7172 (NXT(2) == 'x') && (NXT(3) == 'm') && 7173 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7174 7175 /* 7176 * Note that we will switch encoding on the fly. 7177 */ 7178 xmlParseXMLDecl(ctxt); 7179 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7180 /* 7181 * The XML REC instructs us to stop parsing right here 7182 */ 7183 return(-1); 7184 } 7185 ctxt->standalone = ctxt->input->standalone; 7186 SKIP_BLANKS; 7187 } else { 7188 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7189 } 7190 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7191 ctxt->sax->startDocument(ctxt->userData); 7192 7193 /* 7194 * The Misc part of the Prolog 7195 */ 7196 GROW; 7197 xmlParseMisc(ctxt); 7198 7199 /* 7200 * Then possibly doc type declaration(s) and more Misc 7201 * (doctypedecl Misc*)? 
7202 */ 7203 GROW; 7204 if ((RAW == '<') && (NXT(1) == '!') && 7205 (NXT(2) == 'D') && (NXT(3) == 'O') && 7206 (NXT(4) == 'C') && (NXT(5) == 'T') && 7207 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7208 (NXT(8) == 'E')) { 7209 7210 ctxt->inSubset = 1; 7211 xmlParseDocTypeDecl(ctxt); 7212 if (RAW == '[') { 7213 ctxt->instate = XML_PARSER_DTD; 7214 xmlParseInternalSubset(ctxt); 7215 } 7216 7217 /* 7218 * Create and update the external subset. 7219 */ 7220 ctxt->inSubset = 2; 7221 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7222 (!ctxt->disableSAX)) 7223 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7224 ctxt->extSubSystem, ctxt->extSubURI); 7225 ctxt->inSubset = 0; 7226 7227 7228 ctxt->instate = XML_PARSER_PROLOG; 7229 xmlParseMisc(ctxt); 7230 } 7231 7232 /* 7233 * Time to start parsing the tree itself 7234 */ 7235 GROW; 7236 if (RAW != '<') { 7237 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7239 ctxt->sax->error(ctxt->userData, 7240 "Start tag expected, '<' not found\n"); 7241 ctxt->wellFormed = 0; 7242 ctxt->disableSAX = 1; 7243 ctxt->instate = XML_PARSER_EOF; 7244 } else { 7245 ctxt->instate = XML_PARSER_CONTENT; 7246 xmlParseElement(ctxt); 7247 ctxt->instate = XML_PARSER_EPILOG; 7248 7249 7250 /* 7251 * The Misc part at the end 7252 */ 7253 xmlParseMisc(ctxt); 7254 7255 if (RAW != 0) { 7256 ctxt->errNo = XML_ERR_DOCUMENT_END; 7257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7258 ctxt->sax->error(ctxt->userData, 7259 "Extra content at the end of the document\n"); 7260 ctxt->wellFormed = 0; 7261 ctxt->disableSAX = 1; 7262 } 7263 ctxt->instate = XML_PARSER_EOF; 7264 } 7265 7266 /* 7267 * SAX: end of the document processing. 7268 */ 7269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7270 (!ctxt->disableSAX)) 7271 ctxt->sax->endDocument(ctxt->userData); 7272 7273 if (! 
ctxt->wellFormed) return(-1); 7274 return(0); 7275} 7276 7277/** 7278 * xmlParseExtParsedEnt: 7279 * @ctxt: an XML parser context 7280 * 7281 * parse a genreral parsed entity 7282 * An external general parsed entity is well-formed if it matches the 7283 * production labeled extParsedEnt. 7284 * 7285 * [78] extParsedEnt ::= TextDecl? content 7286 * 7287 * Returns 0, -1 in case of error. the parser context is augmented 7288 * as a result of the parsing. 7289 */ 7290 7291int 7292xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7293 xmlChar start[4]; 7294 xmlCharEncoding enc; 7295 7296 xmlDefaultSAXHandlerInit(); 7297 7298 GROW; 7299 7300 /* 7301 * SAX: beginning of the document processing. 7302 */ 7303 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7304 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7305 7306 /* 7307 * Get the 4 first bytes and decode the charset 7308 * if enc != XML_CHAR_ENCODING_NONE 7309 * plug some encoding conversion routines. 7310 */ 7311 start[0] = RAW; 7312 start[1] = NXT(1); 7313 start[2] = NXT(2); 7314 start[3] = NXT(3); 7315 enc = xmlDetectCharEncoding(start, 4); 7316 if (enc != XML_CHAR_ENCODING_NONE) { 7317 xmlSwitchEncoding(ctxt, enc); 7318 } 7319 7320 7321 if (CUR == 0) { 7322 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7324 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7325 ctxt->wellFormed = 0; 7326 ctxt->disableSAX = 1; 7327 } 7328 7329 /* 7330 * Check for the XMLDecl in the Prolog. 7331 */ 7332 GROW; 7333 if ((RAW == '<') && (NXT(1) == '?') && 7334 (NXT(2) == 'x') && (NXT(3) == 'm') && 7335 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7336 7337 /* 7338 * Note that we will switch encoding on the fly. 
7339 */ 7340 xmlParseXMLDecl(ctxt); 7341 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7342 /* 7343 * The XML REC instructs us to stop parsing right here 7344 */ 7345 return(-1); 7346 } 7347 SKIP_BLANKS; 7348 } else { 7349 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7350 } 7351 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7352 ctxt->sax->startDocument(ctxt->userData); 7353 7354 /* 7355 * Doing validity checking on chunk doesn't make sense 7356 */ 7357 ctxt->instate = XML_PARSER_CONTENT; 7358 ctxt->validate = 0; 7359 ctxt->loadsubset = 0; 7360 ctxt->depth = 0; 7361 7362 xmlParseContent(ctxt); 7363 7364 if ((RAW == '<') && (NXT(1) == '/')) { 7365 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7367 ctxt->sax->error(ctxt->userData, 7368 "chunk is not well balanced\n"); 7369 ctxt->wellFormed = 0; 7370 ctxt->disableSAX = 1; 7371 } else if (RAW != 0) { 7372 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7374 ctxt->sax->error(ctxt->userData, 7375 "extra content at the end of well balanced chunk\n"); 7376 ctxt->wellFormed = 0; 7377 ctxt->disableSAX = 1; 7378 } 7379 7380 /* 7381 * SAX: end of the document processing. 7382 */ 7383 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7384 (!ctxt->disableSAX)) 7385 ctxt->sax->endDocument(ctxt->userData); 7386 7387 if (! 
ctxt->wellFormed) return(-1); 7388 return(0); 7389} 7390 7391/************************************************************************ 7392 * * 7393 * Progressive parsing interfaces * 7394 * * 7395 ************************************************************************/ 7396 7397/** 7398 * xmlParseLookupSequence: 7399 * @ctxt: an XML parser context 7400 * @first: the first char to lookup 7401 * @next: the next char to lookup or zero 7402 * @third: the next char to lookup or zero 7403 * 7404 * Try to find if a sequence (first, next, third) or just (first next) or 7405 * (first) is available in the input stream. 7406 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7407 * to avoid rescanning sequences of bytes, it DOES change the state of the 7408 * parser, do not use liberally. 7409 * 7410 * Returns the index to the current parsing point if the full sequence 7411 * is available, -1 otherwise. 7412 */ 7413static int 7414xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7415 xmlChar next, xmlChar third) { 7416 int base, len; 7417 xmlParserInputPtr in; 7418 const xmlChar *buf; 7419 7420 in = ctxt->input; 7421 if (in == NULL) return(-1); 7422 base = in->cur - in->base; 7423 if (base < 0) return(-1); 7424 if (ctxt->checkIndex > base) 7425 base = ctxt->checkIndex; 7426 if (in->buf == NULL) { 7427 buf = in->base; 7428 len = in->length; 7429 } else { 7430 buf = in->buf->buffer->content; 7431 len = in->buf->buffer->use; 7432 } 7433 /* take into account the sequence length */ 7434 if (third) len -= 2; 7435 else if (next) len --; 7436 for (;base < len;base++) { 7437 if (buf[base] == first) { 7438 if (third != 0) { 7439 if ((buf[base + 1] != next) || 7440 (buf[base + 2] != third)) continue; 7441 } else if (next != 0) { 7442 if (buf[base + 1] != next) continue; 7443 } 7444 ctxt->checkIndex = 0; 7445#ifdef DEBUG_PUSH 7446 if (next == 0) 7447 xmlGenericError(xmlGenericErrorContext, 7448 "PP: lookup '%c' found at %d\n", 7449 first, base); 
7450 else if (third == 0) 7451 xmlGenericError(xmlGenericErrorContext, 7452 "PP: lookup '%c%c' found at %d\n", 7453 first, next, base); 7454 else 7455 xmlGenericError(xmlGenericErrorContext, 7456 "PP: lookup '%c%c%c' found at %d\n", 7457 first, next, third, base); 7458#endif 7459 return(base - (in->cur - in->base)); 7460 } 7461 } 7462 ctxt->checkIndex = base; 7463#ifdef DEBUG_PUSH 7464 if (next == 0) 7465 xmlGenericError(xmlGenericErrorContext, 7466 "PP: lookup '%c' failed\n", first); 7467 else if (third == 0) 7468 xmlGenericError(xmlGenericErrorContext, 7469 "PP: lookup '%c%c' failed\n", first, next); 7470 else 7471 xmlGenericError(xmlGenericErrorContext, 7472 "PP: lookup '%c%c%c' failed\n", first, next, third); 7473#endif 7474 return(-1); 7475} 7476 7477/** 7478 * xmlParseTryOrFinish: 7479 * @ctxt: an XML parser context 7480 * @terminate: last chunk indicator 7481 * 7482 * Try to progress on parsing 7483 * 7484 * Returns zero if no parsing was possible 7485 */ 7486static int 7487xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7488 int ret = 0; 7489 int avail; 7490 xmlChar cur, next; 7491 7492#ifdef DEBUG_PUSH 7493 switch (ctxt->instate) { 7494 case XML_PARSER_EOF: 7495 xmlGenericError(xmlGenericErrorContext, 7496 "PP: try EOF\n"); break; 7497 case XML_PARSER_START: 7498 xmlGenericError(xmlGenericErrorContext, 7499 "PP: try START\n"); break; 7500 case XML_PARSER_MISC: 7501 xmlGenericError(xmlGenericErrorContext, 7502 "PP: try MISC\n");break; 7503 case XML_PARSER_COMMENT: 7504 xmlGenericError(xmlGenericErrorContext, 7505 "PP: try COMMENT\n");break; 7506 case XML_PARSER_PROLOG: 7507 xmlGenericError(xmlGenericErrorContext, 7508 "PP: try PROLOG\n");break; 7509 case XML_PARSER_START_TAG: 7510 xmlGenericError(xmlGenericErrorContext, 7511 "PP: try START_TAG\n");break; 7512 case XML_PARSER_CONTENT: 7513 xmlGenericError(xmlGenericErrorContext, 7514 "PP: try CONTENT\n");break; 7515 case XML_PARSER_CDATA_SECTION: 7516 xmlGenericError(xmlGenericErrorContext, 7517 
"PP: try CDATA_SECTION\n");break; 7518 case XML_PARSER_END_TAG: 7519 xmlGenericError(xmlGenericErrorContext, 7520 "PP: try END_TAG\n");break; 7521 case XML_PARSER_ENTITY_DECL: 7522 xmlGenericError(xmlGenericErrorContext, 7523 "PP: try ENTITY_DECL\n");break; 7524 case XML_PARSER_ENTITY_VALUE: 7525 xmlGenericError(xmlGenericErrorContext, 7526 "PP: try ENTITY_VALUE\n");break; 7527 case XML_PARSER_ATTRIBUTE_VALUE: 7528 xmlGenericError(xmlGenericErrorContext, 7529 "PP: try ATTRIBUTE_VALUE\n");break; 7530 case XML_PARSER_DTD: 7531 xmlGenericError(xmlGenericErrorContext, 7532 "PP: try DTD\n");break; 7533 case XML_PARSER_EPILOG: 7534 xmlGenericError(xmlGenericErrorContext, 7535 "PP: try EPILOG\n");break; 7536 case XML_PARSER_PI: 7537 xmlGenericError(xmlGenericErrorContext, 7538 "PP: try PI\n");break; 7539 case XML_PARSER_IGNORE: 7540 xmlGenericError(xmlGenericErrorContext, 7541 "PP: try IGNORE\n");break; 7542 } 7543#endif 7544 7545 while (1) { 7546 /* 7547 * Pop-up of finished entities. 7548 */ 7549 while ((RAW == 0) && (ctxt->inputNr > 1)) 7550 xmlPopInput(ctxt); 7551 7552 if (ctxt->input ==NULL) break; 7553 if (ctxt->input->buf == NULL) 7554 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7555 else 7556 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7557 if (avail < 1) 7558 goto done; 7559 switch (ctxt->instate) { 7560 case XML_PARSER_EOF: 7561 /* 7562 * Document parsing is done ! 7563 */ 7564 goto done; 7565 case XML_PARSER_START: 7566 /* 7567 * Very first chars read from the document flow. 
7568 */ 7569 if (avail < 2) 7570 goto done; 7571 7572 cur = ctxt->input->cur[0]; 7573 next = ctxt->input->cur[1]; 7574 if (cur == 0) { 7575 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7576 ctxt->sax->setDocumentLocator(ctxt->userData, 7577 &xmlDefaultSAXLocator); 7578 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7580 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7581 ctxt->wellFormed = 0; 7582 ctxt->disableSAX = 1; 7583 ctxt->instate = XML_PARSER_EOF; 7584#ifdef DEBUG_PUSH 7585 xmlGenericError(xmlGenericErrorContext, 7586 "PP: entering EOF\n"); 7587#endif 7588 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7589 ctxt->sax->endDocument(ctxt->userData); 7590 goto done; 7591 } 7592 if ((cur == '<') && (next == '?')) { 7593 /* PI or XML decl */ 7594 if (avail < 5) return(ret); 7595 if ((!terminate) && 7596 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7597 return(ret); 7598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7599 ctxt->sax->setDocumentLocator(ctxt->userData, 7600 &xmlDefaultSAXLocator); 7601 if ((ctxt->input->cur[2] == 'x') && 7602 (ctxt->input->cur[3] == 'm') && 7603 (ctxt->input->cur[4] == 'l') && 7604 (IS_BLANK(ctxt->input->cur[5]))) { 7605 ret += 5; 7606#ifdef DEBUG_PUSH 7607 xmlGenericError(xmlGenericErrorContext, 7608 "PP: Parsing XML Decl\n"); 7609#endif 7610 xmlParseXMLDecl(ctxt); 7611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7612 /* 7613 * The XML REC instructs us to stop parsing right 7614 * here 7615 */ 7616 ctxt->instate = XML_PARSER_EOF; 7617 return(0); 7618 } 7619 ctxt->standalone = ctxt->input->standalone; 7620 if ((ctxt->encoding == NULL) && 7621 (ctxt->input->encoding != NULL)) 7622 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 7623 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7624 (!ctxt->disableSAX)) 7625 ctxt->sax->startDocument(ctxt->userData); 7626 ctxt->instate = XML_PARSER_MISC; 7627#ifdef DEBUG_PUSH 7628 
xmlGenericError(xmlGenericErrorContext, 7629 "PP: entering MISC\n"); 7630#endif 7631 } else { 7632 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7633 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7634 (!ctxt->disableSAX)) 7635 ctxt->sax->startDocument(ctxt->userData); 7636 ctxt->instate = XML_PARSER_MISC; 7637#ifdef DEBUG_PUSH 7638 xmlGenericError(xmlGenericErrorContext, 7639 "PP: entering MISC\n"); 7640#endif 7641 } 7642 } else { 7643 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7644 ctxt->sax->setDocumentLocator(ctxt->userData, 7645 &xmlDefaultSAXLocator); 7646 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7647 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7648 (!ctxt->disableSAX)) 7649 ctxt->sax->startDocument(ctxt->userData); 7650 ctxt->instate = XML_PARSER_MISC; 7651#ifdef DEBUG_PUSH 7652 xmlGenericError(xmlGenericErrorContext, 7653 "PP: entering MISC\n"); 7654#endif 7655 } 7656 break; 7657 case XML_PARSER_MISC: 7658 SKIP_BLANKS; 7659 if (ctxt->input->buf == NULL) 7660 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7661 else 7662 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7663 if (avail < 2) 7664 goto done; 7665 cur = ctxt->input->cur[0]; 7666 next = ctxt->input->cur[1]; 7667 if ((cur == '<') && (next == '?')) { 7668 if ((!terminate) && 7669 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7670 goto done; 7671#ifdef DEBUG_PUSH 7672 xmlGenericError(xmlGenericErrorContext, 7673 "PP: Parsing PI\n"); 7674#endif 7675 xmlParsePI(ctxt); 7676 } else if ((cur == '<') && (next == '!') && 7677 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7678 if ((!terminate) && 7679 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7680 goto done; 7681#ifdef DEBUG_PUSH 7682 xmlGenericError(xmlGenericErrorContext, 7683 "PP: Parsing Comment\n"); 7684#endif 7685 xmlParseComment(ctxt); 7686 ctxt->instate = XML_PARSER_MISC; 7687 } else if ((cur == '<') && (next == '!') && 7688 
(ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 7689 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 7690 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 7691 (ctxt->input->cur[8] == 'E')) { 7692 if ((!terminate) && 7693 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7694 goto done; 7695#ifdef DEBUG_PUSH 7696 xmlGenericError(xmlGenericErrorContext, 7697 "PP: Parsing internal subset\n"); 7698#endif 7699 ctxt->inSubset = 1; 7700 xmlParseDocTypeDecl(ctxt); 7701 if (RAW == '[') { 7702 ctxt->instate = XML_PARSER_DTD; 7703#ifdef DEBUG_PUSH 7704 xmlGenericError(xmlGenericErrorContext, 7705 "PP: entering DTD\n"); 7706#endif 7707 } else { 7708 /* 7709 * Create and update the external subset. 7710 */ 7711 ctxt->inSubset = 2; 7712 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7713 (ctxt->sax->externalSubset != NULL)) 7714 ctxt->sax->externalSubset(ctxt->userData, 7715 ctxt->intSubName, ctxt->extSubSystem, 7716 ctxt->extSubURI); 7717 ctxt->inSubset = 0; 7718 ctxt->instate = XML_PARSER_PROLOG; 7719#ifdef DEBUG_PUSH 7720 xmlGenericError(xmlGenericErrorContext, 7721 "PP: entering PROLOG\n"); 7722#endif 7723 } 7724 } else if ((cur == '<') && (next == '!') && 7725 (avail < 9)) { 7726 goto done; 7727 } else { 7728 ctxt->instate = XML_PARSER_START_TAG; 7729#ifdef DEBUG_PUSH 7730 xmlGenericError(xmlGenericErrorContext, 7731 "PP: entering START_TAG\n"); 7732#endif 7733 } 7734 break; 7735 case XML_PARSER_IGNORE: 7736 xmlGenericError(xmlGenericErrorContext, 7737 "PP: internal error, state == IGNORE"); 7738 ctxt->instate = XML_PARSER_DTD; 7739#ifdef DEBUG_PUSH 7740 xmlGenericError(xmlGenericErrorContext, 7741 "PP: entering DTD\n"); 7742#endif 7743 break; 7744 case XML_PARSER_PROLOG: 7745 SKIP_BLANKS; 7746 if (ctxt->input->buf == NULL) 7747 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7748 else 7749 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7750 if (avail < 2) 7751 goto done; 
7752 cur = ctxt->input->cur[0]; 7753 next = ctxt->input->cur[1]; 7754 if ((cur == '<') && (next == '?')) { 7755 if ((!terminate) && 7756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7757 goto done; 7758#ifdef DEBUG_PUSH 7759 xmlGenericError(xmlGenericErrorContext, 7760 "PP: Parsing PI\n"); 7761#endif 7762 xmlParsePI(ctxt); 7763 } else if ((cur == '<') && (next == '!') && 7764 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7765 if ((!terminate) && 7766 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7767 goto done; 7768#ifdef DEBUG_PUSH 7769 xmlGenericError(xmlGenericErrorContext, 7770 "PP: Parsing Comment\n"); 7771#endif 7772 xmlParseComment(ctxt); 7773 ctxt->instate = XML_PARSER_PROLOG; 7774 } else if ((cur == '<') && (next == '!') && 7775 (avail < 4)) { 7776 goto done; 7777 } else { 7778 ctxt->instate = XML_PARSER_START_TAG; 7779#ifdef DEBUG_PUSH 7780 xmlGenericError(xmlGenericErrorContext, 7781 "PP: entering START_TAG\n"); 7782#endif 7783 } 7784 break; 7785 case XML_PARSER_EPILOG: 7786 SKIP_BLANKS; 7787 if (ctxt->input->buf == NULL) 7788 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7789 else 7790 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7791 if (avail < 2) 7792 goto done; 7793 cur = ctxt->input->cur[0]; 7794 next = ctxt->input->cur[1]; 7795 if ((cur == '<') && (next == '?')) { 7796 if ((!terminate) && 7797 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7798 goto done; 7799#ifdef DEBUG_PUSH 7800 xmlGenericError(xmlGenericErrorContext, 7801 "PP: Parsing PI\n"); 7802#endif 7803 xmlParsePI(ctxt); 7804 ctxt->instate = XML_PARSER_EPILOG; 7805 } else if ((cur == '<') && (next == '!') && 7806 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7807 if ((!terminate) && 7808 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7809 goto done; 7810#ifdef DEBUG_PUSH 7811 xmlGenericError(xmlGenericErrorContext, 7812 "PP: Parsing Comment\n"); 7813#endif 7814 
xmlParseComment(ctxt); 7815 ctxt->instate = XML_PARSER_EPILOG; 7816 } else if ((cur == '<') && (next == '!') && 7817 (avail < 4)) { 7818 goto done; 7819 } else { 7820 ctxt->errNo = XML_ERR_DOCUMENT_END; 7821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7822 ctxt->sax->error(ctxt->userData, 7823 "Extra content at the end of the document\n"); 7824 ctxt->wellFormed = 0; 7825 ctxt->disableSAX = 1; 7826 ctxt->instate = XML_PARSER_EOF; 7827#ifdef DEBUG_PUSH 7828 xmlGenericError(xmlGenericErrorContext, 7829 "PP: entering EOF\n"); 7830#endif 7831 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7832 (!ctxt->disableSAX)) 7833 ctxt->sax->endDocument(ctxt->userData); 7834 goto done; 7835 } 7836 break; 7837 case XML_PARSER_START_TAG: { 7838 xmlChar *name, *oldname; 7839 7840 if ((avail < 2) && (ctxt->inputNr == 1)) 7841 goto done; 7842 cur = ctxt->input->cur[0]; 7843 if (cur != '<') { 7844 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7846 ctxt->sax->error(ctxt->userData, 7847 "Start tag expect, '<' not found\n"); 7848 ctxt->wellFormed = 0; 7849 ctxt->disableSAX = 1; 7850 ctxt->instate = XML_PARSER_EOF; 7851#ifdef DEBUG_PUSH 7852 xmlGenericError(xmlGenericErrorContext, 7853 "PP: entering EOF\n"); 7854#endif 7855 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7856 (!ctxt->disableSAX)) 7857 ctxt->sax->endDocument(ctxt->userData); 7858 goto done; 7859 } 7860 if ((!terminate) && 7861 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7862 goto done; 7863 if (ctxt->spaceNr == 0) 7864 spacePush(ctxt, -1); 7865 else 7866 spacePush(ctxt, *ctxt->space); 7867 name = xmlParseStartTag(ctxt); 7868 if (name == NULL) { 7869 spacePop(ctxt); 7870 ctxt->instate = XML_PARSER_EOF; 7871#ifdef DEBUG_PUSH 7872 xmlGenericError(xmlGenericErrorContext, 7873 "PP: entering EOF\n"); 7874#endif 7875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7876 (!ctxt->disableSAX)) 7877 ctxt->sax->endDocument(ctxt->userData); 7878 goto 
done; 7879 } 7880 namePush(ctxt, xmlStrdup(name)); 7881 7882 /* 7883 * [ VC: Root Element Type ] 7884 * The Name in the document type declaration must match 7885 * the element type of the root element. 7886 */ 7887 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7888 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7889 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7890 7891 /* 7892 * Check for an Empty Element. 7893 */ 7894 if ((RAW == '/') && (NXT(1) == '>')) { 7895 SKIP(2); 7896 if ((ctxt->sax != NULL) && 7897 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 7898 ctxt->sax->endElement(ctxt->userData, name); 7899 xmlFree(name); 7900 oldname = namePop(ctxt); 7901 spacePop(ctxt); 7902 if (oldname != NULL) { 7903#ifdef DEBUG_STACK 7904 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7905#endif 7906 xmlFree(oldname); 7907 } 7908 if (ctxt->name == NULL) { 7909 ctxt->instate = XML_PARSER_EPILOG; 7910#ifdef DEBUG_PUSH 7911 xmlGenericError(xmlGenericErrorContext, 7912 "PP: entering EPILOG\n"); 7913#endif 7914 } else { 7915 ctxt->instate = XML_PARSER_CONTENT; 7916#ifdef DEBUG_PUSH 7917 xmlGenericError(xmlGenericErrorContext, 7918 "PP: entering CONTENT\n"); 7919#endif 7920 } 7921 break; 7922 } 7923 if (RAW == '>') { 7924 NEXT; 7925 } else { 7926 ctxt->errNo = XML_ERR_GT_REQUIRED; 7927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7928 ctxt->sax->error(ctxt->userData, 7929 "Couldn't find end of Start Tag %s\n", 7930 name); 7931 ctxt->wellFormed = 0; 7932 ctxt->disableSAX = 1; 7933 7934 /* 7935 * end of parsing of this node. 
7936 */ 7937 nodePop(ctxt); 7938 oldname = namePop(ctxt); 7939 spacePop(ctxt); 7940 if (oldname != NULL) { 7941#ifdef DEBUG_STACK 7942 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7943#endif 7944 xmlFree(oldname); 7945 } 7946 } 7947 xmlFree(name); 7948 ctxt->instate = XML_PARSER_CONTENT; 7949#ifdef DEBUG_PUSH 7950 xmlGenericError(xmlGenericErrorContext, 7951 "PP: entering CONTENT\n"); 7952#endif 7953 break; 7954 } 7955 case XML_PARSER_CONTENT: { 7956 const xmlChar *test; 7957 int cons; 7958 int tok; 7959 7960 /* 7961 * Handle preparsed entities and charRef 7962 */ 7963 if (ctxt->token != 0) { 7964 xmlChar current[2] = { 0 , 0 } ; 7965 7966 current[0] = (xmlChar) ctxt->token; 7967 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7968 (ctxt->sax->characters != NULL)) 7969 ctxt->sax->characters(ctxt->userData, current, 1); 7970 ctxt->token = 0; 7971 } 7972 if ((avail < 2) && (ctxt->inputNr == 1)) 7973 goto done; 7974 cur = ctxt->input->cur[0]; 7975 next = ctxt->input->cur[1]; 7976 7977 test = CUR_PTR; 7978 cons = ctxt->input->consumed; 7979 tok = ctxt->token; 7980 if ((cur == '<') && (next == '?')) { 7981 if ((!terminate) && 7982 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7983 goto done; 7984#ifdef DEBUG_PUSH 7985 xmlGenericError(xmlGenericErrorContext, 7986 "PP: Parsing PI\n"); 7987#endif 7988 xmlParsePI(ctxt); 7989 } else if ((cur == '<') && (next == '!') && 7990 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7991 if ((!terminate) && 7992 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7993 goto done; 7994#ifdef DEBUG_PUSH 7995 xmlGenericError(xmlGenericErrorContext, 7996 "PP: Parsing Comment\n"); 7997#endif 7998 xmlParseComment(ctxt); 7999 ctxt->instate = XML_PARSER_CONTENT; 8000 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8001 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8002 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8003 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8004 
(ctxt->input->cur[8] == '[')) { 8005 SKIP(9); 8006 ctxt->instate = XML_PARSER_CDATA_SECTION; 8007#ifdef DEBUG_PUSH 8008 xmlGenericError(xmlGenericErrorContext, 8009 "PP: entering CDATA_SECTION\n"); 8010#endif 8011 break; 8012 } else if ((cur == '<') && (next == '!') && 8013 (avail < 9)) { 8014 goto done; 8015 } else if ((cur == '<') && (next == '/')) { 8016 ctxt->instate = XML_PARSER_END_TAG; 8017#ifdef DEBUG_PUSH 8018 xmlGenericError(xmlGenericErrorContext, 8019 "PP: entering END_TAG\n"); 8020#endif 8021 break; 8022 } else if (cur == '<') { 8023 ctxt->instate = XML_PARSER_START_TAG; 8024#ifdef DEBUG_PUSH 8025 xmlGenericError(xmlGenericErrorContext, 8026 "PP: entering START_TAG\n"); 8027#endif 8028 break; 8029 } else if (cur == '&') { 8030 if ((!terminate) && 8031 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8032 goto done; 8033#ifdef DEBUG_PUSH 8034 xmlGenericError(xmlGenericErrorContext, 8035 "PP: Parsing Reference\n"); 8036#endif 8037 xmlParseReference(ctxt); 8038 } else { 8039 /* TODO Avoid the extra copy, handle directly !!! */ 8040 /* 8041 * Goal of the following test is: 8042 * - minimize calls to the SAX 'character' callback 8043 * when they are mergeable 8044 * - handle an problem for isBlank when we only parse 8045 * a sequence of blank chars and the next one is 8046 * not available to check against '<' presence. 8047 * - tries to homogenize the differences in SAX 8048 * callbacks beween the push and pull versions 8049 * of the parser. 8050 */ 8051 if ((ctxt->inputNr == 1) && 8052 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8053 if ((!terminate) && 8054 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8055 goto done; 8056 } 8057 ctxt->checkIndex = 0; 8058#ifdef DEBUG_PUSH 8059 xmlGenericError(xmlGenericErrorContext, 8060 "PP: Parsing char data\n"); 8061#endif 8062 xmlParseCharData(ctxt, 0); 8063 } 8064 /* 8065 * Pop-up of finished entities. 
8066 */ 8067 while ((RAW == 0) && (ctxt->inputNr > 1)) 8068 xmlPopInput(ctxt); 8069 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 8070 (tok == ctxt->token)) { 8071 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8073 ctxt->sax->error(ctxt->userData, 8074 "detected an error in element content\n"); 8075 ctxt->wellFormed = 0; 8076 ctxt->disableSAX = 1; 8077 ctxt->instate = XML_PARSER_EOF; 8078 break; 8079 } 8080 break; 8081 } 8082 case XML_PARSER_CDATA_SECTION: { 8083 /* 8084 * The Push mode need to have the SAX callback for 8085 * cdataBlock merge back contiguous callbacks. 8086 */ 8087 int base; 8088 8089 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8090 if (base < 0) { 8091 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8092 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8093 if (ctxt->sax->cdataBlock != NULL) 8094 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8095 XML_PARSER_BIG_BUFFER_SIZE); 8096 } 8097 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8098 ctxt->checkIndex = 0; 8099 } 8100 goto done; 8101 } else { 8102 if ((ctxt->sax != NULL) && (base > 0) && 8103 (!ctxt->disableSAX)) { 8104 if (ctxt->sax->cdataBlock != NULL) 8105 ctxt->sax->cdataBlock(ctxt->userData, 8106 ctxt->input->cur, base); 8107 } 8108 SKIP(base + 3); 8109 ctxt->checkIndex = 0; 8110 ctxt->instate = XML_PARSER_CONTENT; 8111#ifdef DEBUG_PUSH 8112 xmlGenericError(xmlGenericErrorContext, 8113 "PP: entering CONTENT\n"); 8114#endif 8115 } 8116 break; 8117 } 8118 case XML_PARSER_END_TAG: 8119 if (avail < 2) 8120 goto done; 8121 if ((!terminate) && 8122 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8123 goto done; 8124 xmlParseEndTag(ctxt); 8125 if (ctxt->name == NULL) { 8126 ctxt->instate = XML_PARSER_EPILOG; 8127#ifdef DEBUG_PUSH 8128 xmlGenericError(xmlGenericErrorContext, 8129 "PP: entering EPILOG\n"); 8130#endif 8131 } else { 8132 ctxt->instate = XML_PARSER_CONTENT; 8133#ifdef DEBUG_PUSH 8134 
xmlGenericError(xmlGenericErrorContext, 8135 "PP: entering CONTENT\n"); 8136#endif 8137 } 8138 break; 8139 case XML_PARSER_DTD: { 8140 /* 8141 * Sorry but progressive parsing of the internal subset 8142 * is not expected to be supported. We first check that 8143 * the full content of the internal subset is available and 8144 * the parsing is launched only at that point. 8145 * Internal subset ends up with "']' S? '>'" in an unescaped 8146 * section and not in a ']]>' sequence which are conditional 8147 * sections (whoever argued to keep that crap in XML deserve 8148 * a place in hell !). 8149 */ 8150 int base, i; 8151 xmlChar *buf; 8152 xmlChar quote = 0; 8153 8154 base = ctxt->input->cur - ctxt->input->base; 8155 if (base < 0) return(0); 8156 if (ctxt->checkIndex > base) 8157 base = ctxt->checkIndex; 8158 buf = ctxt->input->buf->buffer->content; 8159 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8160 base++) { 8161 if (quote != 0) { 8162 if (buf[base] == quote) 8163 quote = 0; 8164 continue; 8165 } 8166 if (buf[base] == '"') { 8167 quote = '"'; 8168 continue; 8169 } 8170 if (buf[base] == '\'') { 8171 quote = '\''; 8172 continue; 8173 } 8174 if (buf[base] == ']') { 8175 if ((unsigned int) base +1 >= 8176 ctxt->input->buf->buffer->use) 8177 break; 8178 if (buf[base + 1] == ']') { 8179 /* conditional crap, skip both ']' ! 
*/ 8180 base++; 8181 continue; 8182 } 8183 for (i = 0; 8184 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8185 i++) { 8186 if (buf[base + i] == '>') 8187 goto found_end_int_subset; 8188 } 8189 break; 8190 } 8191 } 8192 /* 8193 * We didn't found the end of the Internal subset 8194 */ 8195 if (quote == 0) 8196 ctxt->checkIndex = base; 8197#ifdef DEBUG_PUSH 8198 if (next == 0) 8199 xmlGenericError(xmlGenericErrorContext, 8200 "PP: lookup of int subset end filed\n"); 8201#endif 8202 goto done; 8203 8204found_end_int_subset: 8205 xmlParseInternalSubset(ctxt); 8206 ctxt->inSubset = 2; 8207 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8208 (ctxt->sax->externalSubset != NULL)) 8209 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8210 ctxt->extSubSystem, ctxt->extSubURI); 8211 ctxt->inSubset = 0; 8212 ctxt->instate = XML_PARSER_PROLOG; 8213 ctxt->checkIndex = 0; 8214#ifdef DEBUG_PUSH 8215 xmlGenericError(xmlGenericErrorContext, 8216 "PP: entering PROLOG\n"); 8217#endif 8218 break; 8219 } 8220 case XML_PARSER_COMMENT: 8221 xmlGenericError(xmlGenericErrorContext, 8222 "PP: internal error, state == COMMENT\n"); 8223 ctxt->instate = XML_PARSER_CONTENT; 8224#ifdef DEBUG_PUSH 8225 xmlGenericError(xmlGenericErrorContext, 8226 "PP: entering CONTENT\n"); 8227#endif 8228 break; 8229 case XML_PARSER_PI: 8230 xmlGenericError(xmlGenericErrorContext, 8231 "PP: internal error, state == PI\n"); 8232 ctxt->instate = XML_PARSER_CONTENT; 8233#ifdef DEBUG_PUSH 8234 xmlGenericError(xmlGenericErrorContext, 8235 "PP: entering CONTENT\n"); 8236#endif 8237 break; 8238 case XML_PARSER_ENTITY_DECL: 8239 xmlGenericError(xmlGenericErrorContext, 8240 "PP: internal error, state == ENTITY_DECL\n"); 8241 ctxt->instate = XML_PARSER_DTD; 8242#ifdef DEBUG_PUSH 8243 xmlGenericError(xmlGenericErrorContext, 8244 "PP: entering DTD\n"); 8245#endif 8246 break; 8247 case XML_PARSER_ENTITY_VALUE: 8248 xmlGenericError(xmlGenericErrorContext, 8249 "PP: internal error, state == 
ENTITY_VALUE\n"); 8250 ctxt->instate = XML_PARSER_CONTENT; 8251#ifdef DEBUG_PUSH 8252 xmlGenericError(xmlGenericErrorContext, 8253 "PP: entering DTD\n"); 8254#endif 8255 break; 8256 case XML_PARSER_ATTRIBUTE_VALUE: 8257 xmlGenericError(xmlGenericErrorContext, 8258 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8259 ctxt->instate = XML_PARSER_START_TAG; 8260#ifdef DEBUG_PUSH 8261 xmlGenericError(xmlGenericErrorContext, 8262 "PP: entering START_TAG\n"); 8263#endif 8264 break; 8265 case XML_PARSER_SYSTEM_LITERAL: 8266 xmlGenericError(xmlGenericErrorContext, 8267 "PP: internal error, state == SYSTEM_LITERAL\n"); 8268 ctxt->instate = XML_PARSER_START_TAG; 8269#ifdef DEBUG_PUSH 8270 xmlGenericError(xmlGenericErrorContext, 8271 "PP: entering START_TAG\n"); 8272#endif 8273 break; 8274 } 8275 } 8276done: 8277#ifdef DEBUG_PUSH 8278 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8279#endif 8280 return(ret); 8281} 8282 8283/** 8284 * xmlParseChunk: 8285 * @ctxt: an XML parser context 8286 * @chunk: an char array 8287 * @size: the size in byte of the chunk 8288 * @terminate: last chunk indicator 8289 * 8290 * Parse a Chunk of memory 8291 * 8292 * Returns zero if no error, the xmlParserErrors otherwise. 
8293 */ 8294int 8295xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8296 int terminate) { 8297 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8298 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8299 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8300 int cur = ctxt->input->cur - ctxt->input->base; 8301 8302 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8303 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8304 ctxt->input->cur = ctxt->input->base + cur; 8305 ctxt->input->end = 8306 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8307#ifdef DEBUG_PUSH 8308 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8309#endif 8310 8311 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8312 xmlParseTryOrFinish(ctxt, terminate); 8313 } else if (ctxt->instate != XML_PARSER_EOF) { 8314 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8315 xmlParserInputBufferPtr in = ctxt->input->buf; 8316 if ((in->encoder != NULL) && (in->buffer != NULL) && 8317 (in->raw != NULL)) { 8318 int nbchars; 8319 8320 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8321 if (nbchars < 0) { 8322 xmlGenericError(xmlGenericErrorContext, 8323 "xmlParseChunk: encoder error\n"); 8324 return(XML_ERR_INVALID_ENCODING); 8325 } 8326 } 8327 } 8328 } 8329 xmlParseTryOrFinish(ctxt, terminate); 8330 if (terminate) { 8331 /* 8332 * Check for termination 8333 */ 8334 if ((ctxt->instate != XML_PARSER_EOF) && 8335 (ctxt->instate != XML_PARSER_EPILOG)) { 8336 ctxt->errNo = XML_ERR_DOCUMENT_END; 8337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8338 ctxt->sax->error(ctxt->userData, 8339 "Extra content at the end of the document\n"); 8340 ctxt->wellFormed = 0; 8341 ctxt->disableSAX = 1; 8342 } 8343 if (ctxt->instate != XML_PARSER_EOF) { 8344 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8345 (!ctxt->disableSAX)) 8346 
ctxt->sax->endDocument(ctxt->userData); 8347 } 8348 ctxt->instate = XML_PARSER_EOF; 8349 } 8350 return((xmlParserErrors) ctxt->errNo); 8351} 8352 8353/************************************************************************ 8354 * * 8355 * I/O front end functions to the parser * 8356 * * 8357 ************************************************************************/ 8358 8359/** 8360 * xmlStopParser: 8361 * @ctxt: an XML parser context 8362 * 8363 * Blocks further parser processing 8364 */ 8365void 8366xmlStopParser(xmlParserCtxtPtr ctxt) { 8367 ctxt->instate = XML_PARSER_EOF; 8368 if (ctxt->input != NULL) 8369 ctxt->input->cur = BAD_CAST""; 8370} 8371 8372/** 8373 * xmlCreatePushParserCtxt: 8374 * @sax: a SAX handler 8375 * @user_data: The user data returned on SAX callbacks 8376 * @chunk: a pointer to an array of chars 8377 * @size: number of chars in the array 8378 * @filename: an optional file name or URI 8379 * 8380 * Create a parser context for using the XML parser in push mode 8381 * To allow content encoding detection, @size should be >= 4 8382 * The value of @filename is used for fetching external entities 8383 * and error/warning reports. 
8384 * 8385 * Returns the new parser context or NULL 8386 */ 8387xmlParserCtxtPtr 8388xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8389 const char *chunk, int size, const char *filename) { 8390 xmlParserCtxtPtr ctxt; 8391 xmlParserInputPtr inputStream; 8392 xmlParserInputBufferPtr buf; 8393 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8394 8395 /* 8396 * plug some encoding conversion routines 8397 */ 8398 if ((chunk != NULL) && (size >= 4)) 8399 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8400 8401 buf = xmlAllocParserInputBuffer(enc); 8402 if (buf == NULL) return(NULL); 8403 8404 ctxt = xmlNewParserCtxt(); 8405 if (ctxt == NULL) { 8406 xmlFree(buf); 8407 return(NULL); 8408 } 8409 if (sax != NULL) { 8410 if (ctxt->sax != &xmlDefaultSAXHandler) 8411 xmlFree(ctxt->sax); 8412 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8413 if (ctxt->sax == NULL) { 8414 xmlFree(buf); 8415 xmlFree(ctxt); 8416 return(NULL); 8417 } 8418 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8419 if (user_data != NULL) 8420 ctxt->userData = user_data; 8421 } 8422 if (filename == NULL) { 8423 ctxt->directory = NULL; 8424 } else { 8425 ctxt->directory = xmlParserGetDirectory(filename); 8426 } 8427 8428 inputStream = xmlNewInputStream(ctxt); 8429 if (inputStream == NULL) { 8430 xmlFreeParserCtxt(ctxt); 8431 return(NULL); 8432 } 8433 8434 if (filename == NULL) 8435 inputStream->filename = NULL; 8436 else 8437 inputStream->filename = xmlMemStrdup(filename); 8438 inputStream->buf = buf; 8439 inputStream->base = inputStream->buf->buffer->content; 8440 inputStream->cur = inputStream->buf->buffer->content; 8441 inputStream->end = 8442 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 8443 if (enc != XML_CHAR_ENCODING_NONE) { 8444 xmlSwitchEncoding(ctxt, enc); 8445 } 8446 8447 inputPush(ctxt, inputStream); 8448 8449 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8450 (ctxt->input->buf != NULL)) { 8451 
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8452#ifdef DEBUG_PUSH 8453 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8454#endif 8455 } 8456 8457 return(ctxt); 8458} 8459 8460/** 8461 * xmlCreateIOParserCtxt: 8462 * @sax: a SAX handler 8463 * @user_data: The user data returned on SAX callbacks 8464 * @ioread: an I/O read function 8465 * @ioclose: an I/O close function 8466 * @ioctx: an I/O handler 8467 * @enc: the charset encoding if known 8468 * 8469 * Create a parser context for using the XML parser with an existing 8470 * I/O stream 8471 * 8472 * Returns the new parser context or NULL 8473 */ 8474xmlParserCtxtPtr 8475xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8476 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8477 void *ioctx, xmlCharEncoding enc) { 8478 xmlParserCtxtPtr ctxt; 8479 xmlParserInputPtr inputStream; 8480 xmlParserInputBufferPtr buf; 8481 8482 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8483 if (buf == NULL) return(NULL); 8484 8485 ctxt = xmlNewParserCtxt(); 8486 if (ctxt == NULL) { 8487 xmlFree(buf); 8488 return(NULL); 8489 } 8490 if (sax != NULL) { 8491 if (ctxt->sax != &xmlDefaultSAXHandler) 8492 xmlFree(ctxt->sax); 8493 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8494 if (ctxt->sax == NULL) { 8495 xmlFree(buf); 8496 xmlFree(ctxt); 8497 return(NULL); 8498 } 8499 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8500 if (user_data != NULL) 8501 ctxt->userData = user_data; 8502 } 8503 8504 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8505 if (inputStream == NULL) { 8506 xmlFreeParserCtxt(ctxt); 8507 return(NULL); 8508 } 8509 inputPush(ctxt, inputStream); 8510 8511 return(ctxt); 8512} 8513 8514/************************************************************************ 8515 * * 8516 * Front ends when parsing a Dtd * 8517 * * 8518 ************************************************************************/ 8519 8520/** 8521 * xmlIOParseDTD: 8522 * 
@sax: the SAX handler block or NULL 8523 * @input: an Input Buffer 8524 * @enc: the charset encoding if known 8525 * 8526 * Load and parse a DTD 8527 * 8528 * Returns the resulting xmlDtdPtr or NULL in case of error. 8529 * @input will be freed at parsing end. 8530 */ 8531 8532xmlDtdPtr 8533xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 8534 xmlCharEncoding enc) { 8535 xmlDtdPtr ret = NULL; 8536 xmlParserCtxtPtr ctxt; 8537 xmlParserInputPtr pinput = NULL; 8538 8539 if (input == NULL) 8540 return(NULL); 8541 8542 ctxt = xmlNewParserCtxt(); 8543 if (ctxt == NULL) { 8544 return(NULL); 8545 } 8546 8547 /* 8548 * Set-up the SAX context 8549 */ 8550 if (sax != NULL) { 8551 if (ctxt->sax != NULL) 8552 xmlFree(ctxt->sax); 8553 ctxt->sax = sax; 8554 ctxt->userData = NULL; 8555 } 8556 8557 /* 8558 * generate a parser input from the I/O handler 8559 */ 8560 8561 pinput = xmlNewIOInputStream(ctxt, input, enc); 8562 if (pinput == NULL) { 8563 if (sax != NULL) ctxt->sax = NULL; 8564 xmlFreeParserCtxt(ctxt); 8565 return(NULL); 8566 } 8567 8568 /* 8569 * plug some encoding conversion routines here. 8570 */ 8571 xmlPushInput(ctxt, pinput); 8572 8573 pinput->filename = NULL; 8574 pinput->line = 1; 8575 pinput->col = 1; 8576 pinput->base = ctxt->input->cur; 8577 pinput->cur = ctxt->input->cur; 8578 pinput->free = NULL; 8579 8580 /* 8581 * let's parse that entity knowing it's an external subset. 
8582 */ 8583 ctxt->inSubset = 2; 8584 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8585 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8586 BAD_CAST "none", BAD_CAST "none"); 8587 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 8588 8589 if (ctxt->myDoc != NULL) { 8590 if (ctxt->wellFormed) { 8591 ret = ctxt->myDoc->extSubset; 8592 ctxt->myDoc->extSubset = NULL; 8593 } else { 8594 ret = NULL; 8595 } 8596 xmlFreeDoc(ctxt->myDoc); 8597 ctxt->myDoc = NULL; 8598 } 8599 if (sax != NULL) ctxt->sax = NULL; 8600 xmlFreeParserCtxt(ctxt); 8601 8602 return(ret); 8603} 8604 8605/** 8606 * xmlSAXParseDTD: 8607 * @sax: the SAX handler block 8608 * @ExternalID: a NAME* containing the External ID of the DTD 8609 * @SystemID: a NAME* containing the URL to the DTD 8610 * 8611 * Load and parse an external subset. 8612 * 8613 * Returns the resulting xmlDtdPtr or NULL in case of error. 8614 */ 8615 8616xmlDtdPtr 8617xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 8618 const xmlChar *SystemID) { 8619 xmlDtdPtr ret = NULL; 8620 xmlParserCtxtPtr ctxt; 8621 xmlParserInputPtr input = NULL; 8622 xmlCharEncoding enc; 8623 8624 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 8625 8626 ctxt = xmlNewParserCtxt(); 8627 if (ctxt == NULL) { 8628 return(NULL); 8629 } 8630 8631 /* 8632 * Set-up the SAX context 8633 */ 8634 if (sax != NULL) { 8635 if (ctxt->sax != NULL) 8636 xmlFree(ctxt->sax); 8637 ctxt->sax = sax; 8638 ctxt->userData = NULL; 8639 } 8640 8641 /* 8642 * Ask the Entity resolver to load the damn thing 8643 */ 8644 8645 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8646 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8647 if (input == NULL) { 8648 if (sax != NULL) ctxt->sax = NULL; 8649 xmlFreeParserCtxt(ctxt); 8650 return(NULL); 8651 } 8652 8653 /* 8654 * plug some encoding conversion routines here. 
8655 */ 8656 xmlPushInput(ctxt, input); 8657 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 8658 xmlSwitchEncoding(ctxt, enc); 8659 8660 if (input->filename == NULL) 8661 input->filename = (char *) xmlStrdup(SystemID); 8662 input->line = 1; 8663 input->col = 1; 8664 input->base = ctxt->input->cur; 8665 input->cur = ctxt->input->cur; 8666 input->free = NULL; 8667 8668 /* 8669 * let's parse that entity knowing it's an external subset. 8670 */ 8671 ctxt->inSubset = 2; 8672 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8673 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8674 ExternalID, SystemID); 8675 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8676 8677 if (ctxt->myDoc != NULL) { 8678 if (ctxt->wellFormed) { 8679 ret = ctxt->myDoc->extSubset; 8680 ctxt->myDoc->extSubset = NULL; 8681 } else { 8682 ret = NULL; 8683 } 8684 xmlFreeDoc(ctxt->myDoc); 8685 ctxt->myDoc = NULL; 8686 } 8687 if (sax != NULL) ctxt->sax = NULL; 8688 xmlFreeParserCtxt(ctxt); 8689 8690 return(ret); 8691} 8692 8693/** 8694 * xmlParseDTD: 8695 * @ExternalID: a NAME* containing the External ID of the DTD 8696 * @SystemID: a NAME* containing the URL to the DTD 8697 * 8698 * Load and parse an external subset. 8699 * 8700 * Returns the resulting xmlDtdPtr or NULL in case of error. 
8701 */ 8702 8703xmlDtdPtr 8704xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 8705 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 8706} 8707 8708/************************************************************************ 8709 * * 8710 * Front ends when parsing an Entity * 8711 * * 8712 ************************************************************************/ 8713 8714/** 8715 * xmlParseCtxtExternalEntity: 8716 * @ctx: the existing parsing context 8717 * @URL: the URL for the entity to load 8718 * @ID: the System ID for the entity to load 8719 * @list: the return value for the set of parsed nodes 8720 * 8721 * Parse an external general entity within an existing parsing context 8722 * An external general parsed entity is well-formed if it matches the 8723 * production labeled extParsedEnt. 8724 * 8725 * [78] extParsedEnt ::= TextDecl? content 8726 * 8727 * Returns 0 if the entity is well formed, -1 in case of args problem and 8728 * the parser error code otherwise 8729 */ 8730 8731int 8732xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 8733 const xmlChar *ID, xmlNodePtr *list) { 8734 xmlParserCtxtPtr ctxt; 8735 xmlDocPtr newDoc; 8736 xmlSAXHandlerPtr oldsax = NULL; 8737 int ret = 0; 8738 8739 if (ctx->depth > 40) { 8740 return(XML_ERR_ENTITY_LOOP); 8741 } 8742 8743 if (list != NULL) 8744 *list = NULL; 8745 if ((URL == NULL) && (ID == NULL)) 8746 return(-1); 8747 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 8748 return(-1); 8749 8750 8751 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8752 if (ctxt == NULL) return(-1); 8753 ctxt->userData = ctxt; 8754 oldsax = ctxt->sax; 8755 ctxt->sax = ctx->sax; 8756 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8757 if (newDoc == NULL) { 8758 xmlFreeParserCtxt(ctxt); 8759 return(-1); 8760 } 8761 if (ctx->myDoc != NULL) { 8762 newDoc->intSubset = ctx->myDoc->intSubset; 8763 newDoc->extSubset = ctx->myDoc->extSubset; 8764 } 8765 if (ctx->myDoc->URL != NULL) { 8766 newDoc->URL = 
xmlStrdup(ctx->myDoc->URL); 8767 } 8768 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8769 if (newDoc->children == NULL) { 8770 ctxt->sax = oldsax; 8771 xmlFreeParserCtxt(ctxt); 8772 newDoc->intSubset = NULL; 8773 newDoc->extSubset = NULL; 8774 xmlFreeDoc(newDoc); 8775 return(-1); 8776 } 8777 nodePush(ctxt, newDoc->children); 8778 if (ctx->myDoc == NULL) { 8779 ctxt->myDoc = newDoc; 8780 } else { 8781 ctxt->myDoc = ctx->myDoc; 8782 newDoc->children->doc = ctx->myDoc; 8783 } 8784 8785 /* 8786 * Parse a possible text declaration first 8787 */ 8788 GROW; 8789 if ((RAW == '<') && (NXT(1) == '?') && 8790 (NXT(2) == 'x') && (NXT(3) == 'm') && 8791 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8792 xmlParseTextDecl(ctxt); 8793 } 8794 8795 /* 8796 * Doing validity checking on chunk doesn't make sense 8797 */ 8798 ctxt->instate = XML_PARSER_CONTENT; 8799 ctxt->validate = ctx->validate; 8800 ctxt->loadsubset = ctx->loadsubset; 8801 ctxt->depth = ctx->depth + 1; 8802 ctxt->replaceEntities = ctx->replaceEntities; 8803 if (ctxt->validate) { 8804 ctxt->vctxt.error = ctx->vctxt.error; 8805 ctxt->vctxt.warning = ctx->vctxt.warning; 8806 /* Allocate the Node stack */ 8807 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); 8808 if (ctxt->vctxt.nodeTab == NULL) { 8809 xmlGenericError(xmlGenericErrorContext, 8810 "xmlParseCtxtExternalEntity: out of memory\n"); 8811 ctxt->validate = 0; 8812 ctxt->vctxt.error = NULL; 8813 ctxt->vctxt.warning = NULL; 8814 } else { 8815 ctxt->vctxt.nodeNr = 0; 8816 ctxt->vctxt.nodeMax = 4; 8817 ctxt->vctxt.node = NULL; 8818 } 8819 } else { 8820 ctxt->vctxt.error = NULL; 8821 ctxt->vctxt.warning = NULL; 8822 } 8823 8824 xmlParseContent(ctxt); 8825 8826 if ((RAW == '<') && (NXT(1) == '/')) { 8827 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8829 ctxt->sax->error(ctxt->userData, 8830 "chunk is not well balanced\n"); 8831 ctxt->wellFormed = 0; 8832 
ctxt->disableSAX = 1; 8833 } else if (RAW != 0) { 8834 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8836 ctxt->sax->error(ctxt->userData, 8837 "extra content at the end of well balanced chunk\n"); 8838 ctxt->wellFormed = 0; 8839 ctxt->disableSAX = 1; 8840 } 8841 if (ctxt->node != newDoc->children) { 8842 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8844 ctxt->sax->error(ctxt->userData, 8845 "chunk is not well balanced\n"); 8846 ctxt->wellFormed = 0; 8847 ctxt->disableSAX = 1; 8848 } 8849 8850 if (!ctxt->wellFormed) { 8851 if (ctxt->errNo == 0) 8852 ret = 1; 8853 else 8854 ret = ctxt->errNo; 8855 } else { 8856 if (list != NULL) { 8857 xmlNodePtr cur; 8858 8859 /* 8860 * Return the newly created nodeset after unlinking it from 8861 * they pseudo parent. 8862 */ 8863 cur = newDoc->children->children; 8864 *list = cur; 8865 while (cur != NULL) { 8866 cur->parent = NULL; 8867 cur = cur->next; 8868 } 8869 newDoc->children->children = NULL; 8870 } 8871 ret = 0; 8872 } 8873 ctxt->sax = oldsax; 8874 xmlFreeParserCtxt(ctxt); 8875 newDoc->intSubset = NULL; 8876 newDoc->extSubset = NULL; 8877 xmlFreeDoc(newDoc); 8878 8879 return(ret); 8880} 8881 8882/** 8883 * xmlParseExternalEntity: 8884 * @doc: the document the chunk pertains to 8885 * @sax: the SAX handler bloc (possibly NULL) 8886 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8887 * @depth: Used for loop detection, use 0 8888 * @URL: the URL for the entity to load 8889 * @ID: the System ID for the entity to load 8890 * @list: the return value for the set of parsed nodes 8891 * 8892 * Parse an external general entity 8893 * An external general parsed entity is well-formed if it matches the 8894 * production labeled extParsedEnt. 8895 * 8896 * [78] extParsedEnt ::= TextDecl? 
content 8897 * 8898 * Returns 0 if the entity is well formed, -1 in case of args problem and 8899 * the parser error code otherwise 8900 */ 8901 8902int 8903xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 8904 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) { 8905 xmlParserCtxtPtr ctxt; 8906 xmlDocPtr newDoc; 8907 xmlSAXHandlerPtr oldsax = NULL; 8908 int ret = 0; 8909 8910 if (depth > 40) { 8911 return(XML_ERR_ENTITY_LOOP); 8912 } 8913 8914 8915 8916 if (list != NULL) 8917 *list = NULL; 8918 if ((URL == NULL) && (ID == NULL)) 8919 return(-1); 8920 if (doc == NULL) /* @@ relax but check for dereferences */ 8921 return(-1); 8922 8923 8924 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8925 if (ctxt == NULL) return(-1); 8926 ctxt->userData = ctxt; 8927 if (sax != NULL) { 8928 oldsax = ctxt->sax; 8929 ctxt->sax = sax; 8930 if (user_data != NULL) 8931 ctxt->userData = user_data; 8932 } 8933 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8934 if (newDoc == NULL) { 8935 xmlFreeParserCtxt(ctxt); 8936 return(-1); 8937 } 8938 if (doc != NULL) { 8939 newDoc->intSubset = doc->intSubset; 8940 newDoc->extSubset = doc->extSubset; 8941 } 8942 if (doc->URL != NULL) { 8943 newDoc->URL = xmlStrdup(doc->URL); 8944 } 8945 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8946 if (newDoc->children == NULL) { 8947 if (sax != NULL) 8948 ctxt->sax = oldsax; 8949 xmlFreeParserCtxt(ctxt); 8950 newDoc->intSubset = NULL; 8951 newDoc->extSubset = NULL; 8952 xmlFreeDoc(newDoc); 8953 return(-1); 8954 } 8955 nodePush(ctxt, newDoc->children); 8956 if (doc == NULL) { 8957 ctxt->myDoc = newDoc; 8958 } else { 8959 ctxt->myDoc = doc; 8960 newDoc->children->doc = doc; 8961 } 8962 8963 /* 8964 * Parse a possible text declaration first 8965 */ 8966 GROW; 8967 if ((RAW == '<') && (NXT(1) == '?') && 8968 (NXT(2) == 'x') && (NXT(3) == 'm') && 8969 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8970 xmlParseTextDecl(ctxt); 8971 } 8972 8973 /* 8974 * 
Doing validity checking on chunk doesn't make sense 8975 */ 8976 ctxt->instate = XML_PARSER_CONTENT; 8977 ctxt->validate = 0; 8978 ctxt->external = 2; 8979 ctxt->loadsubset = 0; 8980 ctxt->depth = depth; 8981 8982 xmlParseContent(ctxt); 8983 8984 if ((RAW == '<') && (NXT(1) == '/')) { 8985 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8987 ctxt->sax->error(ctxt->userData, 8988 "chunk is not well balanced\n"); 8989 ctxt->wellFormed = 0; 8990 ctxt->disableSAX = 1; 8991 } else if (RAW != 0) { 8992 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8994 ctxt->sax->error(ctxt->userData, 8995 "extra content at the end of well balanced chunk\n"); 8996 ctxt->wellFormed = 0; 8997 ctxt->disableSAX = 1; 8998 } 8999 if (ctxt->node != newDoc->children) { 9000 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9002 ctxt->sax->error(ctxt->userData, 9003 "chunk is not well balanced\n"); 9004 ctxt->wellFormed = 0; 9005 ctxt->disableSAX = 1; 9006 } 9007 9008 if (!ctxt->wellFormed) { 9009 if (ctxt->errNo == 0) 9010 ret = 1; 9011 else 9012 ret = ctxt->errNo; 9013 } else { 9014 if (list != NULL) { 9015 xmlNodePtr cur; 9016 9017 /* 9018 * Return the newly created nodeset after unlinking it from 9019 * they pseudo parent. 
9020 */ 9021 cur = newDoc->children->children; 9022 *list = cur; 9023 while (cur != NULL) { 9024 cur->parent = NULL; 9025 cur = cur->next; 9026 } 9027 newDoc->children->children = NULL; 9028 } 9029 ret = 0; 9030 } 9031 if (sax != NULL) 9032 ctxt->sax = oldsax; 9033 xmlFreeParserCtxt(ctxt); 9034 newDoc->intSubset = NULL; 9035 newDoc->extSubset = NULL; 9036 xmlFreeDoc(newDoc); 9037 9038 return(ret); 9039} 9040 9041/** 9042 * xmlParseBalancedChunkMemory: 9043 * @doc: the document the chunk pertains to 9044 * @sax: the SAX handler bloc (possibly NULL) 9045 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9046 * @depth: Used for loop detection, use 0 9047 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9048 * @list: the return value for the set of parsed nodes 9049 * 9050 * Parse a well-balanced chunk of an XML document 9051 * called by the parser 9052 * The allowed sequence for the Well Balanced Chunk is the one defined by 9053 * the content production in the XML grammar: 9054 * 9055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9056 * 9057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9058 * the parser error code otherwise 9059 */ 9060 9061int 9062xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9063 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) { 9064 xmlParserCtxtPtr ctxt; 9065 xmlDocPtr newDoc; 9066 xmlSAXHandlerPtr oldsax = NULL; 9067 int size; 9068 int ret = 0; 9069 9070 if (depth > 40) { 9071 return(XML_ERR_ENTITY_LOOP); 9072 } 9073 9074 9075 if (list != NULL) 9076 *list = NULL; 9077 if (string == NULL) 9078 return(-1); 9079 9080 size = xmlStrlen(string); 9081 9082 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9083 if (ctxt == NULL) return(-1); 9084 ctxt->userData = ctxt; 9085 if (sax != NULL) { 9086 oldsax = ctxt->sax; 9087 ctxt->sax = sax; 9088 if (user_data != NULL) 9089 ctxt->userData = user_data; 9090 } 9091 
newDoc = xmlNewDoc(BAD_CAST "1.0"); 9092 if (newDoc == NULL) { 9093 xmlFreeParserCtxt(ctxt); 9094 return(-1); 9095 } 9096 if (doc != NULL) { 9097 newDoc->intSubset = doc->intSubset; 9098 newDoc->extSubset = doc->extSubset; 9099 } 9100 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9101 if (newDoc->children == NULL) { 9102 if (sax != NULL) 9103 ctxt->sax = oldsax; 9104 xmlFreeParserCtxt(ctxt); 9105 newDoc->intSubset = NULL; 9106 newDoc->extSubset = NULL; 9107 xmlFreeDoc(newDoc); 9108 return(-1); 9109 } 9110 nodePush(ctxt, newDoc->children); 9111 if (doc == NULL) { 9112 ctxt->myDoc = newDoc; 9113 } else { 9114 ctxt->myDoc = doc; 9115 newDoc->children->doc = doc; 9116 } 9117 ctxt->instate = XML_PARSER_CONTENT; 9118 ctxt->depth = depth; 9119 9120 /* 9121 * Doing validity checking on chunk doesn't make sense 9122 */ 9123 ctxt->validate = 0; 9124 ctxt->loadsubset = 0; 9125 9126 xmlParseContent(ctxt); 9127 9128 if ((RAW == '<') && (NXT(1) == '/')) { 9129 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9131 ctxt->sax->error(ctxt->userData, 9132 "chunk is not well balanced\n"); 9133 ctxt->wellFormed = 0; 9134 ctxt->disableSAX = 1; 9135 } else if (RAW != 0) { 9136 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9138 ctxt->sax->error(ctxt->userData, 9139 "extra content at the end of well balanced chunk\n"); 9140 ctxt->wellFormed = 0; 9141 ctxt->disableSAX = 1; 9142 } 9143 if (ctxt->node != newDoc->children) { 9144 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9146 ctxt->sax->error(ctxt->userData, 9147 "chunk is not well balanced\n"); 9148 ctxt->wellFormed = 0; 9149 ctxt->disableSAX = 1; 9150 } 9151 9152 if (!ctxt->wellFormed) { 9153 if (ctxt->errNo == 0) 9154 ret = 1; 9155 else 9156 ret = ctxt->errNo; 9157 } else { 9158 if (list != NULL) { 9159 xmlNodePtr cur; 9160 9161 /* 9162 * Return the 
newly created nodeset after unlinking it from 9163 * they pseudo parent. 9164 */ 9165 cur = newDoc->children->children; 9166 *list = cur; 9167 while (cur != NULL) { 9168 cur->parent = NULL; 9169 cur = cur->next; 9170 } 9171 newDoc->children->children = NULL; 9172 } 9173 ret = 0; 9174 } 9175 if (sax != NULL) 9176 ctxt->sax = oldsax; 9177 xmlFreeParserCtxt(ctxt); 9178 newDoc->intSubset = NULL; 9179 newDoc->extSubset = NULL; 9180 xmlFreeDoc(newDoc); 9181 9182 return(ret); 9183} 9184 9185/** 9186 * xmlSAXParseEntity: 9187 * @sax: the SAX handler block 9188 * @filename: the filename 9189 * 9190 * parse an XML external entity out of context and build a tree. 9191 * It use the given SAX function block to handle the parsing callback. 9192 * If sax is NULL, fallback to the default DOM tree building routines. 9193 * 9194 * [78] extParsedEnt ::= TextDecl? content 9195 * 9196 * This correspond to a "Well Balanced" chunk 9197 * 9198 * Returns the resulting document tree 9199 */ 9200 9201xmlDocPtr 9202xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9203 xmlDocPtr ret; 9204 xmlParserCtxtPtr ctxt; 9205 char *directory = NULL; 9206 9207 ctxt = xmlCreateFileParserCtxt(filename); 9208 if (ctxt == NULL) { 9209 return(NULL); 9210 } 9211 if (sax != NULL) { 9212 if (ctxt->sax != NULL) 9213 xmlFree(ctxt->sax); 9214 ctxt->sax = sax; 9215 ctxt->userData = NULL; 9216 } 9217 9218 if ((ctxt->directory == NULL) && (directory == NULL)) 9219 directory = xmlParserGetDirectory(filename); 9220 9221 xmlParseExtParsedEnt(ctxt); 9222 9223 if (ctxt->wellFormed) 9224 ret = ctxt->myDoc; 9225 else { 9226 ret = NULL; 9227 xmlFreeDoc(ctxt->myDoc); 9228 ctxt->myDoc = NULL; 9229 } 9230 if (sax != NULL) 9231 ctxt->sax = NULL; 9232 xmlFreeParserCtxt(ctxt); 9233 9234 return(ret); 9235} 9236 9237/** 9238 * xmlParseEntity: 9239 * @filename: the filename 9240 * 9241 * parse an XML external entity out of context and build a tree. 9242 * 9243 * [78] extParsedEnt ::= TextDecl? 
content 9244 * 9245 * This correspond to a "Well Balanced" chunk 9246 * 9247 * Returns the resulting document tree 9248 */ 9249 9250xmlDocPtr 9251xmlParseEntity(const char *filename) { 9252 return(xmlSAXParseEntity(NULL, filename)); 9253} 9254 9255/** 9256 * xmlCreateEntityParserCtxt: 9257 * @URL: the entity URL 9258 * @ID: the entity PUBLIC ID 9259 * @base: a posible base for the target URI 9260 * 9261 * Create a parser context for an external entity 9262 * Automatic support for ZLIB/Compress compressed document is provided 9263 * by default if found at compile-time. 9264 * 9265 * Returns the new parser context or NULL 9266 */ 9267xmlParserCtxtPtr 9268xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9269 const xmlChar *base) { 9270 xmlParserCtxtPtr ctxt; 9271 xmlParserInputPtr inputStream; 9272 char *directory = NULL; 9273 xmlChar *uri; 9274 9275 ctxt = xmlNewParserCtxt(); 9276 if (ctxt == NULL) { 9277 return(NULL); 9278 } 9279 9280 uri = xmlBuildURI(URL, base); 9281 9282 if (uri == NULL) { 9283 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9284 if (inputStream == NULL) { 9285 xmlFreeParserCtxt(ctxt); 9286 return(NULL); 9287 } 9288 9289 inputPush(ctxt, inputStream); 9290 9291 if ((ctxt->directory == NULL) && (directory == NULL)) 9292 directory = xmlParserGetDirectory((char *)URL); 9293 if ((ctxt->directory == NULL) && (directory != NULL)) 9294 ctxt->directory = directory; 9295 } else { 9296 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9297 if (inputStream == NULL) { 9298 xmlFree(uri); 9299 xmlFreeParserCtxt(ctxt); 9300 return(NULL); 9301 } 9302 9303 inputPush(ctxt, inputStream); 9304 9305 if ((ctxt->directory == NULL) && (directory == NULL)) 9306 directory = xmlParserGetDirectory((char *)uri); 9307 if ((ctxt->directory == NULL) && (directory != NULL)) 9308 ctxt->directory = directory; 9309 xmlFree(uri); 9310 } 9311 9312 return(ctxt); 9313} 9314 
9315/************************************************************************ 9316 * * 9317 * Front ends when parsing from a file * 9318 * * 9319 ************************************************************************/ 9320 9321/** 9322 * xmlCreateFileParserCtxt: 9323 * @filename: the filename 9324 * 9325 * Create a parser context for a file content. 9326 * Automatic support for ZLIB/Compress compressed document is provided 9327 * by default if found at compile-time. 9328 * 9329 * Returns the new parser context or NULL 9330 */ 9331xmlParserCtxtPtr 9332xmlCreateFileParserCtxt(const char *filename) 9333{ 9334 xmlParserCtxtPtr ctxt; 9335 xmlParserInputPtr inputStream; 9336 xmlParserInputBufferPtr buf; 9337 char *directory = NULL; 9338 9339 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 9340 if (buf == NULL) { 9341 return(NULL); 9342 } 9343 9344 ctxt = xmlNewParserCtxt(); 9345 if (ctxt == NULL) { 9346 if (xmlDefaultSAXHandler.error != NULL) { 9347 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9348 } 9349 return(NULL); 9350 } 9351 9352 inputStream = xmlNewInputStream(ctxt); 9353 if (inputStream == NULL) { 9354 xmlFreeParserCtxt(ctxt); 9355 return(NULL); 9356 } 9357 9358 inputStream->filename = xmlMemStrdup(filename); 9359 inputStream->buf = buf; 9360 inputStream->base = inputStream->buf->buffer->content; 9361 inputStream->cur = inputStream->buf->buffer->content; 9362 inputStream->end = 9363 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 9364 9365 inputPush(ctxt, inputStream); 9366 if ((ctxt->directory == NULL) && (directory == NULL)) 9367 directory = xmlParserGetDirectory(filename); 9368 if ((ctxt->directory == NULL) && (directory != NULL)) 9369 ctxt->directory = directory; 9370 9371 return(ctxt); 9372} 9373 9374/** 9375 * xmlSAXParseFile: 9376 * @sax: the SAX handler block 9377 * @filename: the filename 9378 * @recovery: work in recovery mode, i.e. 
tries to read no Well Formed 9379 * documents 9380 * 9381 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9382 * compressed document is provided by default if found at compile-time. 9383 * It use the given SAX function block to handle the parsing callback. 9384 * If sax is NULL, fallback to the default DOM tree building routines. 9385 * 9386 * Returns the resulting document tree 9387 */ 9388 9389xmlDocPtr 9390xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9391 int recovery) { 9392 xmlDocPtr ret; 9393 xmlParserCtxtPtr ctxt; 9394 char *directory = NULL; 9395 9396 ctxt = xmlCreateFileParserCtxt(filename); 9397 if (ctxt == NULL) { 9398 return(NULL); 9399 } 9400 if (sax != NULL) { 9401 if (ctxt->sax != NULL) 9402 xmlFree(ctxt->sax); 9403 ctxt->sax = sax; 9404 ctxt->userData = NULL; 9405 } 9406 9407 if ((ctxt->directory == NULL) && (directory == NULL)) 9408 directory = xmlParserGetDirectory(filename); 9409 if ((ctxt->directory == NULL) && (directory != NULL)) 9410 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9411 9412 xmlParseDocument(ctxt); 9413 9414 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9415 else { 9416 ret = NULL; 9417 xmlFreeDoc(ctxt->myDoc); 9418 ctxt->myDoc = NULL; 9419 } 9420 if (sax != NULL) 9421 ctxt->sax = NULL; 9422 xmlFreeParserCtxt(ctxt); 9423 9424 return(ret); 9425} 9426 9427/** 9428 * xmlRecoverDoc: 9429 * @cur: a pointer to an array of xmlChar 9430 * 9431 * parse an XML in-memory document and build a tree. 9432 * In the case the document is not Well Formed, a tree is built anyway 9433 * 9434 * Returns the resulting document tree 9435 */ 9436 9437xmlDocPtr 9438xmlRecoverDoc(xmlChar *cur) { 9439 return(xmlSAXParseDoc(NULL, cur, 1)); 9440} 9441 9442/** 9443 * xmlParseFile: 9444 * @filename: the filename 9445 * 9446 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9447 * compressed document is provided by default if found at compile-time. 
9448 * 9449 * Returns the resulting document tree 9450 */ 9451 9452xmlDocPtr 9453xmlParseFile(const char *filename) { 9454 return(xmlSAXParseFile(NULL, filename, 0)); 9455} 9456 9457/** 9458 * xmlRecoverFile: 9459 * @filename: the filename 9460 * 9461 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9462 * compressed document is provided by default if found at compile-time. 9463 * In the case the document is not Well Formed, a tree is built anyway 9464 * 9465 * Returns the resulting document tree 9466 */ 9467 9468xmlDocPtr 9469xmlRecoverFile(const char *filename) { 9470 return(xmlSAXParseFile(NULL, filename, 1)); 9471} 9472 9473 9474/** 9475 * xmlSetupParserForBuffer: 9476 * @ctxt: an XML parser context 9477 * @buffer: a xmlChar * buffer 9478 * @filename: a file name 9479 * 9480 * Setup the parser context to parse a new buffer; Clears any prior 9481 * contents from the parser context. The buffer parameter must not be 9482 * NULL, but the filename parameter can be 9483 */ 9484void 9485xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9486 const char* filename) 9487{ 9488 xmlParserInputPtr input; 9489 9490 input = xmlNewInputStream(ctxt); 9491 if (input == NULL) { 9492 perror("malloc"); 9493 xmlFree(ctxt); 9494 return; 9495 } 9496 9497 xmlClearParserCtxt(ctxt); 9498 if (filename != NULL) 9499 input->filename = xmlMemStrdup(filename); 9500 input->base = buffer; 9501 input->cur = buffer; 9502 input->end = &buffer[xmlStrlen(buffer)]; 9503 inputPush(ctxt, input); 9504} 9505 9506/** 9507 * xmlSAXUserParseFile: 9508 * @sax: a SAX handler 9509 * @user_data: The user data returned on SAX callbacks 9510 * @filename: a file name 9511 * 9512 * parse an XML file and call the given SAX handler routines. 
9513 * Automatic support for ZLIB/Compress compressed document is provided 9514 * 9515 * Returns 0 in case of success or a error number otherwise 9516 */ 9517int 9518xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 9519 const char *filename) { 9520 int ret = 0; 9521 xmlParserCtxtPtr ctxt; 9522 9523 ctxt = xmlCreateFileParserCtxt(filename); 9524 if (ctxt == NULL) return -1; 9525 if (ctxt->sax != &xmlDefaultSAXHandler) 9526 xmlFree(ctxt->sax); 9527 ctxt->sax = sax; 9528 if (user_data != NULL) 9529 ctxt->userData = user_data; 9530 9531 xmlParseDocument(ctxt); 9532 9533 if (ctxt->wellFormed) 9534 ret = 0; 9535 else { 9536 if (ctxt->errNo != 0) 9537 ret = ctxt->errNo; 9538 else 9539 ret = -1; 9540 } 9541 if (sax != NULL) 9542 ctxt->sax = NULL; 9543 xmlFreeParserCtxt(ctxt); 9544 9545 return ret; 9546} 9547 9548/************************************************************************ 9549 * * 9550 * Front ends when parsing from memory * 9551 * * 9552 ************************************************************************/ 9553 9554/** 9555 * xmlCreateMemoryParserCtxt: 9556 * @buffer: a pointer to a char array 9557 * @size: the size of the array 9558 * 9559 * Create a parser context for an XML in-memory document. 
9560 * 9561 * Returns the new parser context or NULL 9562 */ 9563xmlParserCtxtPtr 9564xmlCreateMemoryParserCtxt(char *buffer, int size) { 9565 xmlParserCtxtPtr ctxt; 9566 xmlParserInputPtr input; 9567 xmlParserInputBufferPtr buf; 9568 9569 if (buffer == NULL) 9570 return(NULL); 9571 if (size <= 0) 9572 return(NULL); 9573 9574 ctxt = xmlNewParserCtxt(); 9575 if (ctxt == NULL) 9576 return(NULL); 9577 9578 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 9579 if (buf == NULL) return(NULL); 9580 9581 input = xmlNewInputStream(ctxt); 9582 if (input == NULL) { 9583 xmlFreeParserCtxt(ctxt); 9584 return(NULL); 9585 } 9586 9587 input->filename = NULL; 9588 input->buf = buf; 9589 input->base = input->buf->buffer->content; 9590 input->cur = input->buf->buffer->content; 9591 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 9592 9593 inputPush(ctxt, input); 9594 return(ctxt); 9595} 9596 9597/** 9598 * xmlSAXParseMemory: 9599 * @sax: the SAX handler block 9600 * @buffer: an pointer to a char array 9601 * @size: the size of the array 9602 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 9603 * documents 9604 * 9605 * parse an XML in-memory block and use the given SAX function block 9606 * to handle the parsing callback. If sax is NULL, fallback to the default 9607 * DOM tree building routines. 
9608 * 9609 * Returns the resulting document tree 9610 */ 9611xmlDocPtr 9612xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) { 9613 xmlDocPtr ret; 9614 xmlParserCtxtPtr ctxt; 9615 9616 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9617 if (ctxt == NULL) return(NULL); 9618 if (sax != NULL) { 9619 ctxt->sax = sax; 9620 ctxt->userData = NULL; 9621 } 9622 9623 xmlParseDocument(ctxt); 9624 9625 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9626 else { 9627 ret = NULL; 9628 xmlFreeDoc(ctxt->myDoc); 9629 ctxt->myDoc = NULL; 9630 } 9631 if (sax != NULL) 9632 ctxt->sax = NULL; 9633 xmlFreeParserCtxt(ctxt); 9634 9635 return(ret); 9636} 9637 9638/** 9639 * xmlParseMemory: 9640 * @buffer: an pointer to a char array 9641 * @size: the size of the array 9642 * 9643 * parse an XML in-memory block and build a tree. 9644 * 9645 * Returns the resulting document tree 9646 */ 9647 9648xmlDocPtr xmlParseMemory(char *buffer, int size) { 9649 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 9650} 9651 9652/** 9653 * xmlRecoverMemory: 9654 * @buffer: an pointer to a char array 9655 * @size: the size of the array 9656 * 9657 * parse an XML in-memory block and build a tree. 9658 * In the case the document is not Well Formed, a tree is built anyway 9659 * 9660 * Returns the resulting document tree 9661 */ 9662 9663xmlDocPtr xmlRecoverMemory(char *buffer, int size) { 9664 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 9665} 9666 9667/** 9668 * xmlSAXUserParseMemory: 9669 * @sax: a SAX handler 9670 * @user_data: The user data returned on SAX callbacks 9671 * @buffer: an in-memory XML document input 9672 * @size: the length of the XML document in bytes 9673 * 9674 * A better SAX parsing routine. 9675 * parse an XML in-memory buffer and call the given SAX handler routines. 
9676 * 9677 * Returns 0 in case of success or a error number otherwise 9678 */ 9679int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 9680 char *buffer, int size) { 9681 int ret = 0; 9682 xmlParserCtxtPtr ctxt; 9683 xmlSAXHandlerPtr oldsax = NULL; 9684 9685 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9686 if (ctxt == NULL) return -1; 9687 if (sax != NULL) { 9688 oldsax = ctxt->sax; 9689 ctxt->sax = sax; 9690 } 9691 ctxt->userData = user_data; 9692 9693 xmlParseDocument(ctxt); 9694 9695 if (ctxt->wellFormed) 9696 ret = 0; 9697 else { 9698 if (ctxt->errNo != 0) 9699 ret = ctxt->errNo; 9700 else 9701 ret = -1; 9702 } 9703 if (sax != NULL) { 9704 ctxt->sax = oldsax; 9705 } 9706 xmlFreeParserCtxt(ctxt); 9707 9708 return ret; 9709} 9710 9711/** 9712 * xmlCreateDocParserCtxt: 9713 * @cur: a pointer to an array of xmlChar 9714 * 9715 * Creates a parser context for an XML in-memory document. 9716 * 9717 * Returns the new parser context or NULL 9718 */ 9719xmlParserCtxtPtr 9720xmlCreateDocParserCtxt(xmlChar *cur) { 9721 int len; 9722 9723 if (cur == NULL) 9724 return(NULL); 9725 len = xmlStrlen(cur); 9726 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 9727} 9728 9729/** 9730 * xmlSAXParseDoc: 9731 * @sax: the SAX handler block 9732 * @cur: a pointer to an array of xmlChar 9733 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9734 * documents 9735 * 9736 * parse an XML in-memory document and build a tree. 9737 * It use the given SAX function block to handle the parsing callback. 9738 * If sax is NULL, fallback to the default DOM tree building routines. 
9739 * 9740 * Returns the resulting document tree 9741 */ 9742 9743xmlDocPtr 9744xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 9745 xmlDocPtr ret; 9746 xmlParserCtxtPtr ctxt; 9747 9748 if (cur == NULL) return(NULL); 9749 9750 9751 ctxt = xmlCreateDocParserCtxt(cur); 9752 if (ctxt == NULL) return(NULL); 9753 if (sax != NULL) { 9754 ctxt->sax = sax; 9755 ctxt->userData = NULL; 9756 } 9757 9758 xmlParseDocument(ctxt); 9759 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9760 else { 9761 ret = NULL; 9762 xmlFreeDoc(ctxt->myDoc); 9763 ctxt->myDoc = NULL; 9764 } 9765 if (sax != NULL) 9766 ctxt->sax = NULL; 9767 xmlFreeParserCtxt(ctxt); 9768 9769 return(ret); 9770} 9771 9772/** 9773 * xmlParseDoc: 9774 * @cur: a pointer to an array of xmlChar 9775 * 9776 * parse an XML in-memory document and build a tree. 9777 * 9778 * Returns the resulting document tree 9779 */ 9780 9781xmlDocPtr 9782xmlParseDoc(xmlChar *cur) { 9783 return(xmlSAXParseDoc(NULL, cur, 0)); 9784} 9785 9786 9787/************************************************************************ 9788 * * 9789 * Miscellaneous * 9790 * * 9791 ************************************************************************/ 9792 9793#ifdef LIBXML_XPATH_ENABLED 9794#include <libxml/xpath.h> 9795#endif 9796 9797static int xmlParserInitialized = 0; 9798 9799/** 9800 * xmlInitParser: 9801 * 9802 * Initialization function for the XML parser. 9803 * This is not reentrant. Call once before processing in case of 9804 * use in multithreaded programs. 
9805 */ 9806 9807void 9808xmlInitParser(void) { 9809 if (xmlParserInitialized) return; 9810 9811 xmlInitCharEncodingHandlers(); 9812 xmlInitializePredefinedEntities(); 9813 xmlDefaultSAXHandlerInit(); 9814 xmlRegisterDefaultInputCallbacks(); 9815 xmlRegisterDefaultOutputCallbacks(); 9816#ifdef LIBXML_HTML_ENABLED 9817 htmlInitAutoClose(); 9818 htmlDefaultSAXHandlerInit(); 9819#endif 9820#ifdef LIBXML_XPATH_ENABLED 9821 xmlXPathInit(); 9822#endif 9823 xmlParserInitialized = 1; 9824} 9825 9826/** 9827 * xmlCleanupParser: 9828 * 9829 * Cleanup function for the XML parser. It tries to reclaim all 9830 * parsing related global memory allocated for the parser processing. 9831 * It doesn't deallocate any document related memory. Calling this 9832 * function should not prevent reusing the parser. 9833 */ 9834 9835void 9836xmlCleanupParser(void) { 9837 xmlParserInitialized = 0; 9838 xmlCleanupCharEncodingHandlers(); 9839 xmlCleanupPredefinedEntities(); 9840} 9841 9842/** 9843 * xmlPedanticParserDefault: 9844 * @val: int 0 or 1 9845 * 9846 * Set and return the previous value for enabling pedantic warnings. 9847 * 9848 * Returns the last value for 0 for no substitution, 1 for substitution. 9849 */ 9850 9851int 9852xmlPedanticParserDefault(int val) { 9853 int old = xmlPedanticParserDefaultValue; 9854 9855 xmlPedanticParserDefaultValue = val; 9856 return(old); 9857} 9858 9859/** 9860 * xmlSubstituteEntitiesDefault: 9861 * @val: int 0 or 1 9862 * 9863 * Set and return the previous value for default entity support. 9864 * Initially the parser always keep entity references instead of substituting 9865 * entity values in the output. This function has to be used to change the 9866 * default parser behaviour 9867 * SAX::subtituteEntities() has to be used for changing that on a file by 9868 * file basis. 9869 * 9870 * Returns the last value for 0 for no substitution, 1 for substitution. 
9871 */ 9872 9873int 9874xmlSubstituteEntitiesDefault(int val) { 9875 int old = xmlSubstituteEntitiesDefaultValue; 9876 9877 xmlSubstituteEntitiesDefaultValue = val; 9878 return(old); 9879} 9880 9881/** 9882 * xmlKeepBlanksDefault: 9883 * @val: int 0 or 1 9884 * 9885 * Set and return the previous value for default blanks text nodes support. 9886 * The 1.x version of the parser used an heuristic to try to detect 9887 * ignorable white spaces. As a result the SAX callback was generating 9888 * ignorableWhitespace() callbacks instead of characters() one, and when 9889 * using the DOM output text nodes containing those blanks were not generated. 9890 * The 2.x and later version will switch to the XML standard way and 9891 * ignorableWhitespace() are only generated when running the parser in 9892 * validating mode and when the current element doesn't allow CDATA or 9893 * mixed content. 9894 * This function is provided as a way to force the standard behaviour 9895 * on 1.X libs and to switch back to the old mode for compatibility when 9896 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 9897 * by using xmlIsBlankNode() commodity function to detect the "empty" 9898 * nodes generated. 9899 * This value also affect autogeneration of indentation when saving code 9900 * if blanks sections are kept, indentation is not generated. 9901 * 9902 * Returns the last value for 0 for no substitution, 1 for substitution. 9903 */ 9904 9905int 9906xmlKeepBlanksDefault(int val) { 9907 int old = xmlKeepBlanksDefaultValue; 9908 9909 xmlKeepBlanksDefaultValue = val; 9910 xmlIndentTreeOutput = !val; 9911 return(old); 9912} 9913 9914