parser.c revision d6d7f7bf96a87688cc4bf756cf98367018e3ef88
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * Daniel.Veillard@w3.org 31 */ 32 33#ifdef WIN32 34#include "win32config.h" 35#define XML_DIR_SEP '\\' 36#else 37#include "config.h" 38#define XML_DIR_SEP '/' 39#endif 40 41#include <stdio.h> 42#include <stdlib.h> 43#include <string.h> 44#include <libxml/xmlmemory.h> 45#include <libxml/tree.h> 46#include <libxml/parser.h> 47#include <libxml/parserInternals.h> 48#include <libxml/valid.h> 49#include <libxml/entities.h> 50#include <libxml/xmlerror.h> 51#include <libxml/encoding.h> 52#include <libxml/xmlIO.h> 53#include <libxml/uri.h> 54 55#ifdef HAVE_CTYPE_H 56#include <ctype.h> 57#endif 58#ifdef HAVE_STDLIB_H 59#include <stdlib.h> 60#endif 61#ifdef HAVE_SYS_STAT_H 62#include <sys/stat.h> 63#endif 64#ifdef HAVE_FCNTL_H 65#include <fcntl.h> 66#endif 67#ifdef HAVE_UNISTD_H 68#include <unistd.h> 69#endif 70#ifdef HAVE_ZLIB_H 71#include <zlib.h> 72#endif 73 74 75#define XML_PARSER_BIG_BUFFER_SIZE 1000 76#define XML_PARSER_BUFFER_SIZE 100 77 78/* 79 * Various global defaults for parsing 80 */ 81int xmlGetWarningsDefaultValue = 1; 82int xmlParserDebugEntities = 0; 83int xmlSubstituteEntitiesDefaultValue = 0; 84int xmlDoValidityCheckingDefaultValue = 0; 85int xmlPedanticParserDefaultValue = 0; 86int xmlKeepBlanksDefaultValue = 1; 87 88/* 89 * List of XML prefixed PI allowed by W3C specs 90 */ 91 92const char *xmlW3CPIs[] = { 93 "xml-stylesheet", 94 NULL 95}; 96 97/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 98void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 99xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 100 const xmlChar **str); 101 102 103/************************************************************************ 104 * * 105 * Parser stacks related functions and macros * 106 * * 107 ************************************************************************/ 108 109xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 110 const xmlChar ** str); 111 112/* 113 * Generic function for accessing stacks in the Parser Context 
114 */ 115 116#define PUSH_AND_POP(scope, type, name) \ 117scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 118 if (ctxt->name##Nr >= ctxt->name##Max) { \ 119 ctxt->name##Max *= 2; \ 120 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 121 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 122 if (ctxt->name##Tab == NULL) { \ 123 xmlGenericError(xmlGenericErrorContext, \ 124 "realloc failed !\n"); \ 125 return(0); \ 126 } \ 127 } \ 128 ctxt->name##Tab[ctxt->name##Nr] = value; \ 129 ctxt->name = value; \ 130 return(ctxt->name##Nr++); \ 131} \ 132scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 133 type ret; \ 134 if (ctxt->name##Nr <= 0) return(0); \ 135 ctxt->name##Nr--; \ 136 if (ctxt->name##Nr > 0) \ 137 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 138 else \ 139 ctxt->name = NULL; \ 140 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 141 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 142 return(ret); \ 143} \ 144 145/* 146 * Those macros actually generate the functions 147 */ 148PUSH_AND_POP(extern, xmlParserInputPtr, input) 149PUSH_AND_POP(extern, xmlNodePtr, node) 150PUSH_AND_POP(extern, xmlChar*, name) 151 152int spacePush(xmlParserCtxtPtr ctxt, int val) { 153 if (ctxt->spaceNr >= ctxt->spaceMax) { 154 ctxt->spaceMax *= 2; 155 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 156 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 157 if (ctxt->spaceTab == NULL) { 158 xmlGenericError(xmlGenericErrorContext, 159 "realloc failed !\n"); 160 return(0); 161 } 162 } 163 ctxt->spaceTab[ctxt->spaceNr] = val; 164 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 165 return(ctxt->spaceNr++); 166} 167 168int spacePop(xmlParserCtxtPtr ctxt) { 169 int ret; 170 if (ctxt->spaceNr <= 0) return(0); 171 ctxt->spaceNr--; 172 if (ctxt->spaceNr > 0) 173 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 174 else 175 ctxt->space = NULL; 176 ret = ctxt->spaceTab[ctxt->spaceNr]; 177 ctxt->spaceTab[ctxt->spaceNr] = -1; 178 return(ret); 179} 180 181/* 182 * Macros for accessing the 
content. Those should be used only by the parser, 183 * and not exported. 184 * 185 * Dirty macros, i.e. one often need to make assumption on the context to 186 * use them 187 * 188 * CUR_PTR return the current pointer to the xmlChar to be parsed. 189 * To be used with extreme caution since operations consuming 190 * characters may move the input buffer to a different location ! 191 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 192 * This should be used internally by the parser 193 * only to compare to ASCII values otherwise it would break when 194 * running with UTF-8 encoding. 195 * RAW same as CUR but in the input buffer, bypass any token 196 * extraction that may have been done 197 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 198 * to compare on ASCII based substring. 199 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 200 * strings within the parser. 201 * 202 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 203 * 204 * NEXT Skip to the next character, this does the proper decoding 205 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 206 * NEXTL(l) Skip l xmlChars in the input buffer 207 * CUR_CHAR(l) returns the current unicode character (int), set l 208 * to the number of xmlChars used for the encoding [0-5]. 209 * CUR_SCHAR same but operate on a string instead of the context 210 * COPY_BUF copy the current unicode char to the target buffer, increment 211 * the index 212 * GROW, SHRINK handling of input buffers 213 */ 214 215#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) 216#define CUR (ctxt->token ? 
ctxt->token : (*ctxt->input->cur)) 217#define NXT(val) ctxt->input->cur[(val)] 218#define CUR_PTR ctxt->input->cur 219 220#define SKIP(val) do { \ 221 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 222 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 223 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\ 224 if ((*ctxt->input->cur == 0) && \ 225 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 226 xmlPopInput(ctxt); \ 227 } while (0) 228 229#define SHRINK do { \ 230 xmlParserInputShrink(ctxt->input); \ 231 if ((*ctxt->input->cur == 0) && \ 232 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 233 xmlPopInput(ctxt); \ 234 } while (0) 235 236#define GROW do { \ 237 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 238 if ((*ctxt->input->cur == 0) && \ 239 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 240 xmlPopInput(ctxt); \ 241 } while (0) 242 243#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 244 245#define NEXT xmlNextChar(ctxt) 246 247#define NEXTL(l) do { \ 248 if (*(ctxt->input->cur) == '\n') { \ 249 ctxt->input->line++; ctxt->input->col = 1; \ 250 } else ctxt->input->col++; \ 251 ctxt->token = 0; ctxt->input->cur += l; \ 252 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 253 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\ 254 } while (0) 255 256#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 257#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 258 259#define COPY_BUF(l,b,i,v) \ 260 if (l == 1) b[i++] = (xmlChar) v; \ 261 else i += xmlCopyChar(l,&b[i],v) 262 263/** 264 * xmlSkipBlankChars: 265 * @ctxt: the XML parser context 266 * 267 * skip all blanks character found at that point in the input streams. 268 * It pops up finished entities in the process if allowable at that point. 
269 * 270 * Returns the number of space chars skipped 271 */ 272 273int 274xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 275 int cur, res = 0; 276 277 /* 278 * It's Okay to use CUR/NEXT here since all the blanks are on 279 * the ASCII range. 280 */ 281 do { 282 cur = CUR; 283 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 284 NEXT; 285 cur = CUR; 286 res++; 287 } 288 while ((cur == 0) && (ctxt->inputNr > 1) && 289 (ctxt->instate != XML_PARSER_COMMENT)) { 290 xmlPopInput(ctxt); 291 cur = CUR; 292 } 293 /* 294 * Need to handle support of entities branching here 295 */ 296 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 297 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */ 298 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 299 return(res); 300} 301 302/************************************************************************ 303 * * 304 * Commodity functions to handle entities * 305 * * 306 ************************************************************************/ 307 308/** 309 * xmlPopInput: 310 * @ctxt: an XML parser context 311 * 312 * xmlPopInput: the current input pointed by ctxt->input came to an end 313 * pop it and return the next char. 314 * 315 * Returns the current xmlChar in the parser context 316 */ 317xmlChar 318xmlPopInput(xmlParserCtxtPtr ctxt) { 319 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 320 if (xmlParserDebugEntities) 321 xmlGenericError(xmlGenericErrorContext, 322 "Popping input %d\n", ctxt->inputNr); 323 xmlFreeInputStream(inputPop(ctxt)); 324 if ((*ctxt->input->cur == 0) && 325 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 326 return(xmlPopInput(ctxt)); 327 return(CUR); 328} 329 330/** 331 * xmlPushInput: 332 * @ctxt: an XML parser context 333 * @input: an XML parser input fragment (entity, XML fragment ...). 334 * 335 * xmlPushInput: switch to a new input stream which is stacked on top 336 * of the previous one(s). 
337 */ 338void 339xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 340 if (input == NULL) return; 341 342 if (xmlParserDebugEntities) { 343 if ((ctxt->input != NULL) && (ctxt->input->filename)) 344 xmlGenericError(xmlGenericErrorContext, 345 "%s(%d): ", ctxt->input->filename, 346 ctxt->input->line); 347 xmlGenericError(xmlGenericErrorContext, 348 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 349 } 350 inputPush(ctxt, input); 351 GROW; 352} 353 354/** 355 * xmlParseCharRef: 356 * @ctxt: an XML parser context 357 * 358 * parse Reference declarations 359 * 360 * [66] CharRef ::= '&#' [0-9]+ ';' | 361 * '&#x' [0-9a-fA-F]+ ';' 362 * 363 * [ WFC: Legal Character ] 364 * Characters referred to using character references must match the 365 * production for Char. 366 * 367 * Returns the value parsed (as an int), 0 in case of error 368 */ 369int 370xmlParseCharRef(xmlParserCtxtPtr ctxt) { 371 int val = 0; 372 int count = 0; 373 374 if (ctxt->token != 0) { 375 val = ctxt->token; 376 ctxt->token = 0; 377 return(val); 378 } 379 /* 380 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 381 */ 382 if ((RAW == '&') && (NXT(1) == '#') && 383 (NXT(2) == 'x')) { 384 SKIP(3); 385 GROW; 386 while (RAW != ';') { /* loop blocked by count */ 387 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 388 val = val * 16 + (CUR - '0'); 389 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 390 val = val * 16 + (CUR - 'a') + 10; 391 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 392 val = val * 16 + (CUR - 'A') + 10; 393 else { 394 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 396 ctxt->sax->error(ctxt->userData, 397 "xmlParseCharRef: invalid hexadecimal value\n"); 398 ctxt->wellFormed = 0; 399 ctxt->disableSAX = 1; 400 val = 0; 401 break; 402 } 403 NEXT; 404 count++; 405 } 406 if (RAW == ';') { 407 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 408 ctxt->nbChars ++; 
409 ctxt->input->cur++; 410 } 411 } else if ((RAW == '&') && (NXT(1) == '#')) { 412 SKIP(2); 413 GROW; 414 while (RAW != ';') { /* loop blocked by count */ 415 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 416 val = val * 10 + (CUR - '0'); 417 else { 418 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 420 ctxt->sax->error(ctxt->userData, 421 "xmlParseCharRef: invalid decimal value\n"); 422 ctxt->wellFormed = 0; 423 ctxt->disableSAX = 1; 424 val = 0; 425 break; 426 } 427 NEXT; 428 count++; 429 } 430 if (RAW == ';') { 431 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 432 ctxt->nbChars ++; 433 ctxt->input->cur++; 434 } 435 } else { 436 ctxt->errNo = XML_ERR_INVALID_CHARREF; 437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 438 ctxt->sax->error(ctxt->userData, 439 "xmlParseCharRef: invalid value\n"); 440 ctxt->wellFormed = 0; 441 ctxt->disableSAX = 1; 442 } 443 444 /* 445 * [ WFC: Legal Character ] 446 * Characters referred to using character references must match the 447 * production for Char. 448 */ 449 if (IS_CHAR(val)) { 450 return(val); 451 } else { 452 ctxt->errNo = XML_ERR_INVALID_CHAR; 453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 454 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 455 val); 456 ctxt->wellFormed = 0; 457 ctxt->disableSAX = 1; 458 } 459 return(0); 460} 461 462/** 463 * xmlParseStringCharRef: 464 * @ctxt: an XML parser context 465 * @str: a pointer to an index in the string 466 * 467 * parse Reference declarations, variant parsing from a string rather 468 * than an an input flow. 469 * 470 * [66] CharRef ::= '&#' [0-9]+ ';' | 471 * '&#x' [0-9a-fA-F]+ ';' 472 * 473 * [ WFC: Legal Character ] 474 * Characters referred to using character references must match the 475 * production for Char. 
476 * 477 * Returns the value parsed (as an int), 0 in case of error, str will be 478 * updated to the current value of the index 479 */ 480int 481xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 482 const xmlChar *ptr; 483 xmlChar cur; 484 int val = 0; 485 486 if ((str == NULL) || (*str == NULL)) return(0); 487 ptr = *str; 488 cur = *ptr; 489 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 490 ptr += 3; 491 cur = *ptr; 492 while (cur != ';') { /* Non input consuming loop */ 493 if ((cur >= '0') && (cur <= '9')) 494 val = val * 16 + (cur - '0'); 495 else if ((cur >= 'a') && (cur <= 'f')) 496 val = val * 16 + (cur - 'a') + 10; 497 else if ((cur >= 'A') && (cur <= 'F')) 498 val = val * 16 + (cur - 'A') + 10; 499 else { 500 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 502 ctxt->sax->error(ctxt->userData, 503 "xmlParseStringCharRef: invalid hexadecimal value\n"); 504 ctxt->wellFormed = 0; 505 ctxt->disableSAX = 1; 506 val = 0; 507 break; 508 } 509 ptr++; 510 cur = *ptr; 511 } 512 if (cur == ';') 513 ptr++; 514 } else if ((cur == '&') && (ptr[1] == '#')){ 515 ptr += 2; 516 cur = *ptr; 517 while (cur != ';') { /* Non input consuming loops */ 518 if ((cur >= '0') && (cur <= '9')) 519 val = val * 10 + (cur - '0'); 520 else { 521 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 523 ctxt->sax->error(ctxt->userData, 524 "xmlParseStringCharRef: invalid decimal value\n"); 525 ctxt->wellFormed = 0; 526 ctxt->disableSAX = 1; 527 val = 0; 528 break; 529 } 530 ptr++; 531 cur = *ptr; 532 } 533 if (cur == ';') 534 ptr++; 535 } else { 536 ctxt->errNo = XML_ERR_INVALID_CHARREF; 537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 538 ctxt->sax->error(ctxt->userData, 539 "xmlParseCharRef: invalid value\n"); 540 ctxt->wellFormed = 0; 541 ctxt->disableSAX = 1; 542 return(0); 543 } 544 *str = ptr; 545 546 /* 547 * [ WFC: Legal Character ] 548 * 
Characters referred to using character references must match the 549 * production for Char. 550 */ 551 if (IS_CHAR(val)) { 552 return(val); 553 } else { 554 ctxt->errNo = XML_ERR_INVALID_CHAR; 555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 556 ctxt->sax->error(ctxt->userData, 557 "CharRef: invalid xmlChar value %d\n", val); 558 ctxt->wellFormed = 0; 559 ctxt->disableSAX = 1; 560 } 561 return(0); 562} 563 564/** 565 * xmlParserHandlePEReference: 566 * @ctxt: the parser context 567 * 568 * [69] PEReference ::= '%' Name ';' 569 * 570 * [ WFC: No Recursion ] 571 * A parsed entity must not contain a recursive 572 * reference to itself, either directly or indirectly. 573 * 574 * [ WFC: Entity Declared ] 575 * In a document without any DTD, a document with only an internal DTD 576 * subset which contains no parameter entity references, or a document 577 * with "standalone='yes'", ... ... The declaration of a parameter 578 * entity must precede any reference to it... 579 * 580 * [ VC: Entity Declared ] 581 * In a document with an external subset or external parameter entities 582 * with "standalone='no'", ... ... The declaration of a parameter entity 583 * must precede any reference to it... 584 * 585 * [ WFC: In DTD ] 586 * Parameter-entity references may only appear in the DTD. 587 * NOTE: misleading but this is handled. 588 * 589 * A PEReference may have been detected in the current input stream 590 * the handling is done accordingly to 591 * http://www.w3.org/TR/REC-xml#entproc 592 * i.e. 
593 * - Included in literal in entity values 594 * - Included as Paraemeter Entity reference within DTDs 595 */ 596void 597xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 598 xmlChar *name; 599 xmlEntityPtr entity = NULL; 600 xmlParserInputPtr input; 601 602 if (ctxt->token != 0) { 603 return; 604 } 605 if (RAW != '%') return; 606 switch(ctxt->instate) { 607 case XML_PARSER_CDATA_SECTION: 608 return; 609 case XML_PARSER_COMMENT: 610 return; 611 case XML_PARSER_START_TAG: 612 return; 613 case XML_PARSER_END_TAG: 614 return; 615 case XML_PARSER_EOF: 616 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 618 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 619 ctxt->wellFormed = 0; 620 ctxt->disableSAX = 1; 621 return; 622 case XML_PARSER_PROLOG: 623 case XML_PARSER_START: 624 case XML_PARSER_MISC: 625 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 627 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 628 ctxt->wellFormed = 0; 629 ctxt->disableSAX = 1; 630 return; 631 case XML_PARSER_ENTITY_DECL: 632 case XML_PARSER_CONTENT: 633 case XML_PARSER_ATTRIBUTE_VALUE: 634 case XML_PARSER_PI: 635 case XML_PARSER_SYSTEM_LITERAL: 636 /* we just ignore it there */ 637 return; 638 case XML_PARSER_EPILOG: 639 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 641 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 642 ctxt->wellFormed = 0; 643 ctxt->disableSAX = 1; 644 return; 645 case XML_PARSER_ENTITY_VALUE: 646 /* 647 * NOTE: in the case of entity values, we don't do the 648 * substitution here since we need the literal 649 * entity value to be able to save the internal 650 * subset of the document. 
651 * This will be handled by xmlStringDecodeEntities 652 */ 653 return; 654 case XML_PARSER_DTD: 655 /* 656 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 657 * In the internal DTD subset, parameter-entity references 658 * can occur only where markup declarations can occur, not 659 * within markup declarations. 660 * In that case this is handled in xmlParseMarkupDecl 661 */ 662 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 663 return; 664 } 665 666 NEXT; 667 name = xmlParseName(ctxt); 668 if (xmlParserDebugEntities) 669 xmlGenericError(xmlGenericErrorContext, 670 "PE Reference: %s\n", name); 671 if (name == NULL) { 672 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 674 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 675 ctxt->wellFormed = 0; 676 ctxt->disableSAX = 1; 677 } else { 678 if (RAW == ';') { 679 NEXT; 680 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 681 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 682 if (entity == NULL) { 683 684 /* 685 * [ WFC: Entity Declared ] 686 * In a document without any DTD, a document with only an 687 * internal DTD subset which contains no parameter entity 688 * references, or a document with "standalone='yes'", ... 689 * ... The declaration of a parameter entity must precede 690 * any reference to it... 691 */ 692 if ((ctxt->standalone == 1) || 693 ((ctxt->hasExternalSubset == 0) && 694 (ctxt->hasPErefs == 0))) { 695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 696 ctxt->sax->error(ctxt->userData, 697 "PEReference: %%%s; not found\n", name); 698 ctxt->wellFormed = 0; 699 ctxt->disableSAX = 1; 700 } else { 701 /* 702 * [ VC: Entity Declared ] 703 * In a document with an external subset or external 704 * parameter entities with "standalone='no'", ... 705 * ... The declaration of a parameter entity must precede 706 * any reference to it... 
707 */ 708 if ((!ctxt->disableSAX) && 709 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 710 ctxt->vctxt.error(ctxt->vctxt.userData, 711 "PEReference: %%%s; not found\n", name); 712 } else if ((!ctxt->disableSAX) && 713 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 714 ctxt->sax->warning(ctxt->userData, 715 "PEReference: %%%s; not found\n", name); 716 ctxt->valid = 0; 717 } 718 } else { 719 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 720 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 721 /* 722 * handle the extra spaces added before and after 723 * c.f. http://www.w3.org/TR/REC-xml#as-PE 724 * this is done independantly. 725 */ 726 input = xmlNewEntityInputStream(ctxt, entity); 727 xmlPushInput(ctxt, input); 728 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 729 (RAW == '<') && (NXT(1) == '?') && 730 (NXT(2) == 'x') && (NXT(3) == 'm') && 731 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 732 xmlParseTextDecl(ctxt); 733 } 734 if (ctxt->token == 0) 735 ctxt->token = ' '; 736 } else { 737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 738 ctxt->sax->error(ctxt->userData, 739 "xmlHandlePEReference: %s is not a parameter entity\n", 740 name); 741 ctxt->wellFormed = 0; 742 ctxt->disableSAX = 1; 743 } 744 } 745 } else { 746 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 748 ctxt->sax->error(ctxt->userData, 749 "xmlHandlePEReference: expecting ';'\n"); 750 ctxt->wellFormed = 0; 751 ctxt->disableSAX = 1; 752 } 753 xmlFree(name); 754 } 755} 756 757/* 758 * Macro used to grow the current buffer. 
759 */ 760#define growBuffer(buffer) { \ 761 buffer##_size *= 2; \ 762 buffer = (xmlChar *) \ 763 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 764 if (buffer == NULL) { \ 765 perror("realloc failed"); \ 766 return(NULL); \ 767 } \ 768} 769 770/** 771 * xmlStringDecodeEntities: 772 * @ctxt: the parser context 773 * @str: the input string 774 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 775 * @end: an end marker xmlChar, 0 if none 776 * @end2: an end marker xmlChar, 0 if none 777 * @end3: an end marker xmlChar, 0 if none 778 * 779 * Takes a entity string content and process to do the adequate subtitutions. 780 * 781 * [67] Reference ::= EntityRef | CharRef 782 * 783 * [69] PEReference ::= '%' Name ';' 784 * 785 * Returns A newly allocated string with the substitution done. The caller 786 * must deallocate it ! 787 */ 788xmlChar * 789xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 790 xmlChar end, xmlChar end2, xmlChar end3) { 791 xmlChar *buffer = NULL; 792 int buffer_size = 0; 793 794 xmlChar *current = NULL; 795 xmlEntityPtr ent; 796 int c,l; 797 int nbchars = 0; 798 799 if (str == NULL) 800 return(NULL); 801 802 if (ctxt->depth > 40) { 803 ctxt->errNo = XML_ERR_ENTITY_LOOP; 804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 805 ctxt->sax->error(ctxt->userData, 806 "Detected entity reference loop\n"); 807 ctxt->wellFormed = 0; 808 ctxt->disableSAX = 1; 809 return(NULL); 810 } 811 812 /* 813 * allocate a translation buffer. 814 */ 815 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 816 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 817 if (buffer == NULL) { 818 perror("xmlDecodeEntities: malloc failed"); 819 return(NULL); 820 } 821 822 /* 823 * Ok loop until we reach one of the ending char or a size limit. 824 * we are operating on already parsed values. 
825 */ 826 c = CUR_SCHAR(str, l); 827 while ((c != 0) && (c != end) && /* non input consuming loop */ 828 (c != end2) && (c != end3)) { 829 830 if (c == 0) break; 831 if ((c == '&') && (str[1] == '#')) { 832 int val = xmlParseStringCharRef(ctxt, &str); 833 if (val != 0) { 834 COPY_BUF(0,buffer,nbchars,val); 835 } 836 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 837 if (xmlParserDebugEntities) 838 xmlGenericError(xmlGenericErrorContext, 839 "String decoding Entity Reference: %.30s\n", 840 str); 841 ent = xmlParseStringEntityRef(ctxt, &str); 842 if ((ent != NULL) && 843 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 844 if (ent->content != NULL) { 845 COPY_BUF(0,buffer,nbchars,ent->content[0]); 846 } else { 847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 848 ctxt->sax->error(ctxt->userData, 849 "internal error entity has no content\n"); 850 } 851 } else if ((ent != NULL) && (ent->content != NULL)) { 852 xmlChar *rep; 853 854 ctxt->depth++; 855 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 856 0, 0, 0); 857 ctxt->depth--; 858 if (rep != NULL) { 859 current = rep; 860 while (*current != 0) { /* non input consuming loop */ 861 buffer[nbchars++] = *current++; 862 if (nbchars > 863 buffer_size - XML_PARSER_BUFFER_SIZE) { 864 growBuffer(buffer); 865 } 866 } 867 xmlFree(rep); 868 } 869 } else if (ent != NULL) { 870 int i = xmlStrlen(ent->name); 871 const xmlChar *cur = ent->name; 872 873 buffer[nbchars++] = '&'; 874 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 875 growBuffer(buffer); 876 } 877 for (;i > 0;i--) 878 buffer[nbchars++] = *cur++; 879 buffer[nbchars++] = ';'; 880 } 881 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 882 if (xmlParserDebugEntities) 883 xmlGenericError(xmlGenericErrorContext, 884 "String decoding PE Reference: %.30s\n", str); 885 ent = xmlParseStringPEReference(ctxt, &str); 886 if (ent != NULL) { 887 xmlChar *rep; 888 889 ctxt->depth++; 890 rep = xmlStringDecodeEntities(ctxt, ent->content, 
what, 891 0, 0, 0); 892 ctxt->depth--; 893 if (rep != NULL) { 894 current = rep; 895 while (*current != 0) { /* non input consuming loop */ 896 buffer[nbchars++] = *current++; 897 if (nbchars > 898 buffer_size - XML_PARSER_BUFFER_SIZE) { 899 growBuffer(buffer); 900 } 901 } 902 xmlFree(rep); 903 } 904 } 905 } else { 906 COPY_BUF(l,buffer,nbchars,c); 907 str += l; 908 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 909 growBuffer(buffer); 910 } 911 } 912 c = CUR_SCHAR(str, l); 913 } 914 buffer[nbchars++] = 0; 915 return(buffer); 916} 917 918 919/************************************************************************ 920 * * 921 * Commodity functions to handle xmlChars * 922 * * 923 ************************************************************************/ 924 925/** 926 * xmlStrndup: 927 * @cur: the input xmlChar * 928 * @len: the len of @cur 929 * 930 * a strndup for array of xmlChar's 931 * 932 * Returns a new xmlChar * or NULL 933 */ 934xmlChar * 935xmlStrndup(const xmlChar *cur, int len) { 936 xmlChar *ret; 937 938 if ((cur == NULL) || (len < 0)) return(NULL); 939 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 940 if (ret == NULL) { 941 xmlGenericError(xmlGenericErrorContext, 942 "malloc of %ld byte failed\n", 943 (len + 1) * (long)sizeof(xmlChar)); 944 return(NULL); 945 } 946 memcpy(ret, cur, len * sizeof(xmlChar)); 947 ret[len] = 0; 948 return(ret); 949} 950 951/** 952 * xmlStrdup: 953 * @cur: the input xmlChar * 954 * 955 * a strdup for array of xmlChar's. Since they are supposed to be 956 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 957 * a termination mark of '0'. 
958 * 959 * Returns a new xmlChar * or NULL 960 */ 961xmlChar * 962xmlStrdup(const xmlChar *cur) { 963 const xmlChar *p = cur; 964 965 if (cur == NULL) return(NULL); 966 while (*p != 0) p++; /* non input consuming */ 967 return(xmlStrndup(cur, p - cur)); 968} 969 970/** 971 * xmlCharStrndup: 972 * @cur: the input char * 973 * @len: the len of @cur 974 * 975 * a strndup for char's to xmlChar's 976 * 977 * Returns a new xmlChar * or NULL 978 */ 979 980xmlChar * 981xmlCharStrndup(const char *cur, int len) { 982 int i; 983 xmlChar *ret; 984 985 if ((cur == NULL) || (len < 0)) return(NULL); 986 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 987 if (ret == NULL) { 988 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 989 (len + 1) * (long)sizeof(xmlChar)); 990 return(NULL); 991 } 992 for (i = 0;i < len;i++) 993 ret[i] = (xmlChar) cur[i]; 994 ret[len] = 0; 995 return(ret); 996} 997 998/** 999 * xmlCharStrdup: 1000 * @cur: the input char * 1001 * @len: the len of @cur 1002 * 1003 * a strdup for char's to xmlChar's 1004 * 1005 * Returns a new xmlChar * or NULL 1006 */ 1007 1008xmlChar * 1009xmlCharStrdup(const char *cur) { 1010 const char *p = cur; 1011 1012 if (cur == NULL) return(NULL); 1013 while (*p != '\0') p++; /* non input consuming */ 1014 return(xmlCharStrndup(cur, p - cur)); 1015} 1016 1017/** 1018 * xmlStrcmp: 1019 * @str1: the first xmlChar * 1020 * @str2: the second xmlChar * 1021 * 1022 * a strcmp for xmlChar's 1023 * 1024 * Returns the integer result of the comparison 1025 */ 1026 1027int 1028xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1029 register int tmp; 1030 1031 if (str1 == str2) return(0); 1032 if (str1 == NULL) return(-1); 1033 if (str2 == NULL) return(1); 1034 do { 1035 tmp = *str1++ - *str2; 1036 if (tmp != 0) return(tmp); 1037 } while (*str2++ != 0); 1038 return 0; 1039} 1040 1041/** 1042 * xmlStrEqual: 1043 * @str1: the first xmlChar * 1044 * @str2: the second xmlChar * 1045 * 1046 * Check if both string 
are equal of have same content 1047 * Should be a bit more readable and faster than xmlStrEqual() 1048 * 1049 * Returns 1 if they are equal, 0 if they are different 1050 */ 1051 1052int 1053xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1054 if (str1 == str2) return(1); 1055 if (str1 == NULL) return(0); 1056 if (str2 == NULL) return(0); 1057 do { 1058 if (*str1++ != *str2) return(0); 1059 } while (*str2++); 1060 return(1); 1061} 1062 1063/** 1064 * xmlStrncmp: 1065 * @str1: the first xmlChar * 1066 * @str2: the second xmlChar * 1067 * @len: the max comparison length 1068 * 1069 * a strncmp for xmlChar's 1070 * 1071 * Returns the integer result of the comparison 1072 */ 1073 1074int 1075xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1076 register int tmp; 1077 1078 if (len <= 0) return(0); 1079 if (str1 == str2) return(0); 1080 if (str1 == NULL) return(-1); 1081 if (str2 == NULL) return(1); 1082 do { 1083 tmp = *str1++ - *str2; 1084 if (tmp != 0 || --len == 0) return(tmp); 1085 } while (*str2++ != 0); 1086 return 0; 1087} 1088 1089static xmlChar casemap[256] = { 1090 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1091 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1092 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1093 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1094 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1095 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1096 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1097 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1098 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1099 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1100 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1101 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1102 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1103 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1104 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1105 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1106 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1107 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1108 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1109 
0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1110 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1111 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1112 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1113 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1114 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1115 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1116 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1117 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1118 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1119 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1120 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1121 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1122}; 1123 1124/** 1125 * xmlStrcasecmp: 1126 * @str1: the first xmlChar * 1127 * @str2: the second xmlChar * 1128 * 1129 * a strcasecmp for xmlChar's 1130 * 1131 * Returns the integer result of the comparison 1132 */ 1133 1134int 1135xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1136 register int tmp; 1137 1138 if (str1 == str2) return(0); 1139 if (str1 == NULL) return(-1); 1140 if (str2 == NULL) return(1); 1141 do { 1142 tmp = casemap[*str1++] - casemap[*str2]; 1143 if (tmp != 0) return(tmp); 1144 } while (*str2++ != 0); 1145 return 0; 1146} 1147 1148/** 1149 * xmlStrncasecmp: 1150 * @str1: the first xmlChar * 1151 * @str2: the second xmlChar * 1152 * @len: the max comparison length 1153 * 1154 * a strncasecmp for xmlChar's 1155 * 1156 * Returns the integer result of the comparison 1157 */ 1158 1159int 1160xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1161 register int tmp; 1162 1163 if (len <= 0) return(0); 1164 if (str1 == str2) return(0); 1165 if (str1 == NULL) return(-1); 1166 if (str2 == NULL) return(1); 1167 do { 1168 tmp = casemap[*str1++] - casemap[*str2]; 1169 if (tmp != 0 || --len == 0) return(tmp); 1170 } while (*str2++ != 0); 1171 return 0; 1172} 1173 1174/** 1175 * xmlStrchr: 1176 * @str: the xmlChar * array 1177 * @val: the xmlChar to search 1178 * 1179 * a strchr for xmlChar's 1180 * 1181 * Returns the xmlChar * for the first 
occurence or NULL. 1182 */ 1183 1184const xmlChar * 1185xmlStrchr(const xmlChar *str, xmlChar val) { 1186 if (str == NULL) return(NULL); 1187 while (*str != 0) { /* non input consuming */ 1188 if (*str == val) return((xmlChar *) str); 1189 str++; 1190 } 1191 return(NULL); 1192} 1193 1194/** 1195 * xmlStrstr: 1196 * @str: the xmlChar * array (haystack) 1197 * @val: the xmlChar to search (needle) 1198 * 1199 * a strstr for xmlChar's 1200 * 1201 * Returns the xmlChar * for the first occurence or NULL. 1202 */ 1203 1204const xmlChar * 1205xmlStrstr(const xmlChar *str, xmlChar *val) { 1206 int n; 1207 1208 if (str == NULL) return(NULL); 1209 if (val == NULL) return(NULL); 1210 n = xmlStrlen(val); 1211 1212 if (n == 0) return(str); 1213 while (*str != 0) { /* non input consuming */ 1214 if (*str == *val) { 1215 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1216 } 1217 str++; 1218 } 1219 return(NULL); 1220} 1221 1222/** 1223 * xmlStrcasestr: 1224 * @str: the xmlChar * array (haystack) 1225 * @val: the xmlChar to search (needle) 1226 * 1227 * a case-ignoring strstr for xmlChar's 1228 * 1229 * Returns the xmlChar * for the first occurence or NULL. 1230 */ 1231 1232const xmlChar * 1233xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1234 int n; 1235 1236 if (str == NULL) return(NULL); 1237 if (val == NULL) return(NULL); 1238 n = xmlStrlen(val); 1239 1240 if (n == 0) return(str); 1241 while (*str != 0) { /* non input consuming */ 1242 if (casemap[*str] == casemap[*val]) 1243 if (!xmlStrncasecmp(str, val, n)) return(str); 1244 str++; 1245 } 1246 return(NULL); 1247} 1248 1249/** 1250 * xmlStrsub: 1251 * @str: the xmlChar * array (haystack) 1252 * @start: the index of the first char (zero based) 1253 * @len: the length of the substring 1254 * 1255 * Extract a substring of a given string 1256 * 1257 * Returns the xmlChar * for the first occurence or NULL. 
1258 */ 1259 1260xmlChar * 1261xmlStrsub(const xmlChar *str, int start, int len) { 1262 int i; 1263 1264 if (str == NULL) return(NULL); 1265 if (start < 0) return(NULL); 1266 if (len < 0) return(NULL); 1267 1268 for (i = 0;i < start;i++) { 1269 if (*str == 0) return(NULL); 1270 str++; 1271 } 1272 if (*str == 0) return(NULL); 1273 return(xmlStrndup(str, len)); 1274} 1275 1276/** 1277 * xmlStrlen: 1278 * @str: the xmlChar * array 1279 * 1280 * length of a xmlChar's string 1281 * 1282 * Returns the number of xmlChar contained in the ARRAY. 1283 */ 1284 1285int 1286xmlStrlen(const xmlChar *str) { 1287 int len = 0; 1288 1289 if (str == NULL) return(0); 1290 while (*str != 0) { /* non input consuming */ 1291 str++; 1292 len++; 1293 } 1294 return(len); 1295} 1296 1297/** 1298 * xmlStrncat: 1299 * @cur: the original xmlChar * array 1300 * @add: the xmlChar * array added 1301 * @len: the length of @add 1302 * 1303 * a strncat for array of xmlChar's 1304 * 1305 * Returns a new xmlChar * containing the concatenated string. 1306 */ 1307 1308xmlChar * 1309xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1310 int size; 1311 xmlChar *ret; 1312 1313 if ((add == NULL) || (len == 0)) 1314 return(cur); 1315 if (cur == NULL) 1316 return(xmlStrndup(add, len)); 1317 1318 size = xmlStrlen(cur); 1319 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1320 if (ret == NULL) { 1321 xmlGenericError(xmlGenericErrorContext, 1322 "xmlStrncat: realloc of %ld byte failed\n", 1323 (size + len + 1) * (long)sizeof(xmlChar)); 1324 return(cur); 1325 } 1326 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1327 ret[size + len] = 0; 1328 return(ret); 1329} 1330 1331/** 1332 * xmlStrcat: 1333 * @cur: the original xmlChar * array 1334 * @add: the xmlChar * array added 1335 * 1336 * a strcat for array of xmlChar's. Since they are supposed to be 1337 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1338 * a termination mark of '0'. 
1339 * 1340 * Returns a new xmlChar * containing the concatenated string. 1341 */ 1342xmlChar * 1343xmlStrcat(xmlChar *cur, const xmlChar *add) { 1344 const xmlChar *p = add; 1345 1346 if (add == NULL) return(cur); 1347 if (cur == NULL) 1348 return(xmlStrdup(add)); 1349 1350 while (*p != 0) p++; /* non input consuming */ 1351 return(xmlStrncat(cur, add, p - add)); 1352} 1353 1354/************************************************************************ 1355 * * 1356 * Commodity functions, cleanup needed ? * 1357 * * 1358 ************************************************************************/ 1359 1360/** 1361 * areBlanks: 1362 * @ctxt: an XML parser context 1363 * @str: a xmlChar * 1364 * @len: the size of @str 1365 * 1366 * Is this a sequence of blank chars that one can ignore ? 1367 * 1368 * Returns 1 if ignorable 0 otherwise. 1369 */ 1370 1371static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1372 int i, ret; 1373 xmlNodePtr lastChild; 1374 1375 /* 1376 * Check for xml:space value. 
1377 */ 1378 if (*(ctxt->space) == 1) 1379 return(0); 1380 1381 /* 1382 * Check that the string is made of blanks 1383 */ 1384 for (i = 0;i < len;i++) 1385 if (!(IS_BLANK(str[i]))) return(0); 1386 1387 /* 1388 * Look if the element is mixed content in the Dtd if available 1389 */ 1390 if (ctxt->myDoc != NULL) { 1391 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1392 if (ret == 0) return(1); 1393 if (ret == 1) return(0); 1394 } 1395 1396 /* 1397 * Otherwise, heuristic :-\ 1398 */ 1399 if (ctxt->keepBlanks) 1400 return(0); 1401 if (RAW != '<') return(0); 1402 if (ctxt->node == NULL) return(0); 1403 if ((ctxt->node->children == NULL) && 1404 (RAW == '<') && (NXT(1) == '/')) return(0); 1405 1406 lastChild = xmlGetLastChild(ctxt->node); 1407 if (lastChild == NULL) { 1408 if (ctxt->node->content != NULL) return(0); 1409 } else if (xmlNodeIsText(lastChild)) 1410 return(0); 1411 else if ((ctxt->node->children != NULL) && 1412 (xmlNodeIsText(ctxt->node->children))) 1413 return(0); 1414 return(1); 1415} 1416 1417/* 1418 * Forward definition for recusive behaviour. 1419 */ 1420void xmlParsePEReference(xmlParserCtxtPtr ctxt); 1421void xmlParseReference(xmlParserCtxtPtr ctxt); 1422 1423/************************************************************************ 1424 * * 1425 * Extra stuff for namespace support * 1426 * Relates to http://www.w3.org/TR/WD-xml-names * 1427 * * 1428 ************************************************************************/ 1429 1430/** 1431 * xmlSplitQName: 1432 * @ctxt: an XML parser context 1433 * @name: an XML parser context 1434 * @prefix: a xmlChar ** 1435 * 1436 * parse an UTF8 encoded XML qualified name string 1437 * 1438 * [NS 5] QName ::= (Prefix ':')? LocalPart 1439 * 1440 * [NS 6] Prefix ::= NCName 1441 * 1442 * [NS 7] LocalPart ::= NCName 1443 * 1444 * Returns the local part, and prefix is updated 1445 * to get the Prefix if any. 
1446 */ 1447 1448xmlChar * 1449xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1450 xmlChar buf[XML_MAX_NAMELEN + 5]; 1451 xmlChar *buffer = NULL; 1452 int len = 0; 1453 int max = XML_MAX_NAMELEN; 1454 xmlChar *ret = NULL; 1455 const xmlChar *cur = name; 1456 int c; 1457 1458 *prefix = NULL; 1459 1460 /* xml: prefix is not really a namespace */ 1461 if ((cur[0] == 'x') && (cur[1] == 'm') && 1462 (cur[2] == 'l') && (cur[3] == ':')) 1463 return(xmlStrdup(name)); 1464 1465 /* nasty but valid */ 1466 if (cur[0] == ':') 1467 return(xmlStrdup(name)); 1468 1469 c = *cur++; 1470 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1471 buf[len++] = c; 1472 c = *cur++; 1473 } 1474 if (len >= max) { 1475 /* 1476 * Okay someone managed to make a huge name, so he's ready to pay 1477 * for the processing speed. 1478 */ 1479 max = len * 2; 1480 1481 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1482 if (buffer == NULL) { 1483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1484 ctxt->sax->error(ctxt->userData, 1485 "xmlSplitQName: out of memory\n"); 1486 return(NULL); 1487 } 1488 memcpy(buffer, buf, len); 1489 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1490 if (len + 10 > max) { 1491 max *= 2; 1492 buffer = (xmlChar *) xmlRealloc(buffer, 1493 max * sizeof(xmlChar)); 1494 if (buffer == NULL) { 1495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1496 ctxt->sax->error(ctxt->userData, 1497 "xmlSplitQName: out of memory\n"); 1498 return(NULL); 1499 } 1500 } 1501 buffer[len++] = c; 1502 c = *cur++; 1503 } 1504 buffer[len] = 0; 1505 } 1506 1507 if (buffer == NULL) 1508 ret = xmlStrndup(buf, len); 1509 else { 1510 ret = buffer; 1511 buffer = NULL; 1512 max = XML_MAX_NAMELEN; 1513 } 1514 1515 1516 if (c == ':') { 1517 c = *cur++; 1518 if (c == 0) return(ret); 1519 *prefix = ret; 1520 len = 0; 1521 1522 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 1523 buf[len++] = c; 1524 c = *cur++; 
1525 } 1526 if (len >= max) { 1527 /* 1528 * Okay someone managed to make a huge name, so he's ready to pay 1529 * for the processing speed. 1530 */ 1531 max = len * 2; 1532 1533 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1534 if (buffer == NULL) { 1535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1536 ctxt->sax->error(ctxt->userData, 1537 "xmlSplitQName: out of memory\n"); 1538 return(NULL); 1539 } 1540 memcpy(buffer, buf, len); 1541 while (c != 0) { /* tested bigname2.xml */ 1542 if (len + 10 > max) { 1543 max *= 2; 1544 buffer = (xmlChar *) xmlRealloc(buffer, 1545 max * sizeof(xmlChar)); 1546 if (buffer == NULL) { 1547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1548 ctxt->sax->error(ctxt->userData, 1549 "xmlSplitQName: out of memory\n"); 1550 return(NULL); 1551 } 1552 } 1553 buffer[len++] = c; 1554 c = *cur++; 1555 } 1556 buffer[len] = 0; 1557 } 1558 1559 if (buffer == NULL) 1560 ret = xmlStrndup(buf, len); 1561 else { 1562 ret = buffer; 1563 } 1564 } 1565 1566 return(ret); 1567} 1568 1569/************************************************************************ 1570 * * 1571 * The parser itself * 1572 * Relates to http://www.w3.org/TR/REC-xml * 1573 * * 1574 ************************************************************************/ 1575 1576/** 1577 * xmlParseName: 1578 * @ctxt: an XML parser context 1579 * 1580 * parse an XML name. 1581 * 1582 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1583 * CombiningChar | Extender 1584 * 1585 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1586 * 1587 * [6] Names ::= Name (S Name)* 1588 * 1589 * Returns the Name parsed or NULL 1590 */ 1591 1592xmlChar * 1593xmlParseName(xmlParserCtxtPtr ctxt) { 1594 xmlChar buf[XML_MAX_NAMELEN + 5]; 1595 int len = 0, l; 1596 int c; 1597 int count = 0; 1598 1599 GROW; 1600 c = CUR_CHAR(l); 1601 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1602 (!IS_LETTER(c) && (c != '_') && 1603 (c != ':'))) { 1604 return(NULL); 1605 } 1606 1607 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1608 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1609 (c == '.') || (c == '-') || 1610 (c == '_') || (c == ':') || 1611 (IS_COMBINING(c)) || 1612 (IS_EXTENDER(c)))) { 1613 if (count++ > 100) { 1614 count = 0; 1615 GROW; 1616 } 1617 COPY_BUF(l,buf,len,c); 1618 NEXTL(l); 1619 c = CUR_CHAR(l); 1620 if (len >= XML_MAX_NAMELEN) { 1621 /* 1622 * Okay someone managed to make a huge name, so he's ready to pay 1623 * for the processing speed. 
1624 */ 1625 xmlChar *buffer; 1626 int max = len * 2; 1627 1628 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1629 if (buffer == NULL) { 1630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1631 ctxt->sax->error(ctxt->userData, 1632 "xmlParseName: out of memory\n"); 1633 return(NULL); 1634 } 1635 memcpy(buffer, buf, len); 1636 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1637 (c == '.') || (c == '-') || 1638 (c == '_') || (c == ':') || 1639 (IS_COMBINING(c)) || 1640 (IS_EXTENDER(c))) { 1641 if (count++ > 100) { 1642 count = 0; 1643 GROW; 1644 } 1645 if (len + 10 > max) { 1646 max *= 2; 1647 buffer = (xmlChar *) xmlRealloc(buffer, 1648 max * sizeof(xmlChar)); 1649 if (buffer == NULL) { 1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1651 ctxt->sax->error(ctxt->userData, 1652 "xmlParseName: out of memory\n"); 1653 return(NULL); 1654 } 1655 } 1656 COPY_BUF(l,buffer,len,c); 1657 NEXTL(l); 1658 c = CUR_CHAR(l); 1659 } 1660 buffer[len] = 0; 1661 return(buffer); 1662 } 1663 } 1664 return(xmlStrndup(buf, len)); 1665} 1666 1667/** 1668 * xmlParseStringName: 1669 * @ctxt: an XML parser context 1670 * @str: a pointer to the string pointer (IN/OUT) 1671 * 1672 * parse an XML name. 1673 * 1674 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1675 * CombiningChar | Extender 1676 * 1677 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1678 * 1679 * [6] Names ::= Name (S Name)* 1680 * 1681 * Returns the Name parsed or NULL. The str pointer 1682 * is updated to the current location in the string. 
1683 */ 1684 1685xmlChar * 1686xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1687 xmlChar buf[XML_MAX_NAMELEN + 5]; 1688 const xmlChar *cur = *str; 1689 int len = 0, l; 1690 int c; 1691 1692 c = CUR_SCHAR(cur, l); 1693 if (!IS_LETTER(c) && (c != '_') && 1694 (c != ':')) { 1695 return(NULL); 1696 } 1697 1698 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1699 (c == '.') || (c == '-') || 1700 (c == '_') || (c == ':') || 1701 (IS_COMBINING(c)) || 1702 (IS_EXTENDER(c))) { 1703 COPY_BUF(l,buf,len,c); 1704 cur += l; 1705 c = CUR_SCHAR(cur, l); 1706 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1707 /* 1708 * Okay someone managed to make a huge name, so he's ready to pay 1709 * for the processing speed. 1710 */ 1711 xmlChar *buffer; 1712 int max = len * 2; 1713 1714 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1715 if (buffer == NULL) { 1716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1717 ctxt->sax->error(ctxt->userData, 1718 "xmlParseStringName: out of memory\n"); 1719 return(NULL); 1720 } 1721 memcpy(buffer, buf, len); 1722 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1723 (c == '.') || (c == '-') || 1724 (c == '_') || (c == ':') || 1725 (IS_COMBINING(c)) || 1726 (IS_EXTENDER(c))) { 1727 if (len + 10 > max) { 1728 max *= 2; 1729 buffer = (xmlChar *) xmlRealloc(buffer, 1730 max * sizeof(xmlChar)); 1731 if (buffer == NULL) { 1732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1733 ctxt->sax->error(ctxt->userData, 1734 "xmlParseStringName: out of memory\n"); 1735 return(NULL); 1736 } 1737 } 1738 COPY_BUF(l,buffer,len,c); 1739 cur += l; 1740 c = CUR_SCHAR(cur, l); 1741 } 1742 buffer[len] = 0; 1743 *str = cur; 1744 return(buffer); 1745 } 1746 } 1747 *str = cur; 1748 return(xmlStrndup(buf, len)); 1749} 1750 1751/** 1752 * xmlParseNmtoken: 1753 * @ctxt: an XML parser context 1754 * 1755 * parse an XML Nmtoken. 
1756 * 1757 * [7] Nmtoken ::= (NameChar)+ 1758 * 1759 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1760 * 1761 * Returns the Nmtoken parsed or NULL 1762 */ 1763 1764xmlChar * 1765xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1766 xmlChar buf[XML_MAX_NAMELEN + 5]; 1767 int len = 0, l; 1768 int c; 1769 int count = 0; 1770 1771 GROW; 1772 c = CUR_CHAR(l); 1773 1774 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1775 (c == '.') || (c == '-') || 1776 (c == '_') || (c == ':') || 1777 (IS_COMBINING(c)) || 1778 (IS_EXTENDER(c))) { 1779 if (count++ > 100) { 1780 count = 0; 1781 GROW; 1782 } 1783 COPY_BUF(l,buf,len,c); 1784 NEXTL(l); 1785 c = CUR_CHAR(l); 1786 if (len >= XML_MAX_NAMELEN) { 1787 /* 1788 * Okay someone managed to make a huge token, so he's ready to pay 1789 * for the processing speed. 1790 */ 1791 xmlChar *buffer; 1792 int max = len * 2; 1793 1794 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1795 if (buffer == NULL) { 1796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1797 ctxt->sax->error(ctxt->userData, 1798 "xmlParseNmtoken: out of memory\n"); 1799 return(NULL); 1800 } 1801 memcpy(buffer, buf, len); 1802 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1803 (c == '.') || (c == '-') || 1804 (c == '_') || (c == ':') || 1805 (IS_COMBINING(c)) || 1806 (IS_EXTENDER(c))) { 1807 if (count++ > 100) { 1808 count = 0; 1809 GROW; 1810 } 1811 if (len + 10 > max) { 1812 max *= 2; 1813 buffer = (xmlChar *) xmlRealloc(buffer, 1814 max * sizeof(xmlChar)); 1815 if (buffer == NULL) { 1816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1817 ctxt->sax->error(ctxt->userData, 1818 "xmlParseName: out of memory\n"); 1819 return(NULL); 1820 } 1821 } 1822 COPY_BUF(l,buffer,len,c); 1823 NEXTL(l); 1824 c = CUR_CHAR(l); 1825 } 1826 buffer[len] = 0; 1827 return(buffer); 1828 } 1829 } 1830 if (len == 0) 1831 return(NULL); 1832 return(xmlStrndup(buf, len)); 1833} 1834 1835/** 1836 * xmlParseEntityValue: 1837 * @ctxt: an XML parser 
context 1838 * @orig: if non-NULL store a copy of the original entity value 1839 * 1840 * parse a value for ENTITY declarations 1841 * 1842 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 1843 * "'" ([^%&'] | PEReference | Reference)* "'" 1844 * 1845 * Returns the EntityValue parsed with reference substitued or NULL 1846 */ 1847 1848xmlChar * 1849xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 1850 xmlChar *buf = NULL; 1851 int len = 0; 1852 int size = XML_PARSER_BUFFER_SIZE; 1853 int c, l; 1854 xmlChar stop; 1855 xmlChar *ret = NULL; 1856 const xmlChar *cur = NULL; 1857 xmlParserInputPtr input; 1858 1859 if (RAW == '"') stop = '"'; 1860 else if (RAW == '\'') stop = '\''; 1861 else { 1862 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1864 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 1865 ctxt->wellFormed = 0; 1866 ctxt->disableSAX = 1; 1867 return(NULL); 1868 } 1869 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 1870 if (buf == NULL) { 1871 xmlGenericError(xmlGenericErrorContext, 1872 "malloc of %d byte failed\n", size); 1873 return(NULL); 1874 } 1875 1876 /* 1877 * The content of the entity definition is copied in a buffer. 1878 */ 1879 1880 ctxt->instate = XML_PARSER_ENTITY_VALUE; 1881 input = ctxt->input; 1882 GROW; 1883 NEXT; 1884 c = CUR_CHAR(l); 1885 /* 1886 * NOTE: 4.4.5 Included in Literal 1887 * When a parameter entity reference appears in a literal entity 1888 * value, ... a single or double quote character in the replacement 1889 * text is always treated as a normal data character and will not 1890 * terminate the literal. 
1891 * In practice it means we stop the loop only when back at parsing 1892 * the initial entity and the quote is found 1893 */ 1894 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 1895 (ctxt->input != input))) { 1896 if (len + 5 >= size) { 1897 size *= 2; 1898 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 1899 if (buf == NULL) { 1900 xmlGenericError(xmlGenericErrorContext, 1901 "realloc of %d byte failed\n", size); 1902 return(NULL); 1903 } 1904 } 1905 COPY_BUF(l,buf,len,c); 1906 NEXTL(l); 1907 /* 1908 * Pop-up of finished entities. 1909 */ 1910 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 1911 xmlPopInput(ctxt); 1912 1913 GROW; 1914 c = CUR_CHAR(l); 1915 if (c == 0) { 1916 GROW; 1917 c = CUR_CHAR(l); 1918 } 1919 } 1920 buf[len] = 0; 1921 1922 /* 1923 * Raise problem w.r.t. '&' and '%' being used in non-entities 1924 * reference constructs. Note Charref will be handled in 1925 * xmlStringDecodeEntities() 1926 */ 1927 cur = buf; 1928 while (*cur != 0) { /* non input consuming */ 1929 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 1930 xmlChar *name; 1931 xmlChar tmp = *cur; 1932 1933 cur++; 1934 name = xmlParseStringName(ctxt, &cur); 1935 if ((name == NULL) || (*cur != ';')) { 1936 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 1937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1938 ctxt->sax->error(ctxt->userData, 1939 "EntityValue: '%c' forbidden except for entities references\n", 1940 tmp); 1941 ctxt->wellFormed = 0; 1942 ctxt->disableSAX = 1; 1943 } 1944 if ((ctxt->inSubset == 1) && (tmp == '%')) { 1945 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 1946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1947 ctxt->sax->error(ctxt->userData, 1948 "EntityValue: PEReferences forbidden in internal subset\n", 1949 tmp); 1950 ctxt->wellFormed = 0; 1951 ctxt->disableSAX = 1; 1952 } 1953 if (name != NULL) 1954 xmlFree(name); 1955 } 1956 cur++; 1957 } 1958 1959 /* 1960 * Then PEReference entities are substituted. 
1961 */ 1962 if (c != stop) { 1963 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 1964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1965 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 1966 ctxt->wellFormed = 0; 1967 ctxt->disableSAX = 1; 1968 xmlFree(buf); 1969 } else { 1970 NEXT; 1971 /* 1972 * NOTE: 4.4.7 Bypassed 1973 * When a general entity reference appears in the EntityValue in 1974 * an entity declaration, it is bypassed and left as is. 1975 * so XML_SUBSTITUTE_REF is not set here. 1976 */ 1977 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 1978 0, 0, 0); 1979 if (orig != NULL) 1980 *orig = buf; 1981 else 1982 xmlFree(buf); 1983 } 1984 1985 return(ret); 1986} 1987 1988/** 1989 * xmlParseAttValue: 1990 * @ctxt: an XML parser context 1991 * 1992 * parse a value for an attribute 1993 * Note: the parser won't do substitution of entities here, this 1994 * will be handled later in xmlStringGetNodeList 1995 * 1996 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 1997 * "'" ([^<&'] | Reference)* "'" 1998 * 1999 * 3.3.3 Attribute-Value Normalization: 2000 * Before the value of an attribute is passed to the application or 2001 * checked for validity, the XML processor must normalize it as follows: 2002 * - a character reference is processed by appending the referenced 2003 * character to the attribute value 2004 * - an entity reference is processed by recursively processing the 2005 * replacement text of the entity 2006 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2007 * appending #x20 to the normalized value, except that only a single 2008 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2009 * parsed entity or the literal entity value of an internal parsed entity 2010 * - other characters are processed by appending them to the normalized value 2011 * If the declared value is not CDATA, then the XML processor must further 2012 * process the normalized attribute value by discarding any 
leading and 2013 * trailing space (#x20) characters, and by replacing sequences of space 2014 * (#x20) characters by a single space (#x20) character. 2015 * All attributes for which no declaration has been read should be treated 2016 * by a non-validating parser as if declared CDATA. 2017 * 2018 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2019 */ 2020 2021xmlChar * 2022xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2023 xmlChar limit = 0; 2024 xmlChar *buf = NULL; 2025 int len = 0; 2026 int buf_size = 0; 2027 int c, l; 2028 xmlChar *current = NULL; 2029 xmlEntityPtr ent; 2030 2031 2032 SHRINK; 2033 if (NXT(0) == '"') { 2034 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2035 limit = '"'; 2036 NEXT; 2037 } else if (NXT(0) == '\'') { 2038 limit = '\''; 2039 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2040 NEXT; 2041 } else { 2042 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2044 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2045 ctxt->wellFormed = 0; 2046 ctxt->disableSAX = 1; 2047 return(NULL); 2048 } 2049 2050 /* 2051 * allocate a translation buffer. 2052 */ 2053 buf_size = XML_PARSER_BUFFER_SIZE; 2054 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2055 if (buf == NULL) { 2056 perror("xmlParseAttValue: malloc failed"); 2057 return(NULL); 2058 } 2059 2060 /* 2061 * Ok loop until we reach one of the ending char or a size limit. 
2062 */ 2063 c = CUR_CHAR(l); 2064 while (((NXT(0) != limit) && /* checked */ 2065 (c != '<')) || (ctxt->token != 0)) { 2066 if (c == 0) break; 2067 if (ctxt->token == '&') { 2068 /* 2069 * The reparsing will be done in xmlStringGetNodeList() 2070 * called by the attribute() function in SAX.c 2071 */ 2072 static xmlChar buffer[6] = "&"; 2073 2074 if (len > buf_size - 10) { 2075 growBuffer(buf); 2076 } 2077 current = &buffer[0]; 2078 while (*current != 0) { /* non input consuming */ 2079 buf[len++] = *current++; 2080 } 2081 ctxt->token = 0; 2082 } else if ((c == '&') && (NXT(1) == '#')) { 2083 int val = xmlParseCharRef(ctxt); 2084 if (val == '&') { 2085 /* 2086 * The reparsing will be done in xmlStringGetNodeList() 2087 * called by the attribute() function in SAX.c 2088 */ 2089 static xmlChar buffer[6] = "&"; 2090 2091 if (len > buf_size - 10) { 2092 growBuffer(buf); 2093 } 2094 current = &buffer[0]; 2095 while (*current != 0) { /* non input consuming */ 2096 buf[len++] = *current++; 2097 } 2098 } else { 2099 len += xmlCopyChar(0, &buf[len], val); 2100 } 2101 } else if (c == '&') { 2102 ent = xmlParseEntityRef(ctxt); 2103 if ((ent != NULL) && 2104 (ctxt->replaceEntities != 0)) { 2105 xmlChar *rep; 2106 2107 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2108 rep = xmlStringDecodeEntities(ctxt, ent->content, 2109 XML_SUBSTITUTE_REF, 0, 0, 0); 2110 if (rep != NULL) { 2111 current = rep; 2112 while (*current != 0) { /* non input consuming */ 2113 buf[len++] = *current++; 2114 if (len > buf_size - 10) { 2115 growBuffer(buf); 2116 } 2117 } 2118 xmlFree(rep); 2119 } 2120 } else { 2121 if (ent->content != NULL) 2122 buf[len++] = ent->content[0]; 2123 } 2124 } else if (ent != NULL) { 2125 int i = xmlStrlen(ent->name); 2126 const xmlChar *cur = ent->name; 2127 2128 /* 2129 * This may look absurd but is needed to detect 2130 * entities problems 2131 */ 2132 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2133 (ent->content != NULL)) { 2134 xmlChar *rep; 2135 rep = 
xmlStringDecodeEntities(ctxt, ent->content, 2136 XML_SUBSTITUTE_REF, 0, 0, 0); 2137 if (rep != NULL) 2138 xmlFree(rep); 2139 } 2140 2141 /* 2142 * Just output the reference 2143 */ 2144 buf[len++] = '&'; 2145 if (len > buf_size - i - 10) { 2146 growBuffer(buf); 2147 } 2148 for (;i > 0;i--) 2149 buf[len++] = *cur++; 2150 buf[len++] = ';'; 2151 } 2152 } else { 2153 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2154 COPY_BUF(l,buf,len,0x20); 2155 if (len > buf_size - 10) { 2156 growBuffer(buf); 2157 } 2158 } else { 2159 COPY_BUF(l,buf,len,c); 2160 if (len > buf_size - 10) { 2161 growBuffer(buf); 2162 } 2163 } 2164 NEXTL(l); 2165 } 2166 GROW; 2167 c = CUR_CHAR(l); 2168 } 2169 buf[len++] = 0; 2170 if (RAW == '<') { 2171 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2173 ctxt->sax->error(ctxt->userData, 2174 "Unescaped '<' not allowed in attributes values\n"); 2175 ctxt->wellFormed = 0; 2176 ctxt->disableSAX = 1; 2177 } else if (RAW != limit) { 2178 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2180 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2181 ctxt->wellFormed = 0; 2182 ctxt->disableSAX = 1; 2183 } else 2184 NEXT; 2185 return(buf); 2186} 2187 2188/** 2189 * xmlParseSystemLiteral: 2190 * @ctxt: an XML parser context 2191 * 2192 * parse an XML Literal 2193 * 2194 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2195 * 2196 * Returns the SystemLiteral parsed or NULL 2197 */ 2198 2199xmlChar * 2200xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2201 xmlChar *buf = NULL; 2202 int len = 0; 2203 int size = XML_PARSER_BUFFER_SIZE; 2204 int cur, l; 2205 xmlChar stop; 2206 int state = ctxt->instate; 2207 int count = 0; 2208 2209 SHRINK; 2210 if (RAW == '"') { 2211 NEXT; 2212 stop = '"'; 2213 } else if (RAW == '\'') { 2214 NEXT; 2215 stop = '\''; 2216 } else { 2217 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2218 if ((ctxt->sax 
!= NULL) && (ctxt->sax->error != NULL)) 2219 ctxt->sax->error(ctxt->userData, 2220 "SystemLiteral \" or ' expected\n"); 2221 ctxt->wellFormed = 0; 2222 ctxt->disableSAX = 1; 2223 return(NULL); 2224 } 2225 2226 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2227 if (buf == NULL) { 2228 xmlGenericError(xmlGenericErrorContext, 2229 "malloc of %d byte failed\n", size); 2230 return(NULL); 2231 } 2232 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2233 cur = CUR_CHAR(l); 2234 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2235 if (len + 5 >= size) { 2236 size *= 2; 2237 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2238 if (buf == NULL) { 2239 xmlGenericError(xmlGenericErrorContext, 2240 "realloc of %d byte failed\n", size); 2241 ctxt->instate = (xmlParserInputState) state; 2242 return(NULL); 2243 } 2244 } 2245 count++; 2246 if (count > 50) { 2247 GROW; 2248 count = 0; 2249 } 2250 COPY_BUF(l,buf,len,cur); 2251 NEXTL(l); 2252 cur = CUR_CHAR(l); 2253 if (cur == 0) { 2254 GROW; 2255 SHRINK; 2256 cur = CUR_CHAR(l); 2257 } 2258 } 2259 buf[len] = 0; 2260 ctxt->instate = (xmlParserInputState) state; 2261 if (!IS_CHAR(cur)) { 2262 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2264 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2265 ctxt->wellFormed = 0; 2266 ctxt->disableSAX = 1; 2267 } else { 2268 NEXT; 2269 } 2270 return(buf); 2271} 2272 2273/** 2274 * xmlParsePubidLiteral: 2275 * @ctxt: an XML parser context 2276 * 2277 * parse an XML public literal 2278 * 2279 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2280 * 2281 * Returns the PubidLiteral parsed or NULL. 
2282 */ 2283 2284xmlChar * 2285xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2286 xmlChar *buf = NULL; 2287 int len = 0; 2288 int size = XML_PARSER_BUFFER_SIZE; 2289 xmlChar cur; 2290 xmlChar stop; 2291 int count = 0; 2292 2293 SHRINK; 2294 if (RAW == '"') { 2295 NEXT; 2296 stop = '"'; 2297 } else if (RAW == '\'') { 2298 NEXT; 2299 stop = '\''; 2300 } else { 2301 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2303 ctxt->sax->error(ctxt->userData, 2304 "SystemLiteral \" or ' expected\n"); 2305 ctxt->wellFormed = 0; 2306 ctxt->disableSAX = 1; 2307 return(NULL); 2308 } 2309 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2310 if (buf == NULL) { 2311 xmlGenericError(xmlGenericErrorContext, 2312 "malloc of %d byte failed\n", size); 2313 return(NULL); 2314 } 2315 cur = CUR; 2316 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2317 if (len + 1 >= size) { 2318 size *= 2; 2319 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2320 if (buf == NULL) { 2321 xmlGenericError(xmlGenericErrorContext, 2322 "realloc of %d byte failed\n", size); 2323 return(NULL); 2324 } 2325 } 2326 buf[len++] = cur; 2327 count++; 2328 if (count > 50) { 2329 GROW; 2330 count = 0; 2331 } 2332 NEXT; 2333 cur = CUR; 2334 if (cur == 0) { 2335 GROW; 2336 SHRINK; 2337 cur = CUR; 2338 } 2339 } 2340 buf[len] = 0; 2341 if (cur != stop) { 2342 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2344 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2345 ctxt->wellFormed = 0; 2346 ctxt->disableSAX = 1; 2347 } else { 2348 NEXT; 2349 } 2350 return(buf); 2351} 2352 2353/** 2354 * xmlParseCharData: 2355 * @ctxt: an XML parser context 2356 * @cdata: int indicating whether we are within a CDATA section 2357 * 2358 * parse a CharData section. 2359 * if we are within a CDATA section ']]>' marks an end of section. 
2360 * 2361 * The right angle bracket (>) may be represented using the string ">", 2362 * and must, for compatibility, be escaped using ">" or a character 2363 * reference when it appears in the string "]]>" in content, when that 2364 * string is not marking the end of a CDATA section. 2365 * 2366 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2367 */ 2368 2369void 2370xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2371 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2372 int nbchar = 0; 2373 int cur, l; 2374 int count = 0; 2375 2376 SHRINK; 2377 GROW; 2378 cur = CUR_CHAR(l); 2379 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2380 ((cur != '&') || (ctxt->token == '&')) && 2381 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2382 if ((cur == ']') && (NXT(1) == ']') && 2383 (NXT(2) == '>')) { 2384 if (cdata) break; 2385 else { 2386 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2388 ctxt->sax->error(ctxt->userData, 2389 "Sequence ']]>' not allowed in content\n"); 2390 /* Should this be relaxed ??? I see a "must here */ 2391 ctxt->wellFormed = 0; 2392 ctxt->disableSAX = 1; 2393 } 2394 } 2395 COPY_BUF(l,buf,nbchar,cur); 2396 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2397 /* 2398 * Ok the segment is to be consumed as chars. 2399 */ 2400 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2401 if (areBlanks(ctxt, buf, nbchar)) { 2402 if (ctxt->sax->ignorableWhitespace != NULL) 2403 ctxt->sax->ignorableWhitespace(ctxt->userData, 2404 buf, nbchar); 2405 } else { 2406 if (ctxt->sax->characters != NULL) 2407 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2408 } 2409 } 2410 nbchar = 0; 2411 } 2412 count++; 2413 if (count > 50) { 2414 GROW; 2415 count = 0; 2416 } 2417 NEXTL(l); 2418 cur = CUR_CHAR(l); 2419 } 2420 if (nbchar != 0) { 2421 /* 2422 * Ok the segment is to be consumed as chars. 
2423 */ 2424 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2425 if (areBlanks(ctxt, buf, nbchar)) { 2426 if (ctxt->sax->ignorableWhitespace != NULL) 2427 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2428 } else { 2429 if (ctxt->sax->characters != NULL) 2430 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2431 } 2432 } 2433 } 2434} 2435 2436/** 2437 * xmlParseExternalID: 2438 * @ctxt: an XML parser context 2439 * @publicID: a xmlChar** receiving PubidLiteral 2440 * @strict: indicate whether we should restrict parsing to only 2441 * production [75], see NOTE below 2442 * 2443 * Parse an External ID or a Public ID 2444 * 2445 * NOTE: Productions [75] and [83] interract badly since [75] can generate 2446 * 'PUBLIC' S PubidLiteral S SystemLiteral 2447 * 2448 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2449 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2450 * 2451 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2452 * 2453 * Returns the function returns SystemLiteral and in the second 2454 * case publicID receives PubidLiteral, is strict is off 2455 * it is possible to return NULL and have publicID set. 
2456 */ 2457 2458xmlChar * 2459xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2460 xmlChar *URI = NULL; 2461 2462 SHRINK; 2463 if ((RAW == 'S') && (NXT(1) == 'Y') && 2464 (NXT(2) == 'S') && (NXT(3) == 'T') && 2465 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2466 SKIP(6); 2467 if (!IS_BLANK(CUR)) { 2468 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2470 ctxt->sax->error(ctxt->userData, 2471 "Space required after 'SYSTEM'\n"); 2472 ctxt->wellFormed = 0; 2473 ctxt->disableSAX = 1; 2474 } 2475 SKIP_BLANKS; 2476 URI = xmlParseSystemLiteral(ctxt); 2477 if (URI == NULL) { 2478 ctxt->errNo = XML_ERR_URI_REQUIRED; 2479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2480 ctxt->sax->error(ctxt->userData, 2481 "xmlParseExternalID: SYSTEM, no URI\n"); 2482 ctxt->wellFormed = 0; 2483 ctxt->disableSAX = 1; 2484 } 2485 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2486 (NXT(2) == 'B') && (NXT(3) == 'L') && 2487 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2488 SKIP(6); 2489 if (!IS_BLANK(CUR)) { 2490 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2492 ctxt->sax->error(ctxt->userData, 2493 "Space required after 'PUBLIC'\n"); 2494 ctxt->wellFormed = 0; 2495 ctxt->disableSAX = 1; 2496 } 2497 SKIP_BLANKS; 2498 *publicID = xmlParsePubidLiteral(ctxt); 2499 if (*publicID == NULL) { 2500 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2502 ctxt->sax->error(ctxt->userData, 2503 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2504 ctxt->wellFormed = 0; 2505 ctxt->disableSAX = 1; 2506 } 2507 if (strict) { 2508 /* 2509 * We don't handle [83] so "S SystemLiteral" is required. 
2510 */ 2511 if (!IS_BLANK(CUR)) { 2512 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2514 ctxt->sax->error(ctxt->userData, 2515 "Space required after the Public Identifier\n"); 2516 ctxt->wellFormed = 0; 2517 ctxt->disableSAX = 1; 2518 } 2519 } else { 2520 /* 2521 * We handle [83] so we return immediately, if 2522 * "S SystemLiteral" is not detected. From a purely parsing 2523 * point of view that's a nice mess. 2524 */ 2525 const xmlChar *ptr; 2526 GROW; 2527 2528 ptr = CUR_PTR; 2529 if (!IS_BLANK(*ptr)) return(NULL); 2530 2531 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2532 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2533 } 2534 SKIP_BLANKS; 2535 URI = xmlParseSystemLiteral(ctxt); 2536 if (URI == NULL) { 2537 ctxt->errNo = XML_ERR_URI_REQUIRED; 2538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2539 ctxt->sax->error(ctxt->userData, 2540 "xmlParseExternalID: PUBLIC, no URI\n"); 2541 ctxt->wellFormed = 0; 2542 ctxt->disableSAX = 1; 2543 } 2544 } 2545 return(URI); 2546} 2547 2548/** 2549 * xmlParseComment: 2550 * @ctxt: an XML parser context 2551 * 2552 * Skip an XML (SGML) comment <!-- .... --> 2553 * The spec says that "For compatibility, the string "--" (double-hyphen) 2554 * must not occur within comments. " 2555 * 2556 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2557 */ 2558void 2559xmlParseComment(xmlParserCtxtPtr ctxt) { 2560 xmlChar *buf = NULL; 2561 int len; 2562 int size = XML_PARSER_BUFFER_SIZE; 2563 int q, ql; 2564 int r, rl; 2565 int cur, l; 2566 xmlParserInputState state; 2567 xmlParserInputPtr input = ctxt->input; 2568 int count = 0; 2569 2570 /* 2571 * Check that there is a comment right here. 
2572 */ 2573 if ((RAW != '<') || (NXT(1) != '!') || 2574 (NXT(2) != '-') || (NXT(3) != '-')) return; 2575 2576 state = ctxt->instate; 2577 ctxt->instate = XML_PARSER_COMMENT; 2578 SHRINK; 2579 SKIP(4); 2580 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2581 if (buf == NULL) { 2582 xmlGenericError(xmlGenericErrorContext, 2583 "malloc of %d byte failed\n", size); 2584 ctxt->instate = state; 2585 return; 2586 } 2587 q = CUR_CHAR(ql); 2588 NEXTL(ql); 2589 r = CUR_CHAR(rl); 2590 NEXTL(rl); 2591 cur = CUR_CHAR(l); 2592 len = 0; 2593 while (IS_CHAR(cur) && /* checked */ 2594 ((cur != '>') || 2595 (r != '-') || (q != '-'))) { 2596 if ((r == '-') && (q == '-') && (len > 1)) { 2597 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2599 ctxt->sax->error(ctxt->userData, 2600 "Comment must not contain '--' (double-hyphen)`\n"); 2601 ctxt->wellFormed = 0; 2602 ctxt->disableSAX = 1; 2603 } 2604 if (len + 5 >= size) { 2605 size *= 2; 2606 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2607 if (buf == NULL) { 2608 xmlGenericError(xmlGenericErrorContext, 2609 "realloc of %d byte failed\n", size); 2610 ctxt->instate = state; 2611 return; 2612 } 2613 } 2614 COPY_BUF(ql,buf,len,q); 2615 q = r; 2616 ql = rl; 2617 r = cur; 2618 rl = l; 2619 2620 count++; 2621 if (count > 50) { 2622 GROW; 2623 count = 0; 2624 } 2625 NEXTL(l); 2626 cur = CUR_CHAR(l); 2627 if (cur == 0) { 2628 SHRINK; 2629 GROW; 2630 cur = CUR_CHAR(l); 2631 } 2632 } 2633 buf[len] = 0; 2634 if (!IS_CHAR(cur)) { 2635 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2637 ctxt->sax->error(ctxt->userData, 2638 "Comment not terminated \n<!--%.50s\n", buf); 2639 ctxt->wellFormed = 0; 2640 ctxt->disableSAX = 1; 2641 xmlFree(buf); 2642 } else { 2643 if (input != ctxt->input) { 2644 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2646 
ctxt->sax->error(ctxt->userData, 2647"Comment doesn't start and stop in the same entity\n"); 2648 ctxt->wellFormed = 0; 2649 ctxt->disableSAX = 1; 2650 } 2651 NEXT; 2652 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2653 (!ctxt->disableSAX)) 2654 ctxt->sax->comment(ctxt->userData, buf); 2655 xmlFree(buf); 2656 } 2657 ctxt->instate = state; 2658} 2659 2660/** 2661 * xmlParsePITarget: 2662 * @ctxt: an XML parser context 2663 * 2664 * parse the name of a PI 2665 * 2666 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2667 * 2668 * Returns the PITarget name or NULL 2669 */ 2670 2671xmlChar * 2672xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2673 xmlChar *name; 2674 2675 name = xmlParseName(ctxt); 2676 if ((name != NULL) && 2677 ((name[0] == 'x') || (name[0] == 'X')) && 2678 ((name[1] == 'm') || (name[1] == 'M')) && 2679 ((name[2] == 'l') || (name[2] == 'L'))) { 2680 int i; 2681 if ((name[0] == 'x') && (name[1] == 'm') && 2682 (name[2] == 'l') && (name[3] == 0)) { 2683 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2685 ctxt->sax->error(ctxt->userData, 2686 "XML declaration allowed only at the start of the document\n"); 2687 ctxt->wellFormed = 0; 2688 ctxt->disableSAX = 1; 2689 return(name); 2690 } else if (name[3] == 0) { 2691 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2693 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2694 ctxt->wellFormed = 0; 2695 ctxt->disableSAX = 1; 2696 return(name); 2697 } 2698 for (i = 0;;i++) { 2699 if (xmlW3CPIs[i] == NULL) break; 2700 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2701 return(name); 2702 } 2703 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2704 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2705 ctxt->sax->warning(ctxt->userData, 2706 "xmlParsePItarget: invalid name prefix 'xml'\n"); 2707 } 2708 } 2709 return(name); 2710} 2711 2712/** 2713 * xmlParsePI: 2714 * @ctxt: 
an XML parser context 2715 * 2716 * parse an XML Processing Instruction. 2717 * 2718 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2719 * 2720 * The processing is transfered to SAX once parsed. 2721 */ 2722 2723void 2724xmlParsePI(xmlParserCtxtPtr ctxt) { 2725 xmlChar *buf = NULL; 2726 int len = 0; 2727 int size = XML_PARSER_BUFFER_SIZE; 2728 int cur, l; 2729 xmlChar *target; 2730 xmlParserInputState state; 2731 int count = 0; 2732 2733 if ((RAW == '<') && (NXT(1) == '?')) { 2734 xmlParserInputPtr input = ctxt->input; 2735 state = ctxt->instate; 2736 ctxt->instate = XML_PARSER_PI; 2737 /* 2738 * this is a Processing Instruction. 2739 */ 2740 SKIP(2); 2741 SHRINK; 2742 2743 /* 2744 * Parse the target name and check for special support like 2745 * namespace. 2746 */ 2747 target = xmlParsePITarget(ctxt); 2748 if (target != NULL) { 2749 if ((RAW == '?') && (NXT(1) == '>')) { 2750 if (input != ctxt->input) { 2751 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2753 ctxt->sax->error(ctxt->userData, 2754 "PI declaration doesn't start and stop in the same entity\n"); 2755 ctxt->wellFormed = 0; 2756 ctxt->disableSAX = 1; 2757 } 2758 SKIP(2); 2759 2760 /* 2761 * SAX: PI detected. 
2762 */ 2763 if ((ctxt->sax) && (!ctxt->disableSAX) && 2764 (ctxt->sax->processingInstruction != NULL)) 2765 ctxt->sax->processingInstruction(ctxt->userData, 2766 target, NULL); 2767 ctxt->instate = state; 2768 xmlFree(target); 2769 return; 2770 } 2771 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2772 if (buf == NULL) { 2773 xmlGenericError(xmlGenericErrorContext, 2774 "malloc of %d byte failed\n", size); 2775 ctxt->instate = state; 2776 return; 2777 } 2778 cur = CUR; 2779 if (!IS_BLANK(cur)) { 2780 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2782 ctxt->sax->error(ctxt->userData, 2783 "xmlParsePI: PI %s space expected\n", target); 2784 ctxt->wellFormed = 0; 2785 ctxt->disableSAX = 1; 2786 } 2787 SKIP_BLANKS; 2788 cur = CUR_CHAR(l); 2789 while (IS_CHAR(cur) && /* checked */ 2790 ((cur != '?') || (NXT(1) != '>'))) { 2791 if (len + 5 >= size) { 2792 size *= 2; 2793 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2794 if (buf == NULL) { 2795 xmlGenericError(xmlGenericErrorContext, 2796 "realloc of %d byte failed\n", size); 2797 ctxt->instate = state; 2798 return; 2799 } 2800 } 2801 count++; 2802 if (count > 50) { 2803 GROW; 2804 count = 0; 2805 } 2806 COPY_BUF(l,buf,len,cur); 2807 NEXTL(l); 2808 cur = CUR_CHAR(l); 2809 if (cur == 0) { 2810 SHRINK; 2811 GROW; 2812 cur = CUR_CHAR(l); 2813 } 2814 } 2815 buf[len] = 0; 2816 if (cur != '?') { 2817 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 2818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2819 ctxt->sax->error(ctxt->userData, 2820 "xmlParsePI: PI %s never end ...\n", target); 2821 ctxt->wellFormed = 0; 2822 ctxt->disableSAX = 1; 2823 } else { 2824 if (input != ctxt->input) { 2825 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2827 ctxt->sax->error(ctxt->userData, 2828 "PI declaration doesn't start and stop in the same entity\n"); 2829 ctxt->wellFormed = 0; 2830 ctxt->disableSAX = 1; 2831 } 2832 
SKIP(2); 2833 2834 /* 2835 * SAX: PI detected. 2836 */ 2837 if ((ctxt->sax) && (!ctxt->disableSAX) && 2838 (ctxt->sax->processingInstruction != NULL)) 2839 ctxt->sax->processingInstruction(ctxt->userData, 2840 target, buf); 2841 } 2842 xmlFree(buf); 2843 xmlFree(target); 2844 } else { 2845 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 2846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2847 ctxt->sax->error(ctxt->userData, 2848 "xmlParsePI : no target name\n"); 2849 ctxt->wellFormed = 0; 2850 ctxt->disableSAX = 1; 2851 } 2852 ctxt->instate = state; 2853 } 2854} 2855 2856/** 2857 * xmlParseNotationDecl: 2858 * @ctxt: an XML parser context 2859 * 2860 * parse a notation declaration 2861 * 2862 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 2863 * 2864 * Hence there is actually 3 choices: 2865 * 'PUBLIC' S PubidLiteral 2866 * 'PUBLIC' S PubidLiteral S SystemLiteral 2867 * and 'SYSTEM' S SystemLiteral 2868 * 2869 * See the NOTE on xmlParseExternalID(). 2870 */ 2871 2872void 2873xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 2874 xmlChar *name; 2875 xmlChar *Pubid; 2876 xmlChar *Systemid; 2877 2878 if ((RAW == '<') && (NXT(1) == '!') && 2879 (NXT(2) == 'N') && (NXT(3) == 'O') && 2880 (NXT(4) == 'T') && (NXT(5) == 'A') && 2881 (NXT(6) == 'T') && (NXT(7) == 'I') && 2882 (NXT(8) == 'O') && (NXT(9) == 'N')) { 2883 xmlParserInputPtr input = ctxt->input; 2884 SHRINK; 2885 SKIP(10); 2886 if (!IS_BLANK(CUR)) { 2887 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2889 ctxt->sax->error(ctxt->userData, 2890 "Space required after '<!NOTATION'\n"); 2891 ctxt->wellFormed = 0; 2892 ctxt->disableSAX = 1; 2893 return; 2894 } 2895 SKIP_BLANKS; 2896 2897 name = xmlParseName(ctxt); 2898 if (name == NULL) { 2899 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2901 ctxt->sax->error(ctxt->userData, 2902 "NOTATION: Name expected here\n"); 2903 ctxt->wellFormed 
= 0; 2904 ctxt->disableSAX = 1; 2905 return; 2906 } 2907 if (!IS_BLANK(CUR)) { 2908 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2910 ctxt->sax->error(ctxt->userData, 2911 "Space required after the NOTATION name'\n"); 2912 ctxt->wellFormed = 0; 2913 ctxt->disableSAX = 1; 2914 return; 2915 } 2916 SKIP_BLANKS; 2917 2918 /* 2919 * Parse the IDs. 2920 */ 2921 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 2922 SKIP_BLANKS; 2923 2924 if (RAW == '>') { 2925 if (input != ctxt->input) { 2926 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2928 ctxt->sax->error(ctxt->userData, 2929"Notation declaration doesn't start and stop in the same entity\n"); 2930 ctxt->wellFormed = 0; 2931 ctxt->disableSAX = 1; 2932 } 2933 NEXT; 2934 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 2935 (ctxt->sax->notationDecl != NULL)) 2936 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 2937 } else { 2938 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 2939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2940 ctxt->sax->error(ctxt->userData, 2941 "'>' required to close NOTATION declaration\n"); 2942 ctxt->wellFormed = 0; 2943 ctxt->disableSAX = 1; 2944 } 2945 xmlFree(name); 2946 if (Systemid != NULL) xmlFree(Systemid); 2947 if (Pubid != NULL) xmlFree(Pubid); 2948 } 2949} 2950 2951/** 2952 * xmlParseEntityDecl: 2953 * @ctxt: an XML parser context 2954 * 2955 * parse <!ENTITY declarations 2956 * 2957 * [70] EntityDecl ::= GEDecl | PEDecl 2958 * 2959 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 2960 * 2961 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 2962 * 2963 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 2964 * 2965 * [74] PEDef ::= EntityValue | ExternalID 2966 * 2967 * [76] NDataDecl ::= S 'NDATA' S Name 2968 * 2969 * [ VC: Notation Declared ] 2970 * The Name must match the declared name of a notation. 
2971 */ 2972 2973void 2974xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 2975 xmlChar *name = NULL; 2976 xmlChar *value = NULL; 2977 xmlChar *URI = NULL, *literal = NULL; 2978 xmlChar *ndata = NULL; 2979 int isParameter = 0; 2980 xmlChar *orig = NULL; 2981 2982 GROW; 2983 if ((RAW == '<') && (NXT(1) == '!') && 2984 (NXT(2) == 'E') && (NXT(3) == 'N') && 2985 (NXT(4) == 'T') && (NXT(5) == 'I') && 2986 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 2987 xmlParserInputPtr input = ctxt->input; 2988 ctxt->instate = XML_PARSER_ENTITY_DECL; 2989 SHRINK; 2990 SKIP(8); 2991 if (!IS_BLANK(CUR)) { 2992 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2994 ctxt->sax->error(ctxt->userData, 2995 "Space required after '<!ENTITY'\n"); 2996 ctxt->wellFormed = 0; 2997 ctxt->disableSAX = 1; 2998 } 2999 SKIP_BLANKS; 3000 3001 if (RAW == '%') { 3002 NEXT; 3003 if (!IS_BLANK(CUR)) { 3004 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3006 ctxt->sax->error(ctxt->userData, 3007 "Space required after '%'\n"); 3008 ctxt->wellFormed = 0; 3009 ctxt->disableSAX = 1; 3010 } 3011 SKIP_BLANKS; 3012 isParameter = 1; 3013 } 3014 3015 name = xmlParseName(ctxt); 3016 if (name == NULL) { 3017 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3019 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3020 ctxt->wellFormed = 0; 3021 ctxt->disableSAX = 1; 3022 return; 3023 } 3024 if (!IS_BLANK(CUR)) { 3025 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3027 ctxt->sax->error(ctxt->userData, 3028 "Space required after the entity name\n"); 3029 ctxt->wellFormed = 0; 3030 ctxt->disableSAX = 1; 3031 } 3032 SKIP_BLANKS; 3033 3034 /* 3035 * handle the various case of definitions... 
3036 */ 3037 if (isParameter) { 3038 if ((RAW == '"') || (RAW == '\'')) { 3039 value = xmlParseEntityValue(ctxt, &orig); 3040 if (value) { 3041 if ((ctxt->sax != NULL) && 3042 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3043 ctxt->sax->entityDecl(ctxt->userData, name, 3044 XML_INTERNAL_PARAMETER_ENTITY, 3045 NULL, NULL, value); 3046 } 3047 } else { 3048 URI = xmlParseExternalID(ctxt, &literal, 1); 3049 if ((URI == NULL) && (literal == NULL)) { 3050 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3052 ctxt->sax->error(ctxt->userData, 3053 "Entity value required\n"); 3054 ctxt->wellFormed = 0; 3055 ctxt->disableSAX = 1; 3056 } 3057 if (URI) { 3058 xmlURIPtr uri; 3059 3060 uri = xmlParseURI((const char *) URI); 3061 if (uri == NULL) { 3062 ctxt->errNo = XML_ERR_INVALID_URI; 3063 if ((ctxt->sax != NULL) && 3064 (!ctxt->disableSAX) && 3065 (ctxt->sax->error != NULL)) 3066 ctxt->sax->error(ctxt->userData, 3067 "Invalid URI: %s\n", URI); 3068 ctxt->wellFormed = 0; 3069 } else { 3070 if (uri->fragment != NULL) { 3071 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3072 if ((ctxt->sax != NULL) && 3073 (!ctxt->disableSAX) && 3074 (ctxt->sax->error != NULL)) 3075 ctxt->sax->error(ctxt->userData, 3076 "Fragment not allowed: %s\n", URI); 3077 ctxt->wellFormed = 0; 3078 } else { 3079 if ((ctxt->sax != NULL) && 3080 (!ctxt->disableSAX) && 3081 (ctxt->sax->entityDecl != NULL)) 3082 ctxt->sax->entityDecl(ctxt->userData, name, 3083 XML_EXTERNAL_PARAMETER_ENTITY, 3084 literal, URI, NULL); 3085 } 3086 xmlFreeURI(uri); 3087 } 3088 } 3089 } 3090 } else { 3091 if ((RAW == '"') || (RAW == '\'')) { 3092 value = xmlParseEntityValue(ctxt, &orig); 3093 if ((ctxt->sax != NULL) && 3094 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3095 ctxt->sax->entityDecl(ctxt->userData, name, 3096 XML_INTERNAL_GENERAL_ENTITY, 3097 NULL, NULL, value); 3098 } else { 3099 URI = xmlParseExternalID(ctxt, &literal, 1); 3100 if ((URI == NULL) && (literal 
== NULL)) { 3101 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3103 ctxt->sax->error(ctxt->userData, 3104 "Entity value required\n"); 3105 ctxt->wellFormed = 0; 3106 ctxt->disableSAX = 1; 3107 } 3108 if (URI) { 3109 xmlURIPtr uri; 3110 3111 uri = xmlParseURI((const char *)URI); 3112 if (uri == NULL) { 3113 ctxt->errNo = XML_ERR_INVALID_URI; 3114 if ((ctxt->sax != NULL) && 3115 (!ctxt->disableSAX) && 3116 (ctxt->sax->error != NULL)) 3117 ctxt->sax->error(ctxt->userData, 3118 "Invalid URI: %s\n", URI); 3119 ctxt->wellFormed = 0; 3120 } else { 3121 if (uri->fragment != NULL) { 3122 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3123 if ((ctxt->sax != NULL) && 3124 (!ctxt->disableSAX) && 3125 (ctxt->sax->error != NULL)) 3126 ctxt->sax->error(ctxt->userData, 3127 "Fragment not allowed: %s\n", URI); 3128 ctxt->wellFormed = 0; 3129 } 3130 xmlFreeURI(uri); 3131 } 3132 } 3133 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3134 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3136 ctxt->sax->error(ctxt->userData, 3137 "Space required before 'NDATA'\n"); 3138 ctxt->wellFormed = 0; 3139 ctxt->disableSAX = 1; 3140 } 3141 SKIP_BLANKS; 3142 if ((RAW == 'N') && (NXT(1) == 'D') && 3143 (NXT(2) == 'A') && (NXT(3) == 'T') && 3144 (NXT(4) == 'A')) { 3145 SKIP(5); 3146 if (!IS_BLANK(CUR)) { 3147 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3149 ctxt->sax->error(ctxt->userData, 3150 "Space required after 'NDATA'\n"); 3151 ctxt->wellFormed = 0; 3152 ctxt->disableSAX = 1; 3153 } 3154 SKIP_BLANKS; 3155 ndata = xmlParseName(ctxt); 3156 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3157 (ctxt->sax->unparsedEntityDecl != NULL)) 3158 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3159 literal, URI, ndata); 3160 } else { 3161 if ((ctxt->sax != NULL) && 3162 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3163 
ctxt->sax->entityDecl(ctxt->userData, name, 3164 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3165 literal, URI, NULL); 3166 } 3167 } 3168 } 3169 SKIP_BLANKS; 3170 if (RAW != '>') { 3171 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3173 ctxt->sax->error(ctxt->userData, 3174 "xmlParseEntityDecl: entity %s not terminated\n", name); 3175 ctxt->wellFormed = 0; 3176 ctxt->disableSAX = 1; 3177 } else { 3178 if (input != ctxt->input) { 3179 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3181 ctxt->sax->error(ctxt->userData, 3182"Entity declaration doesn't start and stop in the same entity\n"); 3183 ctxt->wellFormed = 0; 3184 ctxt->disableSAX = 1; 3185 } 3186 NEXT; 3187 } 3188 if (orig != NULL) { 3189 /* 3190 * Ugly mechanism to save the raw entity value. 3191 */ 3192 xmlEntityPtr cur = NULL; 3193 3194 if (isParameter) { 3195 if ((ctxt->sax != NULL) && 3196 (ctxt->sax->getParameterEntity != NULL)) 3197 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3198 } else { 3199 if ((ctxt->sax != NULL) && 3200 (ctxt->sax->getEntity != NULL)) 3201 cur = ctxt->sax->getEntity(ctxt->userData, name); 3202 } 3203 if (cur != NULL) { 3204 if (cur->orig != NULL) 3205 xmlFree(orig); 3206 else 3207 cur->orig = orig; 3208 } else 3209 xmlFree(orig); 3210 } 3211 if (name != NULL) xmlFree(name); 3212 if (value != NULL) xmlFree(value); 3213 if (URI != NULL) xmlFree(URI); 3214 if (literal != NULL) xmlFree(literal); 3215 if (ndata != NULL) xmlFree(ndata); 3216 } 3217} 3218 3219/** 3220 * xmlParseDefaultDecl: 3221 * @ctxt: an XML parser context 3222 * @value: Receive a possible fixed default value for the attribute 3223 * 3224 * Parse an attribute default declaration 3225 * 3226 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 3227 * 3228 * [ VC: Required Attribute ] 3229 * if the default declaration is the keyword #REQUIRED, then the 3230 * attribute must be specified for all elements of the type in the 3231 * attribute-list declaration. 3232 * 3233 * [ VC: Attribute Default Legal ] 3234 * The declared default value must meet the lexical constraints of 3235 * the declared attribute type c.f. xmlValidateAttributeDecl() 3236 * 3237 * [ VC: Fixed Attribute Default ] 3238 * if an attribute has a default value declared with the #FIXED 3239 * keyword, instances of that attribute must match the default value. 3240 * 3241 * [ WFC: No < in Attribute Values ] 3242 * handled in xmlParseAttValue() 3243 * 3244 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3245 * or XML_ATTRIBUTE_FIXED. 3246 */ 3247 3248int 3249xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3250 int val; 3251 xmlChar *ret; 3252 3253 *value = NULL; 3254 if ((RAW == '#') && (NXT(1) == 'R') && 3255 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3256 (NXT(4) == 'U') && (NXT(5) == 'I') && 3257 (NXT(6) == 'R') && (NXT(7) == 'E') && 3258 (NXT(8) == 'D')) { 3259 SKIP(9); 3260 return(XML_ATTRIBUTE_REQUIRED); 3261 } 3262 if ((RAW == '#') && (NXT(1) == 'I') && 3263 (NXT(2) == 'M') && (NXT(3) == 'P') && 3264 (NXT(4) == 'L') && (NXT(5) == 'I') && 3265 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3266 SKIP(8); 3267 return(XML_ATTRIBUTE_IMPLIED); 3268 } 3269 val = XML_ATTRIBUTE_NONE; 3270 if ((RAW == '#') && (NXT(1) == 'F') && 3271 (NXT(2) == 'I') && (NXT(3) == 'X') && 3272 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3273 SKIP(6); 3274 val = XML_ATTRIBUTE_FIXED; 3275 if (!IS_BLANK(CUR)) { 3276 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3278 ctxt->sax->error(ctxt->userData, 3279 "Space required after '#FIXED'\n"); 3280 ctxt->wellFormed = 0; 3281 ctxt->disableSAX = 1; 3282 } 3283 SKIP_BLANKS; 3284 } 3285 ret = xmlParseAttValue(ctxt); 3286 ctxt->instate = 
XML_PARSER_DTD; 3287 if (ret == NULL) { 3288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3289 ctxt->sax->error(ctxt->userData, 3290 "Attribute default value declaration error\n"); 3291 ctxt->wellFormed = 0; 3292 ctxt->disableSAX = 1; 3293 } else 3294 *value = ret; 3295 return(val); 3296} 3297 3298/** 3299 * xmlParseNotationType: 3300 * @ctxt: an XML parser context 3301 * 3302 * parse an Notation attribute type. 3303 * 3304 * Note: the leading 'NOTATION' S part has already being parsed... 3305 * 3306 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3307 * 3308 * [ VC: Notation Attributes ] 3309 * Values of this type must match one of the notation names included 3310 * in the declaration; all notation names in the declaration must be declared. 3311 * 3312 * Returns: the notation attribute tree built while parsing 3313 */ 3314 3315xmlEnumerationPtr 3316xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3317 xmlChar *name; 3318 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3319 3320 if (RAW != '(') { 3321 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3323 ctxt->sax->error(ctxt->userData, 3324 "'(' required to start 'NOTATION'\n"); 3325 ctxt->wellFormed = 0; 3326 ctxt->disableSAX = 1; 3327 return(NULL); 3328 } 3329 SHRINK; 3330 do { 3331 NEXT; 3332 SKIP_BLANKS; 3333 name = xmlParseName(ctxt); 3334 if (name == NULL) { 3335 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3337 ctxt->sax->error(ctxt->userData, 3338 "Name expected in NOTATION declaration\n"); 3339 ctxt->wellFormed = 0; 3340 ctxt->disableSAX = 1; 3341 return(ret); 3342 } 3343 cur = xmlCreateEnumeration(name); 3344 xmlFree(name); 3345 if (cur == NULL) return(ret); 3346 if (last == NULL) ret = last = cur; 3347 else { 3348 last->next = cur; 3349 last = cur; 3350 } 3351 SKIP_BLANKS; 3352 } while (RAW == '|'); 3353 if (RAW != ')') { 3354 ctxt->errNo = 
XML_ERR_NOTATION_NOT_FINISHED; 3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3356 ctxt->sax->error(ctxt->userData, 3357 "')' required to finish NOTATION declaration\n"); 3358 ctxt->wellFormed = 0; 3359 ctxt->disableSAX = 1; 3360 if ((last != NULL) && (last != ret)) 3361 xmlFreeEnumeration(last); 3362 return(ret); 3363 } 3364 NEXT; 3365 return(ret); 3366} 3367 3368/** 3369 * xmlParseEnumerationType: 3370 * @ctxt: an XML parser context 3371 * 3372 * parse an Enumeration attribute type. 3373 * 3374 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3375 * 3376 * [ VC: Enumeration ] 3377 * Values of this type must match one of the Nmtoken tokens in 3378 * the declaration 3379 * 3380 * Returns: the enumeration attribute tree built while parsing 3381 */ 3382 3383xmlEnumerationPtr 3384xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3385 xmlChar *name; 3386 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3387 3388 if (RAW != '(') { 3389 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3391 ctxt->sax->error(ctxt->userData, 3392 "'(' required to start ATTLIST enumeration\n"); 3393 ctxt->wellFormed = 0; 3394 ctxt->disableSAX = 1; 3395 return(NULL); 3396 } 3397 SHRINK; 3398 do { 3399 NEXT; 3400 SKIP_BLANKS; 3401 name = xmlParseNmtoken(ctxt); 3402 if (name == NULL) { 3403 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3405 ctxt->sax->error(ctxt->userData, 3406 "NmToken expected in ATTLIST enumeration\n"); 3407 ctxt->wellFormed = 0; 3408 ctxt->disableSAX = 1; 3409 return(ret); 3410 } 3411 cur = xmlCreateEnumeration(name); 3412 xmlFree(name); 3413 if (cur == NULL) return(ret); 3414 if (last == NULL) ret = last = cur; 3415 else { 3416 last->next = cur; 3417 last = cur; 3418 } 3419 SKIP_BLANKS; 3420 } while (RAW == '|'); 3421 if (RAW != ')') { 3422 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3423 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 3424 ctxt->sax->error(ctxt->userData, 3425 "')' required to finish ATTLIST enumeration\n"); 3426 ctxt->wellFormed = 0; 3427 ctxt->disableSAX = 1; 3428 return(ret); 3429 } 3430 NEXT; 3431 return(ret); 3432} 3433 3434/** 3435 * xmlParseEnumeratedType: 3436 * @ctxt: an XML parser context 3437 * @tree: the enumeration tree built while parsing 3438 * 3439 * parse an Enumerated attribute type. 3440 * 3441 * [57] EnumeratedType ::= NotationType | Enumeration 3442 * 3443 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3444 * 3445 * 3446 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3447 */ 3448 3449int 3450xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3451 if ((RAW == 'N') && (NXT(1) == 'O') && 3452 (NXT(2) == 'T') && (NXT(3) == 'A') && 3453 (NXT(4) == 'T') && (NXT(5) == 'I') && 3454 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3455 SKIP(8); 3456 if (!IS_BLANK(CUR)) { 3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3459 ctxt->sax->error(ctxt->userData, 3460 "Space required after 'NOTATION'\n"); 3461 ctxt->wellFormed = 0; 3462 ctxt->disableSAX = 1; 3463 return(0); 3464 } 3465 SKIP_BLANKS; 3466 *tree = xmlParseNotationType(ctxt); 3467 if (*tree == NULL) return(0); 3468 return(XML_ATTRIBUTE_NOTATION); 3469 } 3470 *tree = xmlParseEnumerationType(ctxt); 3471 if (*tree == NULL) return(0); 3472 return(XML_ATTRIBUTE_ENUMERATION); 3473} 3474 3475/** 3476 * xmlParseAttributeType: 3477 * @ctxt: an XML parser context 3478 * @tree: the enumeration tree built while parsing 3479 * 3480 * parse the Attribute list def for an element 3481 * 3482 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3483 * 3484 * [55] StringType ::= 'CDATA' 3485 * 3486 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3487 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3488 * 3489 * Validity constraints for attribute values syntax are checked in 3490 * 
xmlValidateAttributeValue() 3491 * 3492 * [ VC: ID ] 3493 * Values of type ID must match the Name production. A name must not 3494 * appear more than once in an XML document as a value of this type; 3495 * i.e., ID values must uniquely identify the elements which bear them. 3496 * 3497 * [ VC: One ID per Element Type ] 3498 * No element type may have more than one ID attribute specified. 3499 * 3500 * [ VC: ID Attribute Default ] 3501 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 3502 * 3503 * [ VC: IDREF ] 3504 * Values of type IDREF must match the Name production, and values 3505 * of type IDREFS must match Names; each IDREF Name must match the value 3506 * of an ID attribute on some element in the XML document; i.e. IDREF 3507 * values must match the value of some ID attribute. 3508 * 3509 * [ VC: Entity Name ] 3510 * Values of type ENTITY must match the Name production, values 3511 * of type ENTITIES must match Names; each Entity Name must match the 3512 * name of an unparsed entity declared in the DTD. 3513 * 3514 * [ VC: Name Token ] 3515 * Values of type NMTOKEN must match the Nmtoken production; values 3516 * of type NMTOKENS must match Nmtokens. 
3517 * 3518 * Returns the attribute type 3519 */ 3520int 3521xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3522 SHRINK; 3523 if ((RAW == 'C') && (NXT(1) == 'D') && 3524 (NXT(2) == 'A') && (NXT(3) == 'T') && 3525 (NXT(4) == 'A')) { 3526 SKIP(5); 3527 return(XML_ATTRIBUTE_CDATA); 3528 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3529 (NXT(2) == 'R') && (NXT(3) == 'E') && 3530 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3531 SKIP(6); 3532 return(XML_ATTRIBUTE_IDREFS); 3533 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3534 (NXT(2) == 'R') && (NXT(3) == 'E') && 3535 (NXT(4) == 'F')) { 3536 SKIP(5); 3537 return(XML_ATTRIBUTE_IDREF); 3538 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3539 SKIP(2); 3540 return(XML_ATTRIBUTE_ID); 3541 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3542 (NXT(2) == 'T') && (NXT(3) == 'I') && 3543 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3544 SKIP(6); 3545 return(XML_ATTRIBUTE_ENTITY); 3546 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3547 (NXT(2) == 'T') && (NXT(3) == 'I') && 3548 (NXT(4) == 'T') && (NXT(5) == 'I') && 3549 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3550 SKIP(8); 3551 return(XML_ATTRIBUTE_ENTITIES); 3552 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3553 (NXT(2) == 'T') && (NXT(3) == 'O') && 3554 (NXT(4) == 'K') && (NXT(5) == 'E') && 3555 (NXT(6) == 'N') && (NXT(7) == 'S')) { 3556 SKIP(8); 3557 return(XML_ATTRIBUTE_NMTOKENS); 3558 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3559 (NXT(2) == 'T') && (NXT(3) == 'O') && 3560 (NXT(4) == 'K') && (NXT(5) == 'E') && 3561 (NXT(6) == 'N')) { 3562 SKIP(7); 3563 return(XML_ATTRIBUTE_NMTOKEN); 3564 } 3565 return(xmlParseEnumeratedType(ctxt, tree)); 3566} 3567 3568/** 3569 * xmlParseAttributeListDecl: 3570 * @ctxt: an XML parser context 3571 * 3572 * : parse the Attribute list def for an element 3573 * 3574 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 3575 * 3576 * [53] AttDef ::= S Name S AttType S DefaultDecl 3577 * 3578 */ 3579void 3580xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3581 xmlChar *elemName; 3582 xmlChar *attrName; 3583 xmlEnumerationPtr tree; 3584 3585 if ((RAW == '<') && (NXT(1) == '!') && 3586 (NXT(2) == 'A') && (NXT(3) == 'T') && 3587 (NXT(4) == 'T') && (NXT(5) == 'L') && 3588 (NXT(6) == 'I') && (NXT(7) == 'S') && 3589 (NXT(8) == 'T')) { 3590 xmlParserInputPtr input = ctxt->input; 3591 3592 SKIP(9); 3593 if (!IS_BLANK(CUR)) { 3594 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3596 ctxt->sax->error(ctxt->userData, 3597 "Space required after '<!ATTLIST'\n"); 3598 ctxt->wellFormed = 0; 3599 ctxt->disableSAX = 1; 3600 } 3601 SKIP_BLANKS; 3602 elemName = xmlParseName(ctxt); 3603 if (elemName == NULL) { 3604 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3606 ctxt->sax->error(ctxt->userData, 3607 "ATTLIST: no name for Element\n"); 3608 ctxt->wellFormed = 0; 3609 ctxt->disableSAX = 1; 3610 return; 3611 } 3612 SKIP_BLANKS; 3613 GROW; 3614 while (RAW != '>') { 3615 const xmlChar *check = CUR_PTR; 3616 int type; 3617 int def; 3618 xmlChar *defaultValue = NULL; 3619 3620 GROW; 3621 tree = NULL; 3622 attrName = xmlParseName(ctxt); 3623 if (attrName == NULL) { 3624 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3626 ctxt->sax->error(ctxt->userData, 3627 "ATTLIST: no name for Attribute\n"); 3628 ctxt->wellFormed = 0; 3629 ctxt->disableSAX = 1; 3630 break; 3631 } 3632 GROW; 3633 if (!IS_BLANK(CUR)) { 3634 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3636 ctxt->sax->error(ctxt->userData, 3637 "Space required after the attribute name\n"); 3638 ctxt->wellFormed = 0; 3639 ctxt->disableSAX = 1; 3640 if (attrName != NULL) 3641 xmlFree(attrName); 3642 if (defaultValue != NULL) 3643 xmlFree(defaultValue); 
3644 break; 3645 } 3646 SKIP_BLANKS; 3647 3648 type = xmlParseAttributeType(ctxt, &tree); 3649 if (type <= 0) { 3650 if (attrName != NULL) 3651 xmlFree(attrName); 3652 if (defaultValue != NULL) 3653 xmlFree(defaultValue); 3654 break; 3655 } 3656 3657 GROW; 3658 if (!IS_BLANK(CUR)) { 3659 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3661 ctxt->sax->error(ctxt->userData, 3662 "Space required after the attribute type\n"); 3663 ctxt->wellFormed = 0; 3664 ctxt->disableSAX = 1; 3665 if (attrName != NULL) 3666 xmlFree(attrName); 3667 if (defaultValue != NULL) 3668 xmlFree(defaultValue); 3669 if (tree != NULL) 3670 xmlFreeEnumeration(tree); 3671 break; 3672 } 3673 SKIP_BLANKS; 3674 3675 def = xmlParseDefaultDecl(ctxt, &defaultValue); 3676 if (def <= 0) { 3677 if (attrName != NULL) 3678 xmlFree(attrName); 3679 if (defaultValue != NULL) 3680 xmlFree(defaultValue); 3681 if (tree != NULL) 3682 xmlFreeEnumeration(tree); 3683 break; 3684 } 3685 3686 GROW; 3687 if (RAW != '>') { 3688 if (!IS_BLANK(CUR)) { 3689 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3691 ctxt->sax->error(ctxt->userData, 3692 "Space required after the attribute default value\n"); 3693 ctxt->wellFormed = 0; 3694 ctxt->disableSAX = 1; 3695 if (attrName != NULL) 3696 xmlFree(attrName); 3697 if (defaultValue != NULL) 3698 xmlFree(defaultValue); 3699 if (tree != NULL) 3700 xmlFreeEnumeration(tree); 3701 break; 3702 } 3703 SKIP_BLANKS; 3704 } 3705 if (check == CUR_PTR) { 3706 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 3707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3708 ctxt->sax->error(ctxt->userData, 3709 "xmlParseAttributeListDecl: detected internal error\n"); 3710 if (attrName != NULL) 3711 xmlFree(attrName); 3712 if (defaultValue != NULL) 3713 xmlFree(defaultValue); 3714 if (tree != NULL) 3715 xmlFreeEnumeration(tree); 3716 break; 3717 } 3718 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3719 
(ctxt->sax->attributeDecl != NULL)) 3720 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 3721 type, def, defaultValue, tree); 3722 if (attrName != NULL) 3723 xmlFree(attrName); 3724 if (defaultValue != NULL) 3725 xmlFree(defaultValue); 3726 GROW; 3727 } 3728 if (RAW == '>') { 3729 if (input != ctxt->input) { 3730 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3732 ctxt->sax->error(ctxt->userData, 3733"Attribute list declaration doesn't start and stop in the same entity\n"); 3734 ctxt->wellFormed = 0; 3735 ctxt->disableSAX = 1; 3736 } 3737 NEXT; 3738 } 3739 3740 xmlFree(elemName); 3741 } 3742} 3743 3744/** 3745 * xmlParseElementMixedContentDecl: 3746 * @ctxt: an XML parser context 3747 * 3748 * parse the declaration for a Mixed Element content 3749 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3750 * 3751 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 3752 * '(' S? '#PCDATA' S? ')' 3753 * 3754 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 3755 * 3756 * [ VC: No Duplicate Types ] 3757 * The same name must not appear more than once in a single 3758 * mixed-content declaration. 
3759 * 3760 * returns: the list of the xmlElementContentPtr describing the element choices 3761 */ 3762xmlElementContentPtr 3763xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 3764 xmlElementContentPtr ret = NULL, cur = NULL, n; 3765 xmlChar *elem = NULL; 3766 3767 GROW; 3768 if ((RAW == '#') && (NXT(1) == 'P') && 3769 (NXT(2) == 'C') && (NXT(3) == 'D') && 3770 (NXT(4) == 'A') && (NXT(5) == 'T') && 3771 (NXT(6) == 'A')) { 3772 SKIP(7); 3773 SKIP_BLANKS; 3774 SHRINK; 3775 if (RAW == ')') { 3776 ctxt->entity = ctxt->input; 3777 NEXT; 3778 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3779 if (RAW == '*') { 3780 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3781 NEXT; 3782 } 3783 return(ret); 3784 } 3785 if ((RAW == '(') || (RAW == '|')) { 3786 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3787 if (ret == NULL) return(NULL); 3788 } 3789 while (RAW == '|') { 3790 NEXT; 3791 if (elem == NULL) { 3792 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3793 if (ret == NULL) return(NULL); 3794 ret->c1 = cur; 3795 cur = ret; 3796 } else { 3797 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3798 if (n == NULL) return(NULL); 3799 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 3800 cur->c2 = n; 3801 cur = n; 3802 xmlFree(elem); 3803 } 3804 SKIP_BLANKS; 3805 elem = xmlParseName(ctxt); 3806 if (elem == NULL) { 3807 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3809 ctxt->sax->error(ctxt->userData, 3810 "xmlParseElementMixedContentDecl : Name expected\n"); 3811 ctxt->wellFormed = 0; 3812 ctxt->disableSAX = 1; 3813 xmlFreeElementContent(cur); 3814 return(NULL); 3815 } 3816 SKIP_BLANKS; 3817 GROW; 3818 } 3819 if ((RAW == ')') && (NXT(1) == '*')) { 3820 if (elem != NULL) { 3821 cur->c2 = xmlNewElementContent(elem, 3822 XML_ELEMENT_CONTENT_ELEMENT); 3823 xmlFree(elem); 3824 } 3825 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3826 ctxt->entity = ctxt->input; 3827 
SKIP(2); 3828 } else { 3829 if (elem != NULL) xmlFree(elem); 3830 xmlFreeElementContent(ret); 3831 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 3832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3833 ctxt->sax->error(ctxt->userData, 3834 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 3835 ctxt->wellFormed = 0; 3836 ctxt->disableSAX = 1; 3837 return(NULL); 3838 } 3839 3840 } else { 3841 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 3842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3843 ctxt->sax->error(ctxt->userData, 3844 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 3845 ctxt->wellFormed = 0; 3846 ctxt->disableSAX = 1; 3847 } 3848 return(ret); 3849} 3850 3851/** 3852 * xmlParseElementChildrenContentDecl: 3853 * @ctxt: an XML parser context 3854 * 3855 * parse the declaration for a Mixed Element content 3856 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3857 * 3858 * 3859 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 3860 * 3861 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 3862 * 3863 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 3864 * 3865 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 3866 * 3867 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 3868 * TODO Parameter-entity replacement text must be properly nested 3869 * with parenthetized groups. That is to say, if either of the 3870 * opening or closing parentheses in a choice, seq, or Mixed 3871 * construct is contained in the replacement text for a parameter 3872 * entity, both must be contained in the same replacement text. For 3873 * interoperability, if a parameter-entity reference appears in a 3874 * choice, seq, or Mixed construct, its replacement text should not 3875 * be empty, and neither the first nor last non-blank character of 3876 * the replacement text should be a connector (| or ,). 3877 * 3878 * returns: the tree of xmlElementContentPtr describing the element 3879 * hierarchy. 
3880 */ 3881xmlElementContentPtr 3882xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) { 3883 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 3884 xmlChar *elem; 3885 xmlChar type = 0; 3886 3887 SKIP_BLANKS; 3888 GROW; 3889 if (RAW == '(') { 3890 /* Recurse on first child */ 3891 NEXT; 3892 SKIP_BLANKS; 3893 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 3894 SKIP_BLANKS; 3895 GROW; 3896 } else { 3897 elem = xmlParseName(ctxt); 3898 if (elem == NULL) { 3899 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3901 ctxt->sax->error(ctxt->userData, 3902 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 3903 ctxt->wellFormed = 0; 3904 ctxt->disableSAX = 1; 3905 return(NULL); 3906 } 3907 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 3908 GROW; 3909 if (RAW == '?') { 3910 cur->ocur = XML_ELEMENT_CONTENT_OPT; 3911 NEXT; 3912 } else if (RAW == '*') { 3913 cur->ocur = XML_ELEMENT_CONTENT_MULT; 3914 NEXT; 3915 } else if (RAW == '+') { 3916 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 3917 NEXT; 3918 } else { 3919 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 3920 } 3921 xmlFree(elem); 3922 GROW; 3923 } 3924 SKIP_BLANKS; 3925 SHRINK; 3926 while (RAW != ')') { 3927 /* 3928 * Each loop we parse one separator and one element. 
3929 */ 3930 if (RAW == ',') { 3931 if (type == 0) type = CUR; 3932 3933 /* 3934 * Detect "Name | Name , Name" error 3935 */ 3936 else if (type != CUR) { 3937 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3939 ctxt->sax->error(ctxt->userData, 3940 "xmlParseElementChildrenContentDecl : '%c' expected\n", 3941 type); 3942 ctxt->wellFormed = 0; 3943 ctxt->disableSAX = 1; 3944 if ((op != NULL) && (op != ret)) 3945 xmlFreeElementContent(op); 3946 if ((last != NULL) && (last != ret) && 3947 (last != ret->c1) && (last != ret->c2)) 3948 xmlFreeElementContent(last); 3949 if (ret != NULL) 3950 xmlFreeElementContent(ret); 3951 return(NULL); 3952 } 3953 NEXT; 3954 3955 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 3956 if (op == NULL) { 3957 xmlFreeElementContent(ret); 3958 return(NULL); 3959 } 3960 if (last == NULL) { 3961 op->c1 = ret; 3962 ret = cur = op; 3963 } else { 3964 cur->c2 = op; 3965 op->c1 = last; 3966 cur =op; 3967 last = NULL; 3968 } 3969 } else if (RAW == '|') { 3970 if (type == 0) type = CUR; 3971 3972 /* 3973 * Detect "Name , Name | Name" error 3974 */ 3975 else if (type != CUR) { 3976 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3978 ctxt->sax->error(ctxt->userData, 3979 "xmlParseElementChildrenContentDecl : '%c' expected\n", 3980 type); 3981 ctxt->wellFormed = 0; 3982 ctxt->disableSAX = 1; 3983 if ((op != NULL) && (op != ret) && (op != last)) 3984 xmlFreeElementContent(op); 3985 if ((last != NULL) && (last != ret) && 3986 (last != ret->c1) && (last != ret->c2)) 3987 xmlFreeElementContent(last); 3988 if (ret != NULL) 3989 xmlFreeElementContent(ret); 3990 return(NULL); 3991 } 3992 NEXT; 3993 3994 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3995 if (op == NULL) { 3996 if ((op != NULL) && (op != ret)) 3997 xmlFreeElementContent(op); 3998 if ((last != NULL) && (last != ret) && 3999 (last != ret->c1) && (last != ret->c2)) 4000 
xmlFreeElementContent(last); 4001 if (ret != NULL) 4002 xmlFreeElementContent(ret); 4003 return(NULL); 4004 } 4005 if (last == NULL) { 4006 op->c1 = ret; 4007 ret = cur = op; 4008 } else { 4009 cur->c2 = op; 4010 op->c1 = last; 4011 cur =op; 4012 last = NULL; 4013 } 4014 } else { 4015 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4017 ctxt->sax->error(ctxt->userData, 4018 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4019 ctxt->wellFormed = 0; 4020 ctxt->disableSAX = 1; 4021 if ((op != NULL) && (op != ret)) 4022 xmlFreeElementContent(op); 4023 if ((last != NULL) && (last != ret) && 4024 (last != ret->c1) && (last != ret->c2)) 4025 xmlFreeElementContent(last); 4026 if (ret != NULL) 4027 xmlFreeElementContent(ret); 4028 return(NULL); 4029 } 4030 GROW; 4031 SKIP_BLANKS; 4032 GROW; 4033 if (RAW == '(') { 4034 /* Recurse on second child */ 4035 NEXT; 4036 SKIP_BLANKS; 4037 last = xmlParseElementChildrenContentDecl(ctxt); 4038 SKIP_BLANKS; 4039 } else { 4040 elem = xmlParseName(ctxt); 4041 if (elem == NULL) { 4042 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4044 ctxt->sax->error(ctxt->userData, 4045 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4046 ctxt->wellFormed = 0; 4047 ctxt->disableSAX = 1; 4048 if ((op != NULL) && (op != ret)) 4049 xmlFreeElementContent(op); 4050 if ((last != NULL) && (last != ret) && 4051 (last != ret->c1) && (last != ret->c2)) 4052 xmlFreeElementContent(last); 4053 if (ret != NULL) 4054 xmlFreeElementContent(ret); 4055 return(NULL); 4056 } 4057 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4058 xmlFree(elem); 4059 if (RAW == '?') { 4060 last->ocur = XML_ELEMENT_CONTENT_OPT; 4061 NEXT; 4062 } else if (RAW == '*') { 4063 last->ocur = XML_ELEMENT_CONTENT_MULT; 4064 NEXT; 4065 } else if (RAW == '+') { 4066 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4067 NEXT; 4068 } else { 
4069 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4070 } 4071 } 4072 SKIP_BLANKS; 4073 GROW; 4074 } 4075 if ((cur != NULL) && (last != NULL)) { 4076 cur->c2 = last; 4077 } 4078 ctxt->entity = ctxt->input; 4079 NEXT; 4080 if (RAW == '?') { 4081 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4082 NEXT; 4083 } else if (RAW == '*') { 4084 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4085 NEXT; 4086 } else if (RAW == '+') { 4087 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4088 NEXT; 4089 } 4090 return(ret); 4091} 4092 4093/** 4094 * xmlParseElementContentDecl: 4095 * @ctxt: an XML parser context 4096 * @name: the name of the element being defined. 4097 * @result: the Element Content pointer will be stored here if any 4098 * 4099 * parse the declaration for an Element content either Mixed or Children, 4100 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4101 * 4102 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4103 * 4104 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4105 */ 4106 4107int 4108xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4109 xmlElementContentPtr *result) { 4110 4111 xmlElementContentPtr tree = NULL; 4112 xmlParserInputPtr input = ctxt->input; 4113 int res; 4114 4115 *result = NULL; 4116 4117 if (RAW != '(') { 4118 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4120 ctxt->sax->error(ctxt->userData, 4121 "xmlParseElementContentDecl : '(' expected\n"); 4122 ctxt->wellFormed = 0; 4123 ctxt->disableSAX = 1; 4124 return(-1); 4125 } 4126 NEXT; 4127 GROW; 4128 SKIP_BLANKS; 4129 if ((RAW == '#') && (NXT(1) == 'P') && 4130 (NXT(2) == 'C') && (NXT(3) == 'D') && 4131 (NXT(4) == 'A') && (NXT(5) == 'T') && 4132 (NXT(6) == 'A')) { 4133 tree = xmlParseElementMixedContentDecl(ctxt); 4134 res = XML_ELEMENT_TYPE_MIXED; 4135 } else { 4136 tree = xmlParseElementChildrenContentDecl(ctxt); 4137 res = XML_ELEMENT_TYPE_ELEMENT; 4138 } 4139 if ((ctxt->entity != NULL) && (input != 
ctxt->entity)) { 4140 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4142 ctxt->sax->error(ctxt->userData, 4143"Element content declaration doesn't start and stop in the same entity\n"); 4144 ctxt->wellFormed = 0; 4145 ctxt->disableSAX = 1; 4146 } 4147 SKIP_BLANKS; 4148 *result = tree; 4149 return(res); 4150} 4151 4152/** 4153 * xmlParseElementDecl: 4154 * @ctxt: an XML parser context 4155 * 4156 * parse an Element declaration. 4157 * 4158 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 4159 * 4160 * [ VC: Unique Element Type Declaration ] 4161 * No element type may be declared more than once 4162 * 4163 * Returns the type of the element, or -1 in case of error 4164 */ 4165int 4166xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4167 xmlChar *name; 4168 int ret = -1; 4169 xmlElementContentPtr content = NULL; 4170 4171 GROW; 4172 if ((RAW == '<') && (NXT(1) == '!') && 4173 (NXT(2) == 'E') && (NXT(3) == 'L') && 4174 (NXT(4) == 'E') && (NXT(5) == 'M') && 4175 (NXT(6) == 'E') && (NXT(7) == 'N') && 4176 (NXT(8) == 'T')) { 4177 xmlParserInputPtr input = ctxt->input; 4178 4179 SKIP(9); 4180 if (!IS_BLANK(CUR)) { 4181 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4183 ctxt->sax->error(ctxt->userData, 4184 "Space required after 'ELEMENT'\n"); 4185 ctxt->wellFormed = 0; 4186 ctxt->disableSAX = 1; 4187 } 4188 SKIP_BLANKS; 4189 name = xmlParseName(ctxt); 4190 if (name == NULL) { 4191 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4193 ctxt->sax->error(ctxt->userData, 4194 "xmlParseElementDecl: no name for Element\n"); 4195 ctxt->wellFormed = 0; 4196 ctxt->disableSAX = 1; 4197 return(-1); 4198 } 4199 if (!IS_BLANK(CUR)) { 4200 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4202 ctxt->sax->error(ctxt->userData, 4203 "Space required after the element name\n"); 4204 
ctxt->wellFormed = 0; 4205 ctxt->disableSAX = 1; 4206 } 4207 SKIP_BLANKS; 4208 if ((RAW == 'E') && (NXT(1) == 'M') && 4209 (NXT(2) == 'P') && (NXT(3) == 'T') && 4210 (NXT(4) == 'Y')) { 4211 SKIP(5); 4212 /* 4213 * Element must always be empty. 4214 */ 4215 ret = XML_ELEMENT_TYPE_EMPTY; 4216 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4217 (NXT(2) == 'Y')) { 4218 SKIP(3); 4219 /* 4220 * Element is a generic container. 4221 */ 4222 ret = XML_ELEMENT_TYPE_ANY; 4223 } else if (RAW == '(') { 4224 ret = xmlParseElementContentDecl(ctxt, name, &content); 4225 } else { 4226 /* 4227 * [ WFC: PEs in Internal Subset ] error handling. 4228 */ 4229 if ((RAW == '%') && (ctxt->external == 0) && 4230 (ctxt->inputNr == 1)) { 4231 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4233 ctxt->sax->error(ctxt->userData, 4234 "PEReference: forbidden within markup decl in internal subset\n"); 4235 } else { 4236 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4238 ctxt->sax->error(ctxt->userData, 4239 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4240 } 4241 ctxt->wellFormed = 0; 4242 ctxt->disableSAX = 1; 4243 if (name != NULL) xmlFree(name); 4244 return(-1); 4245 } 4246 4247 SKIP_BLANKS; 4248 /* 4249 * Pop-up of finished entities. 
4250 */ 4251 while ((RAW == 0) && (ctxt->inputNr > 1)) 4252 xmlPopInput(ctxt); 4253 SKIP_BLANKS; 4254 4255 if (RAW != '>') { 4256 ctxt->errNo = XML_ERR_GT_REQUIRED; 4257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4258 ctxt->sax->error(ctxt->userData, 4259 "xmlParseElementDecl: expected '>' at the end\n"); 4260 ctxt->wellFormed = 0; 4261 ctxt->disableSAX = 1; 4262 } else { 4263 if (input != ctxt->input) { 4264 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4266 ctxt->sax->error(ctxt->userData, 4267"Element declaration doesn't start and stop in the same entity\n"); 4268 ctxt->wellFormed = 0; 4269 ctxt->disableSAX = 1; 4270 } 4271 4272 NEXT; 4273 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4274 (ctxt->sax->elementDecl != NULL)) 4275 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4276 content); 4277 } 4278 if (content != NULL) { 4279 xmlFreeElementContent(content); 4280 } 4281 if (name != NULL) { 4282 xmlFree(name); 4283 } 4284 } 4285 return(ret); 4286} 4287 4288/** 4289 * xmlParseMarkupDecl: 4290 * @ctxt: an XML parser context 4291 * 4292 * parse Markup declarations 4293 * 4294 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4295 * NotationDecl | PI | Comment 4296 * 4297 * [ VC: Proper Declaration/PE Nesting ] 4298 * Parameter-entity replacement text must be properly nested with 4299 * markup declarations. That is to say, if either the first character 4300 * or the last character of a markup declaration (markupdecl above) is 4301 * contained in the replacement text for a parameter-entity reference, 4302 * both must be contained in the same replacement text. 4303 * 4304 * [ WFC: PEs in Internal Subset ] 4305 * In the internal DTD subset, parameter-entity references can occur 4306 * only where markup declarations can occur, not within markup declarations. 4307 * (This does not apply to references that occur in external parameter 4308 * entities or to the external subset.) 
4309 */ 4310void 4311xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4312 GROW; 4313 xmlParseElementDecl(ctxt); 4314 xmlParseAttributeListDecl(ctxt); 4315 xmlParseEntityDecl(ctxt); 4316 xmlParseNotationDecl(ctxt); 4317 xmlParsePI(ctxt); 4318 xmlParseComment(ctxt); 4319 /* 4320 * This is only for internal subset. On external entities, 4321 * the replacement is done before parsing stage 4322 */ 4323 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4324 xmlParsePEReference(ctxt); 4325 ctxt->instate = XML_PARSER_DTD; 4326} 4327 4328/** 4329 * xmlParseTextDecl: 4330 * @ctxt: an XML parser context 4331 * 4332 * parse an XML declaration header for external entities 4333 * 4334 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4335 * 4336 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4337 */ 4338 4339void 4340xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4341 xmlChar *version; 4342 4343 /* 4344 * We know that '<?xml' is here. 4345 */ 4346 if ((RAW == '<') && (NXT(1) == '?') && 4347 (NXT(2) == 'x') && (NXT(3) == 'm') && 4348 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4349 SKIP(5); 4350 } else { 4351 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4353 ctxt->sax->error(ctxt->userData, 4354 "Text declaration '<?xml' required\n"); 4355 ctxt->wellFormed = 0; 4356 ctxt->disableSAX = 1; 4357 4358 return; 4359 } 4360 4361 if (!IS_BLANK(CUR)) { 4362 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4364 ctxt->sax->error(ctxt->userData, 4365 "Space needed after '<?xml'\n"); 4366 ctxt->wellFormed = 0; 4367 ctxt->disableSAX = 1; 4368 } 4369 SKIP_BLANKS; 4370 4371 /* 4372 * We may have the VersionInfo here. 
4373 */ 4374 version = xmlParseVersionInfo(ctxt); 4375 if (version == NULL) 4376 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4377 ctxt->input->version = version; 4378 4379 /* 4380 * We must have the encoding declaration 4381 */ 4382 if (!IS_BLANK(CUR)) { 4383 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4385 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4386 ctxt->wellFormed = 0; 4387 ctxt->disableSAX = 1; 4388 } 4389 xmlParseEncodingDecl(ctxt); 4390 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4391 /* 4392 * The XML REC instructs us to stop parsing right here 4393 */ 4394 return; 4395 } 4396 4397 SKIP_BLANKS; 4398 if ((RAW == '?') && (NXT(1) == '>')) { 4399 SKIP(2); 4400 } else if (RAW == '>') { 4401 /* Deprecated old WD ... */ 4402 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4404 ctxt->sax->error(ctxt->userData, 4405 "XML declaration must end-up with '?>'\n"); 4406 ctxt->wellFormed = 0; 4407 ctxt->disableSAX = 1; 4408 NEXT; 4409 } else { 4410 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4412 ctxt->sax->error(ctxt->userData, 4413 "parsing XML declaration: '?>' expected\n"); 4414 ctxt->wellFormed = 0; 4415 ctxt->disableSAX = 1; 4416 MOVETO_ENDTAG(CUR_PTR); 4417 NEXT; 4418 } 4419} 4420 4421/* 4422 * xmlParseConditionalSections 4423 * @ctxt: an XML parser context 4424 * 4425 * [61] conditionalSect ::= includeSect | ignoreSect 4426 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4427 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 4428 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4429 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4430 */ 4431 4432void 4433xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4434 SKIP(3); 4435 SKIP_BLANKS; 4436 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4437 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4438 (NXT(6) == 'E')) { 4439 SKIP(7); 4440 SKIP_BLANKS; 4441 if (RAW != '[') { 4442 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4444 ctxt->sax->error(ctxt->userData, 4445 "XML conditional section '[' expected\n"); 4446 ctxt->wellFormed = 0; 4447 ctxt->disableSAX = 1; 4448 } else { 4449 NEXT; 4450 } 4451 if (xmlParserDebugEntities) { 4452 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4453 xmlGenericError(xmlGenericErrorContext, 4454 "%s(%d): ", ctxt->input->filename, 4455 ctxt->input->line); 4456 xmlGenericError(xmlGenericErrorContext, 4457 "Entering INCLUDE Conditional Section\n"); 4458 } 4459 4460 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4461 (NXT(2) != '>'))) { 4462 const xmlChar *check = CUR_PTR; 4463 int cons = ctxt->input->consumed; 4464 int tok = ctxt->token; 4465 4466 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4467 xmlParseConditionalSections(ctxt); 4468 } else if (IS_BLANK(CUR)) { 4469 NEXT; 4470 } else if (RAW == '%') { 4471 xmlParsePEReference(ctxt); 4472 } else 4473 xmlParseMarkupDecl(ctxt); 4474 4475 /* 4476 * Pop-up of finished entities. 
4477 */ 4478 while ((RAW == 0) && (ctxt->inputNr > 1)) 4479 xmlPopInput(ctxt); 4480 4481 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4482 (tok == ctxt->token)) { 4483 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4485 ctxt->sax->error(ctxt->userData, 4486 "Content error in the external subset\n"); 4487 ctxt->wellFormed = 0; 4488 ctxt->disableSAX = 1; 4489 break; 4490 } 4491 } 4492 if (xmlParserDebugEntities) { 4493 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4494 xmlGenericError(xmlGenericErrorContext, 4495 "%s(%d): ", ctxt->input->filename, 4496 ctxt->input->line); 4497 xmlGenericError(xmlGenericErrorContext, 4498 "Leaving INCLUDE Conditional Section\n"); 4499 } 4500 4501 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4502 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4503 int state; 4504 4505 SKIP(6); 4506 SKIP_BLANKS; 4507 if (RAW != '[') { 4508 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4510 ctxt->sax->error(ctxt->userData, 4511 "XML conditional section '[' expected\n"); 4512 ctxt->wellFormed = 0; 4513 ctxt->disableSAX = 1; 4514 } else { 4515 NEXT; 4516 } 4517 if (xmlParserDebugEntities) { 4518 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4519 xmlGenericError(xmlGenericErrorContext, 4520 "%s(%d): ", ctxt->input->filename, 4521 ctxt->input->line); 4522 xmlGenericError(xmlGenericErrorContext, 4523 "Entering IGNORE Conditional Section\n"); 4524 } 4525 4526 /* 4527 * Parse up to the end of the conditionnal section 4528 * But disable SAX event generating DTD building in the meantime 4529 */ 4530 state = ctxt->disableSAX; 4531 ctxt->disableSAX = 1; 4532 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4533 (NXT(2) != '>'))) { 4534 const xmlChar *check = CUR_PTR; 4535 int cons = ctxt->input->consumed; 4536 int tok = ctxt->token; 4537 4538 if ((RAW == '<') && (NXT(1) == '!') && 
(NXT(2) == '[')) { 4539 xmlParseConditionalSections(ctxt); 4540 } else if (IS_BLANK(CUR)) { 4541 NEXT; 4542 } else if (RAW == '%') { 4543 xmlParsePEReference(ctxt); 4544 } else 4545 xmlParseMarkupDecl(ctxt); 4546 4547 /* 4548 * Pop-up of finished entities. 4549 */ 4550 while ((RAW == 0) && (ctxt->inputNr > 1)) 4551 xmlPopInput(ctxt); 4552 4553 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4554 (tok == ctxt->token)) { 4555 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4557 ctxt->sax->error(ctxt->userData, 4558 "Content error in the external subset\n"); 4559 ctxt->wellFormed = 0; 4560 ctxt->disableSAX = 1; 4561 break; 4562 } 4563 } 4564 ctxt->disableSAX = state; 4565 if (xmlParserDebugEntities) { 4566 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4567 xmlGenericError(xmlGenericErrorContext, 4568 "%s(%d): ", ctxt->input->filename, 4569 ctxt->input->line); 4570 xmlGenericError(xmlGenericErrorContext, 4571 "Leaving IGNORE Conditional Section\n"); 4572 } 4573 4574 } else { 4575 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4577 ctxt->sax->error(ctxt->userData, 4578 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4579 ctxt->wellFormed = 0; 4580 ctxt->disableSAX = 1; 4581 } 4582 4583 if (RAW == 0) 4584 SHRINK; 4585 4586 if (RAW == 0) { 4587 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4589 ctxt->sax->error(ctxt->userData, 4590 "XML conditional section not closed\n"); 4591 ctxt->wellFormed = 0; 4592 ctxt->disableSAX = 1; 4593 } else { 4594 SKIP(3); 4595 } 4596} 4597 4598/** 4599 * xmlParseExternalSubset: 4600 * @ctxt: an XML parser context 4601 * @ExternalID: the external identifier 4602 * @SystemID: the system identifier (or URL) 4603 * 4604 * parse Markup declarations from an external subset 4605 * 4606 * [30] extSubset ::= textDecl? 
extSubsetDecl 4607 * 4608 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4609 */ 4610void 4611xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4612 const xmlChar *SystemID) { 4613 GROW; 4614 if ((RAW == '<') && (NXT(1) == '?') && 4615 (NXT(2) == 'x') && (NXT(3) == 'm') && 4616 (NXT(4) == 'l')) { 4617 xmlParseTextDecl(ctxt); 4618 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4619 /* 4620 * The XML REC instructs us to stop parsing right here 4621 */ 4622 ctxt->instate = XML_PARSER_EOF; 4623 return; 4624 } 4625 } 4626 if (ctxt->myDoc == NULL) { 4627 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4628 } 4629 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4630 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 4631 4632 ctxt->instate = XML_PARSER_DTD; 4633 ctxt->external = 1; 4634 while (((RAW == '<') && (NXT(1) == '?')) || 4635 ((RAW == '<') && (NXT(1) == '!')) || 4636 IS_BLANK(CUR)) { 4637 const xmlChar *check = CUR_PTR; 4638 int cons = ctxt->input->consumed; 4639 int tok = ctxt->token; 4640 4641 GROW; 4642 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4643 xmlParseConditionalSections(ctxt); 4644 } else if (IS_BLANK(CUR)) { 4645 NEXT; 4646 } else if (RAW == '%') { 4647 xmlParsePEReference(ctxt); 4648 } else 4649 xmlParseMarkupDecl(ctxt); 4650 4651 /* 4652 * Pop-up of finished entities. 
4653 */ 4654 while ((RAW == 0) && (ctxt->inputNr > 1)) 4655 xmlPopInput(ctxt); 4656 4657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4658 (tok == ctxt->token)) { 4659 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4661 ctxt->sax->error(ctxt->userData, 4662 "Content error in the external subset\n"); 4663 ctxt->wellFormed = 0; 4664 ctxt->disableSAX = 1; 4665 break; 4666 } 4667 } 4668 4669 if (RAW != 0) { 4670 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4672 ctxt->sax->error(ctxt->userData, 4673 "Extra content at the end of the document\n"); 4674 ctxt->wellFormed = 0; 4675 ctxt->disableSAX = 1; 4676 } 4677 4678} 4679 4680/** 4681 * xmlParseReference: 4682 * @ctxt: an XML parser context 4683 * 4684 * parse and handle entity references in content, depending on the SAX 4685 * interface, this may end-up in a call to character() if this is a 4686 * CharRef, a predefined entity, if there is no reference() callback. 4687 * or if the parser was asked to switch to that mode. 4688 * 4689 * [67] Reference ::= EntityRef | CharRef 4690 */ 4691void 4692xmlParseReference(xmlParserCtxtPtr ctxt) { 4693 xmlEntityPtr ent; 4694 xmlChar *val; 4695 if (RAW != '&') return; 4696 4697 if (NXT(1) == '#') { 4698 int i = 0; 4699 xmlChar out[10]; 4700 int hex = NXT(2); 4701 int val = xmlParseCharRef(ctxt); 4702 4703 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 4704 /* 4705 * So we are using non-UTF-8 buffers 4706 * Check that the char fit on 8bits, if not 4707 * generate a CharRef. 
4708 */ 4709 if (val <= 0xFF) { 4710 out[0] = val; 4711 out[1] = 0; 4712 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4713 (!ctxt->disableSAX)) 4714 ctxt->sax->characters(ctxt->userData, out, 1); 4715 } else { 4716 if ((hex == 'x') || (hex == 'X')) 4717 sprintf((char *)out, "#x%X", val); 4718 else 4719 sprintf((char *)out, "#%d", val); 4720 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4721 (!ctxt->disableSAX)) 4722 ctxt->sax->reference(ctxt->userData, out); 4723 } 4724 } else { 4725 /* 4726 * Just encode the value in UTF-8 4727 */ 4728 COPY_BUF(0 ,out, i, val); 4729 out[i] = 0; 4730 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4731 (!ctxt->disableSAX)) 4732 ctxt->sax->characters(ctxt->userData, out, i); 4733 } 4734 } else { 4735 ent = xmlParseEntityRef(ctxt); 4736 if (ent == NULL) return; 4737 if ((ent->name != NULL) && 4738 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 4739 xmlNodePtr list = NULL; 4740 int ret; 4741 4742 4743 /* 4744 * The first reference to the entity trigger a parsing phase 4745 * where the ent->children is filled with the result from 4746 * the parsing. 4747 */ 4748 if (ent->children == NULL) { 4749 xmlChar *value; 4750 value = ent->content; 4751 4752 /* 4753 * Check that this entity is well formed 4754 */ 4755 if ((value != NULL) && 4756 (value[1] == 0) && (value[0] == '<') && 4757 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 4758 /* 4759 * DONE: get definite answer on this !!! 4760 * Lots of entity decls are used to declare a single 4761 * char 4762 * <!ENTITY lt "<"> 4763 * Which seems to be valid since 4764 * 2.4: The ampersand character (&) and the left angle 4765 * bracket (<) may appear in their literal form only 4766 * when used ... They are also legal within the literal 4767 * entity value of an internal entity declaration;i 4768 * see "4.3.2 Well-Formed Parsed Entities". 4769 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
4770 * Looking at the OASIS test suite and James Clark 4771 * tests, this is broken. However the XML REC uses 4772 * it. Is the XML REC not well-formed ???? 4773 * This is a hack to avoid this problem 4774 * 4775 * ANSWER: since lt gt amp .. are already defined, 4776 * this is a redefinition and hence the fact that the 4777 * contentis not well balanced is not a Wf error, this 4778 * is lousy but acceptable. 4779 */ 4780 list = xmlNewDocText(ctxt->myDoc, value); 4781 if (list != NULL) { 4782 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4783 (ent->children == NULL)) { 4784 ent->children = list; 4785 ent->last = list; 4786 list->parent = (xmlNodePtr) ent; 4787 } else { 4788 xmlFreeNodeList(list); 4789 } 4790 } else if (list != NULL) { 4791 xmlFreeNodeList(list); 4792 } 4793 } else { 4794 /* 4795 * 4.3.2: An internal general parsed entity is well-formed 4796 * if its replacement text matches the production labeled 4797 * content. 4798 */ 4799 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 4800 ctxt->depth++; 4801 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 4802 ctxt->sax, NULL, ctxt->depth, 4803 value, &list); 4804 ctxt->depth--; 4805 } else if (ent->etype == 4806 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 4807 ctxt->depth++; 4808 ret = xmlParseExternalEntity(ctxt->myDoc, 4809 ctxt->sax, NULL, ctxt->depth, 4810 ent->URI, ent->ExternalID, &list); 4811 ctxt->depth--; 4812 } else { 4813 ret = -1; 4814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4815 ctxt->sax->error(ctxt->userData, 4816 "Internal: invalid entity type\n"); 4817 } 4818 if (ret == XML_ERR_ENTITY_LOOP) { 4819 ctxt->errNo = XML_ERR_ENTITY_LOOP; 4820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4821 ctxt->sax->error(ctxt->userData, 4822 "Detected entity reference loop\n"); 4823 ctxt->wellFormed = 0; 4824 ctxt->disableSAX = 1; 4825 } else if ((ret == 0) && (list != NULL)) { 4826 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4827 (ent->children == NULL)) { 4828 ent->children = 
list; 4829 while (list != NULL) { 4830 list->parent = (xmlNodePtr) ent; 4831 if (list->next == NULL) 4832 ent->last = list; 4833 list = list->next; 4834 } 4835 } else { 4836 xmlFreeNodeList(list); 4837 } 4838 } else if (ret > 0) { 4839 ctxt->errNo = ret; 4840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4841 ctxt->sax->error(ctxt->userData, 4842 "Entity value required\n"); 4843 ctxt->wellFormed = 0; 4844 ctxt->disableSAX = 1; 4845 } else if (list != NULL) { 4846 xmlFreeNodeList(list); 4847 } 4848 } 4849 } 4850 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4851 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 4852 /* 4853 * Create a node. 4854 */ 4855 ctxt->sax->reference(ctxt->userData, ent->name); 4856 return; 4857 } else if (ctxt->replaceEntities) { 4858 if ((ctxt->node != NULL) && (ent->children != NULL)) { 4859 /* 4860 * Seems we are generating the DOM content, do 4861 * a simple tree copy 4862 */ 4863 xmlNodePtr new; 4864 new = xmlCopyNodeList(ent->children); 4865 4866 xmlAddChildList(ctxt->node, new); 4867 /* 4868 * This is to avoid a nasty side effect, see 4869 * characters() in SAX.c 4870 */ 4871 ctxt->nodemem = 0; 4872 ctxt->nodelen = 0; 4873 return; 4874 } else { 4875 /* 4876 * Probably running in SAX mode 4877 */ 4878 xmlParserInputPtr input; 4879 4880 input = xmlNewEntityInputStream(ctxt, ent); 4881 xmlPushInput(ctxt, input); 4882 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 4883 (RAW == '<') && (NXT(1) == '?') && 4884 (NXT(2) == 'x') && (NXT(3) == 'm') && 4885 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4886 xmlParseTextDecl(ctxt); 4887 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4888 /* 4889 * The XML REC instructs us to stop parsing right here 4890 */ 4891 ctxt->instate = XML_PARSER_EOF; 4892 return; 4893 } 4894 if (input->standalone == 1) { 4895 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 4896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4897 ctxt->sax->error(ctxt->userData, 4898 "external 
parsed entities cannot be standalone\n"); 4899 ctxt->wellFormed = 0; 4900 ctxt->disableSAX = 1; 4901 } 4902 } 4903 return; 4904 } 4905 } 4906 } else { 4907 val = ent->content; 4908 if (val == NULL) return; 4909 /* 4910 * inline the entity. 4911 */ 4912 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4913 (!ctxt->disableSAX)) 4914 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 4915 } 4916 } 4917} 4918 4919/** 4920 * xmlParseEntityRef: 4921 * @ctxt: an XML parser context 4922 * 4923 * parse ENTITY references declarations 4924 * 4925 * [68] EntityRef ::= '&' Name ';' 4926 * 4927 * [ WFC: Entity Declared ] 4928 * In a document without any DTD, a document with only an internal DTD 4929 * subset which contains no parameter entity references, or a document 4930 * with "standalone='yes'", the Name given in the entity reference 4931 * must match that in an entity declaration, except that well-formed 4932 * documents need not declare any of the following entities: amp, lt, 4933 * gt, apos, quot. The declaration of a parameter entity must precede 4934 * any reference to it. Similarly, the declaration of a general entity 4935 * must precede any reference to it which appears in a default value in an 4936 * attribute-list declaration. Note that if entities are declared in the 4937 * external subset or in external parameter entities, a non-validating 4938 * processor is not obligated to read and process their declarations; 4939 * for such documents, the rule that an entity must be declared is a 4940 * well-formedness constraint only if standalone='yes'. 4941 * 4942 * [ WFC: Parsed Entity ] 4943 * An entity reference must not contain the name of an unparsed entity 4944 * 4945 * Returns the xmlEntityPtr if found, or NULL otherwise. 
4946 */ 4947xmlEntityPtr 4948xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 4949 xmlChar *name; 4950 xmlEntityPtr ent = NULL; 4951 4952 GROW; 4953 4954 if (RAW == '&') { 4955 NEXT; 4956 name = xmlParseName(ctxt); 4957 if (name == NULL) { 4958 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4960 ctxt->sax->error(ctxt->userData, 4961 "xmlParseEntityRef: no name\n"); 4962 ctxt->wellFormed = 0; 4963 ctxt->disableSAX = 1; 4964 } else { 4965 if (RAW == ';') { 4966 NEXT; 4967 /* 4968 * Ask first SAX for entity resolution, otherwise try the 4969 * predefined set. 4970 */ 4971 if (ctxt->sax != NULL) { 4972 if (ctxt->sax->getEntity != NULL) 4973 ent = ctxt->sax->getEntity(ctxt->userData, name); 4974 if (ent == NULL) 4975 ent = xmlGetPredefinedEntity(name); 4976 } 4977 /* 4978 * [ WFC: Entity Declared ] 4979 * In a document without any DTD, a document with only an 4980 * internal DTD subset which contains no parameter entity 4981 * references, or a document with "standalone='yes'", the 4982 * Name given in the entity reference must match that in an 4983 * entity declaration, except that well-formed documents 4984 * need not declare any of the following entities: amp, lt, 4985 * gt, apos, quot. 4986 * The declaration of a parameter entity must precede any 4987 * reference to it. 4988 * Similarly, the declaration of a general entity must 4989 * precede any reference to it which appears in a default 4990 * value in an attribute-list declaration. Note that if 4991 * entities are declared in the external subset or in 4992 * external parameter entities, a non-validating processor 4993 * is not obligated to read and process their declarations; 4994 * for such documents, the rule that an entity must be 4995 * declared is a well-formedness constraint only if 4996 * standalone='yes'. 
4997 */ 4998 if (ent == NULL) { 4999 if ((ctxt->standalone == 1) || 5000 ((ctxt->hasExternalSubset == 0) && 5001 (ctxt->hasPErefs == 0))) { 5002 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5004 ctxt->sax->error(ctxt->userData, 5005 "Entity '%s' not defined\n", name); 5006 ctxt->wellFormed = 0; 5007 ctxt->disableSAX = 1; 5008 } else { 5009 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5010 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5011 ctxt->sax->warning(ctxt->userData, 5012 "Entity '%s' not defined\n", name); 5013 } 5014 } 5015 5016 /* 5017 * [ WFC: Parsed Entity ] 5018 * An entity reference must not contain the name of an 5019 * unparsed entity 5020 */ 5021 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5022 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5024 ctxt->sax->error(ctxt->userData, 5025 "Entity reference to unparsed entity %s\n", name); 5026 ctxt->wellFormed = 0; 5027 ctxt->disableSAX = 1; 5028 } 5029 5030 /* 5031 * [ WFC: No External Entity References ] 5032 * Attribute values cannot contain direct or indirect 5033 * entity references to external entities. 5034 */ 5035 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5036 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5037 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5039 ctxt->sax->error(ctxt->userData, 5040 "Attribute references external entity '%s'\n", name); 5041 ctxt->wellFormed = 0; 5042 ctxt->disableSAX = 1; 5043 } 5044 /* 5045 * [ WFC: No < in Attribute Values ] 5046 * The replacement text of any entity referred to directly or 5047 * indirectly in an attribute value (other than "<") must 5048 * not contain a <. 
5049 */ 5050 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5051 (ent != NULL) && 5052 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5053 (ent->content != NULL) && 5054 (xmlStrchr(ent->content, '<'))) { 5055 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5057 ctxt->sax->error(ctxt->userData, 5058 "'<' in entity '%s' is not allowed in attributes values\n", name); 5059 ctxt->wellFormed = 0; 5060 ctxt->disableSAX = 1; 5061 } 5062 5063 /* 5064 * Internal check, no parameter entities here ... 5065 */ 5066 else { 5067 switch (ent->etype) { 5068 case XML_INTERNAL_PARAMETER_ENTITY: 5069 case XML_EXTERNAL_PARAMETER_ENTITY: 5070 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5072 ctxt->sax->error(ctxt->userData, 5073 "Attempt to reference the parameter entity '%s'\n", name); 5074 ctxt->wellFormed = 0; 5075 ctxt->disableSAX = 1; 5076 break; 5077 default: 5078 break; 5079 } 5080 } 5081 5082 /* 5083 * [ WFC: No Recursion ] 5084 * A parsed entity must not contain a recursive reference 5085 * to itself, either directly or indirectly. 5086 * Done somewhere else 5087 */ 5088 5089 } else { 5090 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5092 ctxt->sax->error(ctxt->userData, 5093 "xmlParseEntityRef: expecting ';'\n"); 5094 ctxt->wellFormed = 0; 5095 ctxt->disableSAX = 1; 5096 } 5097 xmlFree(name); 5098 } 5099 } 5100 return(ent); 5101} 5102 5103/** 5104 * xmlParseStringEntityRef: 5105 * @ctxt: an XML parser context 5106 * @str: a pointer to an index in the string 5107 * 5108 * parse ENTITY references declarations, but this version parses it from 5109 * a string value. 
5110 * 5111 * [68] EntityRef ::= '&' Name ';' 5112 * 5113 * [ WFC: Entity Declared ] 5114 * In a document without any DTD, a document with only an internal DTD 5115 * subset which contains no parameter entity references, or a document 5116 * with "standalone='yes'", the Name given in the entity reference 5117 * must match that in an entity declaration, except that well-formed 5118 * documents need not declare any of the following entities: amp, lt, 5119 * gt, apos, quot. The declaration of a parameter entity must precede 5120 * any reference to it. Similarly, the declaration of a general entity 5121 * must precede any reference to it which appears in a default value in an 5122 * attribute-list declaration. Note that if entities are declared in the 5123 * external subset or in external parameter entities, a non-validating 5124 * processor is not obligated to read and process their declarations; 5125 * for such documents, the rule that an entity must be declared is a 5126 * well-formedness constraint only if standalone='yes'. 5127 * 5128 * [ WFC: Parsed Entity ] 5129 * An entity reference must not contain the name of an unparsed entity 5130 * 5131 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5132 * is updated to the current location in the string. 
5133 */ 5134xmlEntityPtr 5135xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5136 xmlChar *name; 5137 const xmlChar *ptr; 5138 xmlChar cur; 5139 xmlEntityPtr ent = NULL; 5140 5141 if ((str == NULL) || (*str == NULL)) 5142 return(NULL); 5143 ptr = *str; 5144 cur = *ptr; 5145 if (cur == '&') { 5146 ptr++; 5147 cur = *ptr; 5148 name = xmlParseStringName(ctxt, &ptr); 5149 if (name == NULL) { 5150 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5152 ctxt->sax->error(ctxt->userData, 5153 "xmlParseEntityRef: no name\n"); 5154 ctxt->wellFormed = 0; 5155 ctxt->disableSAX = 1; 5156 } else { 5157 if (*ptr == ';') { 5158 ptr++; 5159 /* 5160 * Ask first SAX for entity resolution, otherwise try the 5161 * predefined set. 5162 */ 5163 if (ctxt->sax != NULL) { 5164 if (ctxt->sax->getEntity != NULL) 5165 ent = ctxt->sax->getEntity(ctxt->userData, name); 5166 if (ent == NULL) 5167 ent = xmlGetPredefinedEntity(name); 5168 } 5169 /* 5170 * [ WFC: Entity Declared ] 5171 * In a document without any DTD, a document with only an 5172 * internal DTD subset which contains no parameter entity 5173 * references, or a document with "standalone='yes'", the 5174 * Name given in the entity reference must match that in an 5175 * entity declaration, except that well-formed documents 5176 * need not declare any of the following entities: amp, lt, 5177 * gt, apos, quot. 5178 * The declaration of a parameter entity must precede any 5179 * reference to it. 5180 * Similarly, the declaration of a general entity must 5181 * precede any reference to it which appears in a default 5182 * value in an attribute-list declaration. 
Note that if 5183 * entities are declared in the external subset or in 5184 * external parameter entities, a non-validating processor 5185 * is not obligated to read and process their declarations; 5186 * for such documents, the rule that an entity must be 5187 * declared is a well-formedness constraint only if 5188 * standalone='yes'. 5189 */ 5190 if (ent == NULL) { 5191 if ((ctxt->standalone == 1) || 5192 ((ctxt->hasExternalSubset == 0) && 5193 (ctxt->hasPErefs == 0))) { 5194 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5196 ctxt->sax->error(ctxt->userData, 5197 "Entity '%s' not defined\n", name); 5198 ctxt->wellFormed = 0; 5199 ctxt->disableSAX = 1; 5200 } else { 5201 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5202 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5203 ctxt->sax->warning(ctxt->userData, 5204 "Entity '%s' not defined\n", name); 5205 } 5206 } 5207 5208 /* 5209 * [ WFC: Parsed Entity ] 5210 * An entity reference must not contain the name of an 5211 * unparsed entity 5212 */ 5213 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5214 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5216 ctxt->sax->error(ctxt->userData, 5217 "Entity reference to unparsed entity %s\n", name); 5218 ctxt->wellFormed = 0; 5219 ctxt->disableSAX = 1; 5220 } 5221 5222 /* 5223 * [ WFC: No External Entity References ] 5224 * Attribute values cannot contain direct or indirect 5225 * entity references to external entities. 
5226 */ 5227 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5228 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5229 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5231 ctxt->sax->error(ctxt->userData, 5232 "Attribute references external entity '%s'\n", name); 5233 ctxt->wellFormed = 0; 5234 ctxt->disableSAX = 1; 5235 } 5236 /* 5237 * [ WFC: No < in Attribute Values ] 5238 * The replacement text of any entity referred to directly or 5239 * indirectly in an attribute value (other than "<") must 5240 * not contain a <. 5241 */ 5242 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5243 (ent != NULL) && 5244 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5245 (ent->content != NULL) && 5246 (xmlStrchr(ent->content, '<'))) { 5247 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5249 ctxt->sax->error(ctxt->userData, 5250 "'<' in entity '%s' is not allowed in attributes values\n", name); 5251 ctxt->wellFormed = 0; 5252 ctxt->disableSAX = 1; 5253 } 5254 5255 /* 5256 * Internal check, no parameter entities here ... 5257 */ 5258 else { 5259 switch (ent->etype) { 5260 case XML_INTERNAL_PARAMETER_ENTITY: 5261 case XML_EXTERNAL_PARAMETER_ENTITY: 5262 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5264 ctxt->sax->error(ctxt->userData, 5265 "Attempt to reference the parameter entity '%s'\n", name); 5266 ctxt->wellFormed = 0; 5267 ctxt->disableSAX = 1; 5268 break; 5269 default: 5270 break; 5271 } 5272 } 5273 5274 /* 5275 * [ WFC: No Recursion ] 5276 * A parsed entity must not contain a recursive reference 5277 * to itself, either directly or indirectly. 
5278 * Done somewhwere else 5279 */ 5280 5281 } else { 5282 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5284 ctxt->sax->error(ctxt->userData, 5285 "xmlParseEntityRef: expecting ';'\n"); 5286 ctxt->wellFormed = 0; 5287 ctxt->disableSAX = 1; 5288 } 5289 xmlFree(name); 5290 } 5291 } 5292 *str = ptr; 5293 return(ent); 5294} 5295 5296/** 5297 * xmlParsePEReference: 5298 * @ctxt: an XML parser context 5299 * 5300 * parse PEReference declarations 5301 * The entity content is handled directly by pushing it's content as 5302 * a new input stream. 5303 * 5304 * [69] PEReference ::= '%' Name ';' 5305 * 5306 * [ WFC: No Recursion ] 5307 * A parsed entity must not contain a recursive 5308 * reference to itself, either directly or indirectly. 5309 * 5310 * [ WFC: Entity Declared ] 5311 * In a document without any DTD, a document with only an internal DTD 5312 * subset which contains no parameter entity references, or a document 5313 * with "standalone='yes'", ... ... The declaration of a parameter 5314 * entity must precede any reference to it... 5315 * 5316 * [ VC: Entity Declared ] 5317 * In a document with an external subset or external parameter entities 5318 * with "standalone='no'", ... ... The declaration of a parameter entity 5319 * must precede any reference to it... 5320 * 5321 * [ WFC: In DTD ] 5322 * Parameter-entity references may only appear in the DTD. 5323 * NOTE: misleading but this is handled. 
5324 */ 5325void 5326xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5327 xmlChar *name; 5328 xmlEntityPtr entity = NULL; 5329 xmlParserInputPtr input; 5330 5331 if (RAW == '%') { 5332 NEXT; 5333 name = xmlParseName(ctxt); 5334 if (name == NULL) { 5335 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5337 ctxt->sax->error(ctxt->userData, 5338 "xmlParsePEReference: no name\n"); 5339 ctxt->wellFormed = 0; 5340 ctxt->disableSAX = 1; 5341 } else { 5342 if (RAW == ';') { 5343 NEXT; 5344 if ((ctxt->sax != NULL) && 5345 (ctxt->sax->getParameterEntity != NULL)) 5346 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5347 name); 5348 if (entity == NULL) { 5349 /* 5350 * [ WFC: Entity Declared ] 5351 * In a document without any DTD, a document with only an 5352 * internal DTD subset which contains no parameter entity 5353 * references, or a document with "standalone='yes'", ... 5354 * ... The declaration of a parameter entity must precede 5355 * any reference to it... 5356 */ 5357 if ((ctxt->standalone == 1) || 5358 ((ctxt->hasExternalSubset == 0) && 5359 (ctxt->hasPErefs == 0))) { 5360 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5361 if ((!ctxt->disableSAX) && 5362 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5363 ctxt->sax->error(ctxt->userData, 5364 "PEReference: %%%s; not found\n", name); 5365 ctxt->wellFormed = 0; 5366 ctxt->disableSAX = 1; 5367 } else { 5368 /* 5369 * [ VC: Entity Declared ] 5370 * In a document with an external subset or external 5371 * parameter entities with "standalone='no'", ... 5372 * ... The declaration of a parameter entity must precede 5373 * any reference to it... 
5374 */ 5375 if ((!ctxt->disableSAX) && 5376 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5377 ctxt->sax->warning(ctxt->userData, 5378 "PEReference: %%%s; not found\n", name); 5379 ctxt->valid = 0; 5380 } 5381 } else { 5382 /* 5383 * Internal checking in case the entity quest barfed 5384 */ 5385 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5386 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5387 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5388 ctxt->sax->warning(ctxt->userData, 5389 "Internal: %%%s; is not a parameter entity\n", name); 5390 } else { 5391 /* 5392 * TODO !!! 5393 * handle the extra spaces added before and after 5394 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5395 */ 5396 input = xmlNewEntityInputStream(ctxt, entity); 5397 xmlPushInput(ctxt, input); 5398 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5399 (RAW == '<') && (NXT(1) == '?') && 5400 (NXT(2) == 'x') && (NXT(3) == 'm') && 5401 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5402 xmlParseTextDecl(ctxt); 5403 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5404 /* 5405 * The XML REC instructs us to stop parsing 5406 * right here 5407 */ 5408 ctxt->instate = XML_PARSER_EOF; 5409 xmlFree(name); 5410 return; 5411 } 5412 } 5413 if (ctxt->token == 0) 5414 ctxt->token = ' '; 5415 } 5416 } 5417 ctxt->hasPErefs = 1; 5418 } else { 5419 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5421 ctxt->sax->error(ctxt->userData, 5422 "xmlParsePEReference: expecting ';'\n"); 5423 ctxt->wellFormed = 0; 5424 ctxt->disableSAX = 1; 5425 } 5426 xmlFree(name); 5427 } 5428 } 5429} 5430 5431/** 5432 * xmlParseStringPEReference: 5433 * @ctxt: an XML parser context 5434 * @str: a pointer to an index in the string 5435 * 5436 * parse PEReference declarations 5437 * 5438 * [69] PEReference ::= '%' Name ';' 5439 * 5440 * [ WFC: No Recursion ] 5441 * A parsed entity must not contain a recursive 5442 * reference to itself, 
either directly or indirectly. 5443 * 5444 * [ WFC: Entity Declared ] 5445 * In a document without any DTD, a document with only an internal DTD 5446 * subset which contains no parameter entity references, or a document 5447 * with "standalone='yes'", ... ... The declaration of a parameter 5448 * entity must precede any reference to it... 5449 * 5450 * [ VC: Entity Declared ] 5451 * In a document with an external subset or external parameter entities 5452 * with "standalone='no'", ... ... The declaration of a parameter entity 5453 * must precede any reference to it... 5454 * 5455 * [ WFC: In DTD ] 5456 * Parameter-entity references may only appear in the DTD. 5457 * NOTE: misleading but this is handled. 5458 * 5459 * Returns the string of the entity content. 5460 * str is updated to the current value of the index 5461 */ 5462xmlEntityPtr 5463xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5464 const xmlChar *ptr; 5465 xmlChar cur; 5466 xmlChar *name; 5467 xmlEntityPtr entity = NULL; 5468 5469 if ((str == NULL) || (*str == NULL)) return(NULL); 5470 ptr = *str; 5471 cur = *ptr; 5472 if (cur == '%') { 5473 ptr++; 5474 cur = *ptr; 5475 name = xmlParseStringName(ctxt, &ptr); 5476 if (name == NULL) { 5477 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5479 ctxt->sax->error(ctxt->userData, 5480 "xmlParseStringPEReference: no name\n"); 5481 ctxt->wellFormed = 0; 5482 ctxt->disableSAX = 1; 5483 } else { 5484 cur = *ptr; 5485 if (cur == ';') { 5486 ptr++; 5487 cur = *ptr; 5488 if ((ctxt->sax != NULL) && 5489 (ctxt->sax->getParameterEntity != NULL)) 5490 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5491 name); 5492 if (entity == NULL) { 5493 /* 5494 * [ WFC: Entity Declared ] 5495 * In a document without any DTD, a document with only an 5496 * internal DTD subset which contains no parameter entity 5497 * references, or a document with "standalone='yes'", ... 5498 * ... 
The declaration of a parameter entity must precede 5499 * any reference to it... 5500 */ 5501 if ((ctxt->standalone == 1) || 5502 ((ctxt->hasExternalSubset == 0) && 5503 (ctxt->hasPErefs == 0))) { 5504 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5506 ctxt->sax->error(ctxt->userData, 5507 "PEReference: %%%s; not found\n", name); 5508 ctxt->wellFormed = 0; 5509 ctxt->disableSAX = 1; 5510 } else { 5511 /* 5512 * [ VC: Entity Declared ] 5513 * In a document with an external subset or external 5514 * parameter entities with "standalone='no'", ... 5515 * ... The declaration of a parameter entity must 5516 * precede any reference to it... 5517 */ 5518 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5519 ctxt->sax->warning(ctxt->userData, 5520 "PEReference: %%%s; not found\n", name); 5521 ctxt->valid = 0; 5522 } 5523 } else { 5524 /* 5525 * Internal checking in case the entity quest barfed 5526 */ 5527 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5528 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5529 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5530 ctxt->sax->warning(ctxt->userData, 5531 "Internal: %%%s; is not a parameter entity\n", name); 5532 } 5533 } 5534 ctxt->hasPErefs = 1; 5535 } else { 5536 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5538 ctxt->sax->error(ctxt->userData, 5539 "xmlParseStringPEReference: expecting ';'\n"); 5540 ctxt->wellFormed = 0; 5541 ctxt->disableSAX = 1; 5542 } 5543 xmlFree(name); 5544 } 5545 } 5546 *str = ptr; 5547 return(entity); 5548} 5549 5550/** 5551 * xmlParseDocTypeDecl: 5552 * @ctxt: an XML parser context 5553 * 5554 * parse a DOCTYPE declaration 5555 * 5556 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5557 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 5558 * 5559 * [ VC: Root Element Type ] 5560 * The Name in the document type declaration must match the element 5561 * type of the root element. 5562 */ 5563 5564void 5565xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5566 xmlChar *name = NULL; 5567 xmlChar *ExternalID = NULL; 5568 xmlChar *URI = NULL; 5569 5570 /* 5571 * We know that '<!DOCTYPE' has been detected. 5572 */ 5573 SKIP(9); 5574 5575 SKIP_BLANKS; 5576 5577 /* 5578 * Parse the DOCTYPE name. 5579 */ 5580 name = xmlParseName(ctxt); 5581 if (name == NULL) { 5582 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5584 ctxt->sax->error(ctxt->userData, 5585 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5586 ctxt->wellFormed = 0; 5587 ctxt->disableSAX = 1; 5588 } 5589 ctxt->intSubName = name; 5590 5591 SKIP_BLANKS; 5592 5593 /* 5594 * Check for SystemID and ExternalID 5595 */ 5596 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 5597 5598 if ((URI != NULL) || (ExternalID != NULL)) { 5599 ctxt->hasExternalSubset = 1; 5600 } 5601 ctxt->extSubURI = URI; 5602 ctxt->extSubSystem = ExternalID; 5603 5604 SKIP_BLANKS; 5605 5606 /* 5607 * Create and update the internal subset. 5608 */ 5609 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 5610 (!ctxt->disableSAX)) 5611 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 5612 5613 /* 5614 * Is there any internal subset declarations ? 5615 * they are handled separately in xmlParseInternalSubset() 5616 */ 5617 if (RAW == '[') 5618 return; 5619 5620 /* 5621 * We should be at the end of the DOCTYPE declaration. 
5622 */ 5623 if (RAW != '>') { 5624 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5626 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5627 ctxt->wellFormed = 0; 5628 ctxt->disableSAX = 1; 5629 } 5630 NEXT; 5631} 5632 5633/** 5634 * xmlParseInternalsubset: 5635 * @ctxt: an XML parser context 5636 * 5637 * parse the internal subset declaration 5638 * 5639 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 5640 */ 5641 5642void 5643xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 5644 /* 5645 * Is there any DTD definition ? 5646 */ 5647 if (RAW == '[') { 5648 ctxt->instate = XML_PARSER_DTD; 5649 NEXT; 5650 /* 5651 * Parse the succession of Markup declarations and 5652 * PEReferences. 5653 * Subsequence (markupdecl | PEReference | S)* 5654 */ 5655 while (RAW != ']') { 5656 const xmlChar *check = CUR_PTR; 5657 int cons = ctxt->input->consumed; 5658 5659 SKIP_BLANKS; 5660 xmlParseMarkupDecl(ctxt); 5661 xmlParsePEReference(ctxt); 5662 5663 /* 5664 * Pop-up of finished entities. 5665 */ 5666 while ((RAW == 0) && (ctxt->inputNr > 1)) 5667 xmlPopInput(ctxt); 5668 5669 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5670 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5672 ctxt->sax->error(ctxt->userData, 5673 "xmlParseInternalSubset: error detected in Markup declaration\n"); 5674 ctxt->wellFormed = 0; 5675 ctxt->disableSAX = 1; 5676 break; 5677 } 5678 } 5679 if (RAW == ']') { 5680 NEXT; 5681 SKIP_BLANKS; 5682 } 5683 } 5684 5685 /* 5686 * We should be at the end of the DOCTYPE declaration. 
5687 */ 5688 if (RAW != '>') { 5689 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5691 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5692 ctxt->wellFormed = 0; 5693 ctxt->disableSAX = 1; 5694 } 5695 NEXT; 5696} 5697 5698/** 5699 * xmlParseAttribute: 5700 * @ctxt: an XML parser context 5701 * @value: a xmlChar ** used to store the value of the attribute 5702 * 5703 * parse an attribute 5704 * 5705 * [41] Attribute ::= Name Eq AttValue 5706 * 5707 * [ WFC: No External Entity References ] 5708 * Attribute values cannot contain direct or indirect entity references 5709 * to external entities. 5710 * 5711 * [ WFC: No < in Attribute Values ] 5712 * The replacement text of any entity referred to directly or indirectly in 5713 * an attribute value (other than "<") must not contain a <. 5714 * 5715 * [ VC: Attribute Value Type ] 5716 * The attribute must have been declared; the value must be of the type 5717 * declared for it. 5718 * 5719 * [25] Eq ::= S? '=' S? 5720 * 5721 * With namespace: 5722 * 5723 * [NS 11] Attribute ::= QName Eq AttValue 5724 * 5725 * Also the case QName == xmlns:??? is handled independently as a namespace 5726 * definition. 5727 * 5728 * Returns the attribute name, and the value in *value. 
5729 */ 5730 5731xmlChar * 5732xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 5733 xmlChar *name, *val; 5734 5735 *value = NULL; 5736 name = xmlParseName(ctxt); 5737 if (name == NULL) { 5738 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5740 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 5741 ctxt->wellFormed = 0; 5742 ctxt->disableSAX = 1; 5743 return(NULL); 5744 } 5745 5746 /* 5747 * read the value 5748 */ 5749 SKIP_BLANKS; 5750 if (RAW == '=') { 5751 NEXT; 5752 SKIP_BLANKS; 5753 val = xmlParseAttValue(ctxt); 5754 ctxt->instate = XML_PARSER_CONTENT; 5755 } else { 5756 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5758 ctxt->sax->error(ctxt->userData, 5759 "Specification mandate value for attribute %s\n", name); 5760 ctxt->wellFormed = 0; 5761 ctxt->disableSAX = 1; 5762 xmlFree(name); 5763 return(NULL); 5764 } 5765 5766 /* 5767 * Check that xml:lang conforms to the specification 5768 * No more registered as an error, just generate a warning now 5769 * since this was deprecated in XML second edition 5770 */ 5771 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 5772 if (!xmlCheckLanguageID(val)) { 5773 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5774 ctxt->sax->warning(ctxt->userData, 5775 "Malformed value for xml:lang : %s\n", val); 5776 } 5777 } 5778 5779 /* 5780 * Check that xml:space conforms to the specification 5781 */ 5782 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 5783 if (xmlStrEqual(val, BAD_CAST "default")) 5784 *(ctxt->space) = 0; 5785 else if (xmlStrEqual(val, BAD_CAST "preserve")) 5786 *(ctxt->space) = 1; 5787 else { 5788 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5790 ctxt->sax->error(ctxt->userData, 5791"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 5792 val); 5793 
ctxt->wellFormed = 0; 5794 ctxt->disableSAX = 1; 5795 } 5796 } 5797 5798 *value = val; 5799 return(name); 5800} 5801 5802/** 5803 * xmlParseStartTag: 5804 * @ctxt: an XML parser context 5805 * 5806 * parse a start of tag either for rule element or 5807 * EmptyElement. In both case we don't parse the tag closing chars. 5808 * 5809 * [40] STag ::= '<' Name (S Attribute)* S? '>' 5810 * 5811 * [ WFC: Unique Att Spec ] 5812 * No attribute name may appear more than once in the same start-tag or 5813 * empty-element tag. 5814 * 5815 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 5816 * 5817 * [ WFC: Unique Att Spec ] 5818 * No attribute name may appear more than once in the same start-tag or 5819 * empty-element tag. 5820 * 5821 * With namespace: 5822 * 5823 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 5824 * 5825 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 5826 * 5827 * Returns the element name parsed 5828 */ 5829 5830xmlChar * 5831xmlParseStartTag(xmlParserCtxtPtr ctxt) { 5832 xmlChar *name; 5833 xmlChar *attname; 5834 xmlChar *attvalue; 5835 const xmlChar **atts = NULL; 5836 int nbatts = 0; 5837 int maxatts = 0; 5838 int i; 5839 5840 if (RAW != '<') return(NULL); 5841 NEXT; 5842 5843 name = xmlParseName(ctxt); 5844 if (name == NULL) { 5845 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5847 ctxt->sax->error(ctxt->userData, 5848 "xmlParseStartTag: invalid element name\n"); 5849 ctxt->wellFormed = 0; 5850 ctxt->disableSAX = 1; 5851 return(NULL); 5852 } 5853 5854 /* 5855 * Now parse the attributes, it ends up with the ending 5856 * 5857 * (S Attribute)* S? 
5858 */ 5859 SKIP_BLANKS; 5860 GROW; 5861 5862 while ((IS_CHAR(RAW)) && 5863 (RAW != '>') && 5864 ((RAW != '/') || (NXT(1) != '>'))) { 5865 const xmlChar *q = CUR_PTR; 5866 int cons = ctxt->input->consumed; 5867 5868 attname = xmlParseAttribute(ctxt, &attvalue); 5869 if ((attname != NULL) && (attvalue != NULL)) { 5870 /* 5871 * [ WFC: Unique Att Spec ] 5872 * No attribute name may appear more than once in the same 5873 * start-tag or empty-element tag. 5874 */ 5875 for (i = 0; i < nbatts;i += 2) { 5876 if (xmlStrEqual(atts[i], attname)) { 5877 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5879 ctxt->sax->error(ctxt->userData, 5880 "Attribute %s redefined\n", 5881 attname); 5882 ctxt->wellFormed = 0; 5883 ctxt->disableSAX = 1; 5884 xmlFree(attname); 5885 xmlFree(attvalue); 5886 goto failed; 5887 } 5888 } 5889 5890 /* 5891 * Add the pair to atts 5892 */ 5893 if (atts == NULL) { 5894 maxatts = 10; 5895 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 5896 if (atts == NULL) { 5897 xmlGenericError(xmlGenericErrorContext, 5898 "malloc of %ld byte failed\n", 5899 maxatts * (long)sizeof(xmlChar *)); 5900 return(NULL); 5901 } 5902 } else if (nbatts + 4 > maxatts) { 5903 maxatts *= 2; 5904 atts = (const xmlChar **) xmlRealloc((void *) atts, 5905 maxatts * sizeof(xmlChar *)); 5906 if (atts == NULL) { 5907 xmlGenericError(xmlGenericErrorContext, 5908 "realloc of %ld byte failed\n", 5909 maxatts * (long)sizeof(xmlChar *)); 5910 return(NULL); 5911 } 5912 } 5913 atts[nbatts++] = attname; 5914 atts[nbatts++] = attvalue; 5915 atts[nbatts] = NULL; 5916 atts[nbatts + 1] = NULL; 5917 } else { 5918 if (attname != NULL) 5919 xmlFree(attname); 5920 if (attvalue != NULL) 5921 xmlFree(attvalue); 5922 } 5923 5924failed: 5925 5926 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 5927 break; 5928 if (!IS_BLANK(RAW)) { 5929 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5930 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 5931 ctxt->sax->error(ctxt->userData, 5932 "attributes construct error\n"); 5933 ctxt->wellFormed = 0; 5934 ctxt->disableSAX = 1; 5935 } 5936 SKIP_BLANKS; 5937 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 5938 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5940 ctxt->sax->error(ctxt->userData, 5941 "xmlParseStartTag: problem parsing attributes\n"); 5942 ctxt->wellFormed = 0; 5943 ctxt->disableSAX = 1; 5944 break; 5945 } 5946 GROW; 5947 } 5948 5949 /* 5950 * SAX: Start of Element ! 5951 */ 5952 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 5953 (!ctxt->disableSAX)) 5954 ctxt->sax->startElement(ctxt->userData, name, atts); 5955 5956 if (atts != NULL) { 5957 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 5958 xmlFree((void *) atts); 5959 } 5960 return(name); 5961} 5962 5963/** 5964 * xmlParseEndTag: 5965 * @ctxt: an XML parser context 5966 * 5967 * parse an end of tag 5968 * 5969 * [42] ETag ::= '</' Name S? '>' 5970 * 5971 * With namespace 5972 * 5973 * [NS 9] ETag ::= '</' QName S? '>' 5974 */ 5975 5976void 5977xmlParseEndTag(xmlParserCtxtPtr ctxt) { 5978 xmlChar *name; 5979 xmlChar *oldname; 5980 5981 GROW; 5982 if ((RAW != '<') || (NXT(1) != '/')) { 5983 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 5984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5985 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 5986 ctxt->wellFormed = 0; 5987 ctxt->disableSAX = 1; 5988 return; 5989 } 5990 SKIP(2); 5991 5992 name = xmlParseName(ctxt); 5993 5994 /* 5995 * We should definitely be at the ending "S? 
'>'" part 5996 */ 5997 GROW; 5998 SKIP_BLANKS; 5999 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6000 ctxt->errNo = XML_ERR_GT_REQUIRED; 6001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6002 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6003 ctxt->wellFormed = 0; 6004 ctxt->disableSAX = 1; 6005 } else 6006 NEXT; 6007 6008 /* 6009 * [ WFC: Element Type Match ] 6010 * The Name in an element's end-tag must match the element type in the 6011 * start-tag. 6012 * 6013 */ 6014 if ((name == NULL) || (ctxt->name == NULL) || 6015 (!xmlStrEqual(name, ctxt->name))) { 6016 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6018 if ((name != NULL) && (ctxt->name != NULL)) { 6019 ctxt->sax->error(ctxt->userData, 6020 "Opening and ending tag mismatch: %s and %s\n", 6021 ctxt->name, name); 6022 } else if (ctxt->name != NULL) { 6023 ctxt->sax->error(ctxt->userData, 6024 "Ending tag eror for: %s\n", ctxt->name); 6025 } else { 6026 ctxt->sax->error(ctxt->userData, 6027 "Ending tag error: internal error ???\n"); 6028 } 6029 6030 } 6031 ctxt->wellFormed = 0; 6032 ctxt->disableSAX = 1; 6033 } 6034 6035 /* 6036 * SAX: End of Tag 6037 */ 6038 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6039 (!ctxt->disableSAX)) 6040 ctxt->sax->endElement(ctxt->userData, name); 6041 6042 if (name != NULL) 6043 xmlFree(name); 6044 oldname = namePop(ctxt); 6045 spacePop(ctxt); 6046 if (oldname != NULL) { 6047#ifdef DEBUG_STACK 6048 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6049#endif 6050 xmlFree(oldname); 6051 } 6052 return; 6053} 6054 6055/** 6056 * xmlParseCDSect: 6057 * @ctxt: an XML parser context 6058 * 6059 * Parse escaped pure raw content. 
6060 * 6061 * [18] CDSect ::= CDStart CData CDEnd 6062 * 6063 * [19] CDStart ::= '<![CDATA[' 6064 * 6065 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6066 * 6067 * [21] CDEnd ::= ']]>' 6068 */ 6069void 6070xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6071 xmlChar *buf = NULL; 6072 int len = 0; 6073 int size = XML_PARSER_BUFFER_SIZE; 6074 int r, rl; 6075 int s, sl; 6076 int cur, l; 6077 int count = 0; 6078 6079 if ((NXT(0) == '<') && (NXT(1) == '!') && 6080 (NXT(2) == '[') && (NXT(3) == 'C') && 6081 (NXT(4) == 'D') && (NXT(5) == 'A') && 6082 (NXT(6) == 'T') && (NXT(7) == 'A') && 6083 (NXT(8) == '[')) { 6084 SKIP(9); 6085 } else 6086 return; 6087 6088 ctxt->instate = XML_PARSER_CDATA_SECTION; 6089 r = CUR_CHAR(rl); 6090 if (!IS_CHAR(r)) { 6091 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6093 ctxt->sax->error(ctxt->userData, 6094 "CData section not finished\n"); 6095 ctxt->wellFormed = 0; 6096 ctxt->disableSAX = 1; 6097 ctxt->instate = XML_PARSER_CONTENT; 6098 return; 6099 } 6100 NEXTL(rl); 6101 s = CUR_CHAR(sl); 6102 if (!IS_CHAR(s)) { 6103 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6105 ctxt->sax->error(ctxt->userData, 6106 "CData section not finished\n"); 6107 ctxt->wellFormed = 0; 6108 ctxt->disableSAX = 1; 6109 ctxt->instate = XML_PARSER_CONTENT; 6110 return; 6111 } 6112 NEXTL(sl); 6113 cur = CUR_CHAR(l); 6114 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6115 if (buf == NULL) { 6116 xmlGenericError(xmlGenericErrorContext, 6117 "malloc of %d byte failed\n", size); 6118 return; 6119 } 6120 while (IS_CHAR(cur) && 6121 ((r != ']') || (s != ']') || (cur != '>'))) { 6122 if (len + 5 >= size) { 6123 size *= 2; 6124 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6125 if (buf == NULL) { 6126 xmlGenericError(xmlGenericErrorContext, 6127 "realloc of %d byte failed\n", size); 6128 return; 6129 } 6130 } 6131 COPY_BUF(rl,buf,len,r); 6132 r = s; 
6133 rl = sl; 6134 s = cur; 6135 sl = l; 6136 count++; 6137 if (count > 50) { 6138 GROW; 6139 count = 0; 6140 } 6141 NEXTL(l); 6142 cur = CUR_CHAR(l); 6143 } 6144 buf[len] = 0; 6145 ctxt->instate = XML_PARSER_CONTENT; 6146 if (cur != '>') { 6147 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6149 ctxt->sax->error(ctxt->userData, 6150 "CData section not finished\n%.50s\n", buf); 6151 ctxt->wellFormed = 0; 6152 ctxt->disableSAX = 1; 6153 xmlFree(buf); 6154 return; 6155 } 6156 NEXTL(l); 6157 6158 /* 6159 * Ok the buffer is to be consumed as cdata. 6160 */ 6161 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6162 if (ctxt->sax->cdataBlock != NULL) 6163 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6164 } 6165 xmlFree(buf); 6166} 6167 6168/** 6169 * xmlParseContent: 6170 * @ctxt: an XML parser context 6171 * 6172 * Parse a content: 6173 * 6174 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6175 */ 6176 6177void 6178xmlParseContent(xmlParserCtxtPtr ctxt) { 6179 GROW; 6180 while (((RAW != 0) || (ctxt->token != 0)) && 6181 ((RAW != '<') || (NXT(1) != '/'))) { 6182 const xmlChar *test = CUR_PTR; 6183 int cons = ctxt->input->consumed; 6184 xmlChar tok = ctxt->token; 6185 6186 /* 6187 * Handle possible processed charrefs. 6188 */ 6189 if (ctxt->token != 0) { 6190 xmlParseCharData(ctxt, 0); 6191 } 6192 /* 6193 * First case : a Processing Instruction. 
6194 */ 6195 else if ((RAW == '<') && (NXT(1) == '?')) { 6196 xmlParsePI(ctxt); 6197 } 6198 6199 /* 6200 * Second case : a CDSection 6201 */ 6202 else if ((RAW == '<') && (NXT(1) == '!') && 6203 (NXT(2) == '[') && (NXT(3) == 'C') && 6204 (NXT(4) == 'D') && (NXT(5) == 'A') && 6205 (NXT(6) == 'T') && (NXT(7) == 'A') && 6206 (NXT(8) == '[')) { 6207 xmlParseCDSect(ctxt); 6208 } 6209 6210 /* 6211 * Third case : a comment 6212 */ 6213 else if ((RAW == '<') && (NXT(1) == '!') && 6214 (NXT(2) == '-') && (NXT(3) == '-')) { 6215 xmlParseComment(ctxt); 6216 ctxt->instate = XML_PARSER_CONTENT; 6217 } 6218 6219 /* 6220 * Fourth case : a sub-element. 6221 */ 6222 else if (RAW == '<') { 6223 xmlParseElement(ctxt); 6224 } 6225 6226 /* 6227 * Fifth case : a reference. If if has not been resolved, 6228 * parsing returns it's Name, create the node 6229 */ 6230 6231 else if (RAW == '&') { 6232 xmlParseReference(ctxt); 6233 } 6234 6235 /* 6236 * Last case, text. Note that References are handled directly. 6237 */ 6238 else { 6239 xmlParseCharData(ctxt, 0); 6240 } 6241 6242 GROW; 6243 /* 6244 * Pop-up of finished entities. 6245 */ 6246 while ((RAW == 0) && (ctxt->inputNr > 1)) 6247 xmlPopInput(ctxt); 6248 SHRINK; 6249 6250 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6251 (tok == ctxt->token)) { 6252 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6254 ctxt->sax->error(ctxt->userData, 6255 "detected an error in element content\n"); 6256 ctxt->wellFormed = 0; 6257 ctxt->disableSAX = 1; 6258 ctxt->instate = XML_PARSER_EOF; 6259 break; 6260 } 6261 } 6262} 6263 6264/** 6265 * xmlParseElement: 6266 * @ctxt: an XML parser context 6267 * 6268 * parse an XML element, this is highly recursive 6269 * 6270 * [39] element ::= EmptyElemTag | STag content ETag 6271 * 6272 * [ WFC: Element Type Match ] 6273 * The Name in an element's end-tag must match the element type in the 6274 * start-tag. 
6275 * 6276 * [ VC: Element Valid ] 6277 * An element is valid if there is a declaration matching elementdecl 6278 * where the Name matches the element type and one of the following holds: 6279 * - The declaration matches EMPTY and the element has no content. 6280 * - The declaration matches children and the sequence of child elements 6281 * belongs to the language generated by the regular expression in the 6282 * content model, with optional white space (characters matching the 6283 * nonterminal S) between each pair of child elements. 6284 * - The declaration matches Mixed and the content consists of character 6285 * data and child elements whose types match names in the content model. 6286 * - The declaration matches ANY, and the types of any child elements have 6287 * been declared. 6288 */ 6289 6290void 6291xmlParseElement(xmlParserCtxtPtr ctxt) { 6292 const xmlChar *openTag = CUR_PTR; 6293 xmlChar *name; 6294 xmlChar *oldname; 6295 xmlParserNodeInfo node_info; 6296 xmlNodePtr ret; 6297 6298 /* Capture start position */ 6299 if (ctxt->record_info) { 6300 node_info.begin_pos = ctxt->input->consumed + 6301 (CUR_PTR - ctxt->input->base); 6302 node_info.begin_line = ctxt->input->line; 6303 } 6304 6305 if (ctxt->spaceNr == 0) 6306 spacePush(ctxt, -1); 6307 else 6308 spacePush(ctxt, *ctxt->space); 6309 6310 name = xmlParseStartTag(ctxt); 6311 if (name == NULL) { 6312 spacePop(ctxt); 6313 return; 6314 } 6315 namePush(ctxt, name); 6316 ret = ctxt->node; 6317 6318 /* 6319 * [ VC: Root Element Type ] 6320 * The Name in the document type declaration must match the element 6321 * type of the root element. 6322 */ 6323 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6324 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6325 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6326 6327 /* 6328 * Check for an Empty Element. 
6329 */ 6330 if ((RAW == '/') && (NXT(1) == '>')) { 6331 SKIP(2); 6332 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6333 (!ctxt->disableSAX)) 6334 ctxt->sax->endElement(ctxt->userData, name); 6335 oldname = namePop(ctxt); 6336 spacePop(ctxt); 6337 if (oldname != NULL) { 6338#ifdef DEBUG_STACK 6339 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6340#endif 6341 xmlFree(oldname); 6342 } 6343 if ( ret != NULL && ctxt->record_info ) { 6344 node_info.end_pos = ctxt->input->consumed + 6345 (CUR_PTR - ctxt->input->base); 6346 node_info.end_line = ctxt->input->line; 6347 node_info.node = ret; 6348 xmlParserAddNodeInfo(ctxt, &node_info); 6349 } 6350 return; 6351 } 6352 if (RAW == '>') { 6353 NEXT; 6354 } else { 6355 ctxt->errNo = XML_ERR_GT_REQUIRED; 6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6357 ctxt->sax->error(ctxt->userData, 6358 "Couldn't find end of Start Tag\n%.30s\n", 6359 openTag); 6360 ctxt->wellFormed = 0; 6361 ctxt->disableSAX = 1; 6362 6363 /* 6364 * end of parsing of this node. 
6365 */ 6366 nodePop(ctxt); 6367 oldname = namePop(ctxt); 6368 spacePop(ctxt); 6369 if (oldname != NULL) { 6370#ifdef DEBUG_STACK 6371 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6372#endif 6373 xmlFree(oldname); 6374 } 6375 6376 /* 6377 * Capture end position and add node 6378 */ 6379 if ( ret != NULL && ctxt->record_info ) { 6380 node_info.end_pos = ctxt->input->consumed + 6381 (CUR_PTR - ctxt->input->base); 6382 node_info.end_line = ctxt->input->line; 6383 node_info.node = ret; 6384 xmlParserAddNodeInfo(ctxt, &node_info); 6385 } 6386 return; 6387 } 6388 6389 /* 6390 * Parse the content of the element: 6391 */ 6392 xmlParseContent(ctxt); 6393 if (!IS_CHAR(RAW)) { 6394 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6396 ctxt->sax->error(ctxt->userData, 6397 "Premature end of data in tag %.30s\n", openTag); 6398 ctxt->wellFormed = 0; 6399 ctxt->disableSAX = 1; 6400 6401 /* 6402 * end of parsing of this node. 6403 */ 6404 nodePop(ctxt); 6405 oldname = namePop(ctxt); 6406 spacePop(ctxt); 6407 if (oldname != NULL) { 6408#ifdef DEBUG_STACK 6409 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6410#endif 6411 xmlFree(oldname); 6412 } 6413 return; 6414 } 6415 6416 /* 6417 * parse the end of tag: '</' should be here. 6418 */ 6419 xmlParseEndTag(ctxt); 6420 6421 /* 6422 * Capture end position and add node 6423 */ 6424 if ( ret != NULL && ctxt->record_info ) { 6425 node_info.end_pos = ctxt->input->consumed + 6426 (CUR_PTR - ctxt->input->base); 6427 node_info.end_line = ctxt->input->line; 6428 node_info.node = ret; 6429 xmlParserAddNodeInfo(ctxt, &node_info); 6430 } 6431} 6432 6433/** 6434 * xmlParseVersionNum: 6435 * @ctxt: an XML parser context 6436 * 6437 * parse the XML version value. 
6438 * 6439 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6440 * 6441 * Returns the string giving the XML version number, or NULL 6442 */ 6443xmlChar * 6444xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6445 xmlChar *buf = NULL; 6446 int len = 0; 6447 int size = 10; 6448 xmlChar cur; 6449 6450 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6451 if (buf == NULL) { 6452 xmlGenericError(xmlGenericErrorContext, 6453 "malloc of %d byte failed\n", size); 6454 return(NULL); 6455 } 6456 cur = CUR; 6457 while (((cur >= 'a') && (cur <= 'z')) || 6458 ((cur >= 'A') && (cur <= 'Z')) || 6459 ((cur >= '0') && (cur <= '9')) || 6460 (cur == '_') || (cur == '.') || 6461 (cur == ':') || (cur == '-')) { 6462 if (len + 1 >= size) { 6463 size *= 2; 6464 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6465 if (buf == NULL) { 6466 xmlGenericError(xmlGenericErrorContext, 6467 "realloc of %d byte failed\n", size); 6468 return(NULL); 6469 } 6470 } 6471 buf[len++] = cur; 6472 NEXT; 6473 cur=CUR; 6474 } 6475 buf[len] = 0; 6476 return(buf); 6477} 6478 6479/** 6480 * xmlParseVersionInfo: 6481 * @ctxt: an XML parser context 6482 * 6483 * parse the XML version. 6484 * 6485 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6486 * 6487 * [25] Eq ::= S? '=' S? 6488 * 6489 * Returns the version string, e.g. 
"1.0" 6490 */ 6491 6492xmlChar * 6493xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6494 xmlChar *version = NULL; 6495 const xmlChar *q; 6496 6497 if ((RAW == 'v') && (NXT(1) == 'e') && 6498 (NXT(2) == 'r') && (NXT(3) == 's') && 6499 (NXT(4) == 'i') && (NXT(5) == 'o') && 6500 (NXT(6) == 'n')) { 6501 SKIP(7); 6502 SKIP_BLANKS; 6503 if (RAW != '=') { 6504 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6506 ctxt->sax->error(ctxt->userData, 6507 "xmlParseVersionInfo : expected '='\n"); 6508 ctxt->wellFormed = 0; 6509 ctxt->disableSAX = 1; 6510 return(NULL); 6511 } 6512 NEXT; 6513 SKIP_BLANKS; 6514 if (RAW == '"') { 6515 NEXT; 6516 q = CUR_PTR; 6517 version = xmlParseVersionNum(ctxt); 6518 if (RAW != '"') { 6519 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6521 ctxt->sax->error(ctxt->userData, 6522 "String not closed\n%.50s\n", q); 6523 ctxt->wellFormed = 0; 6524 ctxt->disableSAX = 1; 6525 } else 6526 NEXT; 6527 } else if (RAW == '\''){ 6528 NEXT; 6529 q = CUR_PTR; 6530 version = xmlParseVersionNum(ctxt); 6531 if (RAW != '\'') { 6532 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6534 ctxt->sax->error(ctxt->userData, 6535 "String not closed\n%.50s\n", q); 6536 ctxt->wellFormed = 0; 6537 ctxt->disableSAX = 1; 6538 } else 6539 NEXT; 6540 } else { 6541 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6543 ctxt->sax->error(ctxt->userData, 6544 "xmlParseVersionInfo : expected ' or \"\n"); 6545 ctxt->wellFormed = 0; 6546 ctxt->disableSAX = 1; 6547 } 6548 } 6549 return(version); 6550} 6551 6552/** 6553 * xmlParseEncName: 6554 * @ctxt: an XML parser context 6555 * 6556 * parse the XML encoding name 6557 * 6558 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6559 * 6560 * Returns the encoding name value or NULL 6561 */ 6562xmlChar * 
6563xmlParseEncName(xmlParserCtxtPtr ctxt) { 6564 xmlChar *buf = NULL; 6565 int len = 0; 6566 int size = 10; 6567 xmlChar cur; 6568 6569 cur = CUR; 6570 if (((cur >= 'a') && (cur <= 'z')) || 6571 ((cur >= 'A') && (cur <= 'Z'))) { 6572 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6573 if (buf == NULL) { 6574 xmlGenericError(xmlGenericErrorContext, 6575 "malloc of %d byte failed\n", size); 6576 return(NULL); 6577 } 6578 6579 buf[len++] = cur; 6580 NEXT; 6581 cur = CUR; 6582 while (((cur >= 'a') && (cur <= 'z')) || 6583 ((cur >= 'A') && (cur <= 'Z')) || 6584 ((cur >= '0') && (cur <= '9')) || 6585 (cur == '.') || (cur == '_') || 6586 (cur == '-')) { 6587 if (len + 1 >= size) { 6588 size *= 2; 6589 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6590 if (buf == NULL) { 6591 xmlGenericError(xmlGenericErrorContext, 6592 "realloc of %d byte failed\n", size); 6593 return(NULL); 6594 } 6595 } 6596 buf[len++] = cur; 6597 NEXT; 6598 cur = CUR; 6599 if (cur == 0) { 6600 SHRINK; 6601 GROW; 6602 cur = CUR; 6603 } 6604 } 6605 buf[len] = 0; 6606 } else { 6607 ctxt->errNo = XML_ERR_ENCODING_NAME; 6608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6609 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 6610 ctxt->wellFormed = 0; 6611 ctxt->disableSAX = 1; 6612 } 6613 return(buf); 6614} 6615 6616/** 6617 * xmlParseEncodingDecl: 6618 * @ctxt: an XML parser context 6619 * 6620 * parse the XML encoding declaration 6621 * 6622 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 6623 * 6624 * this setups the conversion filters. 
6625 * 6626 * Returns the encoding value or NULL 6627 */ 6628 6629xmlChar * 6630xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 6631 xmlChar *encoding = NULL; 6632 const xmlChar *q; 6633 6634 SKIP_BLANKS; 6635 if ((RAW == 'e') && (NXT(1) == 'n') && 6636 (NXT(2) == 'c') && (NXT(3) == 'o') && 6637 (NXT(4) == 'd') && (NXT(5) == 'i') && 6638 (NXT(6) == 'n') && (NXT(7) == 'g')) { 6639 SKIP(8); 6640 SKIP_BLANKS; 6641 if (RAW != '=') { 6642 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6644 ctxt->sax->error(ctxt->userData, 6645 "xmlParseEncodingDecl : expected '='\n"); 6646 ctxt->wellFormed = 0; 6647 ctxt->disableSAX = 1; 6648 return(NULL); 6649 } 6650 NEXT; 6651 SKIP_BLANKS; 6652 if (RAW == '"') { 6653 NEXT; 6654 q = CUR_PTR; 6655 encoding = xmlParseEncName(ctxt); 6656 if (RAW != '"') { 6657 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6659 ctxt->sax->error(ctxt->userData, 6660 "String not closed\n%.50s\n", q); 6661 ctxt->wellFormed = 0; 6662 ctxt->disableSAX = 1; 6663 } else 6664 NEXT; 6665 } else if (RAW == '\''){ 6666 NEXT; 6667 q = CUR_PTR; 6668 encoding = xmlParseEncName(ctxt); 6669 if (RAW != '\'') { 6670 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6672 ctxt->sax->error(ctxt->userData, 6673 "String not closed\n%.50s\n", q); 6674 ctxt->wellFormed = 0; 6675 ctxt->disableSAX = 1; 6676 } else 6677 NEXT; 6678 } else if (RAW == '"'){ 6679 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6681 ctxt->sax->error(ctxt->userData, 6682 "xmlParseEncodingDecl : expected ' or \"\n"); 6683 ctxt->wellFormed = 0; 6684 ctxt->disableSAX = 1; 6685 } 6686 if (encoding != NULL) { 6687 xmlCharEncoding enc; 6688 xmlCharEncodingHandlerPtr handler; 6689 6690 if (ctxt->input->encoding != NULL) 6691 xmlFree((xmlChar *) ctxt->input->encoding); 6692 ctxt->input->encoding = encoding; 
6693 6694 enc = xmlParseCharEncoding((const char *) encoding); 6695 /* 6696 * registered set of known encodings 6697 */ 6698 if (enc != XML_CHAR_ENCODING_ERROR) { 6699 xmlSwitchEncoding(ctxt, enc); 6700 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6701 xmlFree(encoding); 6702 return(NULL); 6703 } 6704 } else { 6705 /* 6706 * fallback for unknown encodings 6707 */ 6708 handler = xmlFindCharEncodingHandler((const char *) encoding); 6709 if (handler != NULL) { 6710 xmlSwitchToEncoding(ctxt, handler); 6711 } else { 6712 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 6713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6714 ctxt->sax->error(ctxt->userData, 6715 "Unsupported encoding %s\n", encoding); 6716 return(NULL); 6717 } 6718 } 6719 } 6720 } 6721 return(encoding); 6722} 6723 6724/** 6725 * xmlParseSDDecl: 6726 * @ctxt: an XML parser context 6727 * 6728 * parse the XML standalone declaration 6729 * 6730 * [32] SDDecl ::= S 'standalone' Eq 6731 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 6732 * 6733 * [ VC: Standalone Document Declaration ] 6734 * TODO The standalone document declaration must have the value "no" 6735 * if any external markup declarations contain declarations of: 6736 * - attributes with default values, if elements to which these 6737 * attributes apply appear in the document without specifications 6738 * of values for these attributes, or 6739 * - entities (other than amp, lt, gt, apos, quot), if references 6740 * to those entities appear in the document, or 6741 * - attributes with values subject to normalization, where the 6742 * attribute appears in the document with a value which will change 6743 * as a result of normalization, or 6744 * - element types with element content, if white space occurs directly 6745 * within any instance of those types. 
6746 * 6747 * Returns 1 if standalone, 0 otherwise 6748 */ 6749 6750int 6751xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 6752 int standalone = -1; 6753 6754 SKIP_BLANKS; 6755 if ((RAW == 's') && (NXT(1) == 't') && 6756 (NXT(2) == 'a') && (NXT(3) == 'n') && 6757 (NXT(4) == 'd') && (NXT(5) == 'a') && 6758 (NXT(6) == 'l') && (NXT(7) == 'o') && 6759 (NXT(8) == 'n') && (NXT(9) == 'e')) { 6760 SKIP(10); 6761 SKIP_BLANKS; 6762 if (RAW != '=') { 6763 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6765 ctxt->sax->error(ctxt->userData, 6766 "XML standalone declaration : expected '='\n"); 6767 ctxt->wellFormed = 0; 6768 ctxt->disableSAX = 1; 6769 return(standalone); 6770 } 6771 NEXT; 6772 SKIP_BLANKS; 6773 if (RAW == '\''){ 6774 NEXT; 6775 if ((RAW == 'n') && (NXT(1) == 'o')) { 6776 standalone = 0; 6777 SKIP(2); 6778 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6779 (NXT(2) == 's')) { 6780 standalone = 1; 6781 SKIP(3); 6782 } else { 6783 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6785 ctxt->sax->error(ctxt->userData, 6786 "standalone accepts only 'yes' or 'no'\n"); 6787 ctxt->wellFormed = 0; 6788 ctxt->disableSAX = 1; 6789 } 6790 if (RAW != '\'') { 6791 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6793 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6794 ctxt->wellFormed = 0; 6795 ctxt->disableSAX = 1; 6796 } else 6797 NEXT; 6798 } else if (RAW == '"'){ 6799 NEXT; 6800 if ((RAW == 'n') && (NXT(1) == 'o')) { 6801 standalone = 0; 6802 SKIP(2); 6803 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6804 (NXT(2) == 's')) { 6805 standalone = 1; 6806 SKIP(3); 6807 } else { 6808 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6810 ctxt->sax->error(ctxt->userData, 6811 "standalone accepts only 'yes' or 'no'\n"); 6812 ctxt->wellFormed = 0; 6813 ctxt->disableSAX = 1; 6814 
} 6815 if (RAW != '"') { 6816 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6818 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6819 ctxt->wellFormed = 0; 6820 ctxt->disableSAX = 1; 6821 } else 6822 NEXT; 6823 } else { 6824 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6826 ctxt->sax->error(ctxt->userData, 6827 "Standalone value not found\n"); 6828 ctxt->wellFormed = 0; 6829 ctxt->disableSAX = 1; 6830 } 6831 } 6832 return(standalone); 6833} 6834 6835/** 6836 * xmlParseXMLDecl: 6837 * @ctxt: an XML parser context 6838 * 6839 * parse an XML declaration header 6840 * 6841 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 6842 */ 6843 6844void 6845xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 6846 xmlChar *version; 6847 6848 /* 6849 * We know that '<?xml' is here. 6850 */ 6851 SKIP(5); 6852 6853 if (!IS_BLANK(RAW)) { 6854 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6856 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 6857 ctxt->wellFormed = 0; 6858 ctxt->disableSAX = 1; 6859 } 6860 SKIP_BLANKS; 6861 6862 /* 6863 * We should have the VersionInfo here. 
6864 */ 6865 version = xmlParseVersionInfo(ctxt); 6866 if (version == NULL) 6867 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6868 ctxt->version = xmlStrdup(version); 6869 xmlFree(version); 6870 6871 /* 6872 * We may have the encoding declaration 6873 */ 6874 if (!IS_BLANK(RAW)) { 6875 if ((RAW == '?') && (NXT(1) == '>')) { 6876 SKIP(2); 6877 return; 6878 } 6879 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6881 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 6882 ctxt->wellFormed = 0; 6883 ctxt->disableSAX = 1; 6884 } 6885 xmlParseEncodingDecl(ctxt); 6886 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6887 /* 6888 * The XML REC instructs us to stop parsing right here 6889 */ 6890 return; 6891 } 6892 6893 /* 6894 * We may have the standalone status. 6895 */ 6896 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 6897 if ((RAW == '?') && (NXT(1) == '>')) { 6898 SKIP(2); 6899 return; 6900 } 6901 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6903 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 6904 ctxt->wellFormed = 0; 6905 ctxt->disableSAX = 1; 6906 } 6907 SKIP_BLANKS; 6908 ctxt->input->standalone = xmlParseSDDecl(ctxt); 6909 6910 SKIP_BLANKS; 6911 if ((RAW == '?') && (NXT(1) == '>')) { 6912 SKIP(2); 6913 } else if (RAW == '>') { 6914 /* Deprecated old WD ... 
*/ 6915 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 6916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6917 ctxt->sax->error(ctxt->userData, 6918 "XML declaration must end-up with '?>'\n"); 6919 ctxt->wellFormed = 0; 6920 ctxt->disableSAX = 1; 6921 NEXT; 6922 } else { 6923 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 6924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6925 ctxt->sax->error(ctxt->userData, 6926 "parsing XML declaration: '?>' expected\n"); 6927 ctxt->wellFormed = 0; 6928 ctxt->disableSAX = 1; 6929 MOVETO_ENDTAG(CUR_PTR); 6930 NEXT; 6931 } 6932} 6933 6934/** 6935 * xmlParseMisc: 6936 * @ctxt: an XML parser context 6937 * 6938 * parse an XML Misc* optionnal field. 6939 * 6940 * [27] Misc ::= Comment | PI | S 6941 */ 6942 6943void 6944xmlParseMisc(xmlParserCtxtPtr ctxt) { 6945 while (((RAW == '<') && (NXT(1) == '?')) || 6946 ((RAW == '<') && (NXT(1) == '!') && 6947 (NXT(2) == '-') && (NXT(3) == '-')) || 6948 IS_BLANK(CUR)) { 6949 if ((RAW == '<') && (NXT(1) == '?')) { 6950 xmlParsePI(ctxt); 6951 } else if (IS_BLANK(CUR)) { 6952 NEXT; 6953 } else 6954 xmlParseComment(ctxt); 6955 } 6956} 6957 6958/** 6959 * xmlParseDocument: 6960 * @ctxt: an XML parser context 6961 * 6962 * parse an XML document (and build a tree if using the standard SAX 6963 * interface). 6964 * 6965 * [1] document ::= prolog element Misc* 6966 * 6967 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 6968 * 6969 * Returns 0, -1 in case of error. the parser context is augmented 6970 * as a result of the parsing. 6971 */ 6972 6973int 6974xmlParseDocument(xmlParserCtxtPtr ctxt) { 6975 xmlChar start[4]; 6976 xmlCharEncoding enc; 6977 6978 xmlInitParser(); 6979 6980 GROW; 6981 6982 /* 6983 * SAX: beginning of the document processing. 
6984 */ 6985 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 6986 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 6987 6988 /* 6989 * Get the 4 first bytes and decode the charset 6990 * if enc != XML_CHAR_ENCODING_NONE 6991 * plug some encoding conversion routines. 6992 */ 6993 start[0] = RAW; 6994 start[1] = NXT(1); 6995 start[2] = NXT(2); 6996 start[3] = NXT(3); 6997 enc = xmlDetectCharEncoding(start, 4); 6998 if (enc != XML_CHAR_ENCODING_NONE) { 6999 xmlSwitchEncoding(ctxt, enc); 7000 } 7001 7002 7003 if (CUR == 0) { 7004 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7006 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7007 ctxt->wellFormed = 0; 7008 ctxt->disableSAX = 1; 7009 } 7010 7011 /* 7012 * Check for the XMLDecl in the Prolog. 7013 */ 7014 GROW; 7015 if ((RAW == '<') && (NXT(1) == '?') && 7016 (NXT(2) == 'x') && (NXT(3) == 'm') && 7017 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7018 7019 /* 7020 * Note that we will switch encoding on the fly. 7021 */ 7022 xmlParseXMLDecl(ctxt); 7023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7024 /* 7025 * The XML REC instructs us to stop parsing right here 7026 */ 7027 return(-1); 7028 } 7029 ctxt->standalone = ctxt->input->standalone; 7030 SKIP_BLANKS; 7031 } else { 7032 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7033 } 7034 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7035 ctxt->sax->startDocument(ctxt->userData); 7036 7037 /* 7038 * The Misc part of the Prolog 7039 */ 7040 GROW; 7041 xmlParseMisc(ctxt); 7042 7043 /* 7044 * Then possibly doc type declaration(s) and more Misc 7045 * (doctypedecl Misc*)? 
7046 */ 7047 GROW; 7048 if ((RAW == '<') && (NXT(1) == '!') && 7049 (NXT(2) == 'D') && (NXT(3) == 'O') && 7050 (NXT(4) == 'C') && (NXT(5) == 'T') && 7051 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7052 (NXT(8) == 'E')) { 7053 7054 ctxt->inSubset = 1; 7055 xmlParseDocTypeDecl(ctxt); 7056 if (RAW == '[') { 7057 ctxt->instate = XML_PARSER_DTD; 7058 xmlParseInternalSubset(ctxt); 7059 } 7060 7061 /* 7062 * Create and update the external subset. 7063 */ 7064 ctxt->inSubset = 2; 7065 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7066 (!ctxt->disableSAX)) 7067 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7068 ctxt->extSubSystem, ctxt->extSubURI); 7069 ctxt->inSubset = 0; 7070 7071 7072 ctxt->instate = XML_PARSER_PROLOG; 7073 xmlParseMisc(ctxt); 7074 } 7075 7076 /* 7077 * Time to start parsing the tree itself 7078 */ 7079 GROW; 7080 if (RAW != '<') { 7081 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7083 ctxt->sax->error(ctxt->userData, 7084 "Start tag expected, '<' not found\n"); 7085 ctxt->wellFormed = 0; 7086 ctxt->disableSAX = 1; 7087 ctxt->instate = XML_PARSER_EOF; 7088 } else { 7089 ctxt->instate = XML_PARSER_CONTENT; 7090 xmlParseElement(ctxt); 7091 ctxt->instate = XML_PARSER_EPILOG; 7092 7093 7094 /* 7095 * The Misc part at the end 7096 */ 7097 xmlParseMisc(ctxt); 7098 7099 if (RAW != 0) { 7100 ctxt->errNo = XML_ERR_DOCUMENT_END; 7101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7102 ctxt->sax->error(ctxt->userData, 7103 "Extra content at the end of the document\n"); 7104 ctxt->wellFormed = 0; 7105 ctxt->disableSAX = 1; 7106 } 7107 ctxt->instate = XML_PARSER_EOF; 7108 } 7109 7110 /* 7111 * SAX: end of the document processing. 7112 */ 7113 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7114 (!ctxt->disableSAX)) 7115 ctxt->sax->endDocument(ctxt->userData); 7116 7117 if (! 
ctxt->wellFormed) return(-1); 7118 return(0); 7119} 7120 7121/** 7122 * xmlParseExtParsedEnt: 7123 * @ctxt: an XML parser context 7124 * 7125 * parse a genreral parsed entity 7126 * An external general parsed entity is well-formed if it matches the 7127 * production labeled extParsedEnt. 7128 * 7129 * [78] extParsedEnt ::= TextDecl? content 7130 * 7131 * Returns 0, -1 in case of error. the parser context is augmented 7132 * as a result of the parsing. 7133 */ 7134 7135int 7136xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7137 xmlChar start[4]; 7138 xmlCharEncoding enc; 7139 7140 xmlDefaultSAXHandlerInit(); 7141 7142 GROW; 7143 7144 /* 7145 * SAX: beginning of the document processing. 7146 */ 7147 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7148 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7149 7150 /* 7151 * Get the 4 first bytes and decode the charset 7152 * if enc != XML_CHAR_ENCODING_NONE 7153 * plug some encoding conversion routines. 7154 */ 7155 start[0] = RAW; 7156 start[1] = NXT(1); 7157 start[2] = NXT(2); 7158 start[3] = NXT(3); 7159 enc = xmlDetectCharEncoding(start, 4); 7160 if (enc != XML_CHAR_ENCODING_NONE) { 7161 xmlSwitchEncoding(ctxt, enc); 7162 } 7163 7164 7165 if (CUR == 0) { 7166 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7168 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7169 ctxt->wellFormed = 0; 7170 ctxt->disableSAX = 1; 7171 } 7172 7173 /* 7174 * Check for the XMLDecl in the Prolog. 7175 */ 7176 GROW; 7177 if ((RAW == '<') && (NXT(1) == '?') && 7178 (NXT(2) == 'x') && (NXT(3) == 'm') && 7179 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7180 7181 /* 7182 * Note that we will switch encoding on the fly. 
7183 */ 7184 xmlParseXMLDecl(ctxt); 7185 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7186 /* 7187 * The XML REC instructs us to stop parsing right here 7188 */ 7189 return(-1); 7190 } 7191 SKIP_BLANKS; 7192 } else { 7193 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7194 } 7195 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7196 ctxt->sax->startDocument(ctxt->userData); 7197 7198 /* 7199 * Doing validity checking on chunk doesn't make sense 7200 */ 7201 ctxt->instate = XML_PARSER_CONTENT; 7202 ctxt->validate = 0; 7203 ctxt->depth = 0; 7204 7205 xmlParseContent(ctxt); 7206 7207 if ((RAW == '<') && (NXT(1) == '/')) { 7208 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7210 ctxt->sax->error(ctxt->userData, 7211 "chunk is not well balanced\n"); 7212 ctxt->wellFormed = 0; 7213 ctxt->disableSAX = 1; 7214 } else if (RAW != 0) { 7215 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7217 ctxt->sax->error(ctxt->userData, 7218 "extra content at the end of well balanced chunk\n"); 7219 ctxt->wellFormed = 0; 7220 ctxt->disableSAX = 1; 7221 } 7222 7223 /* 7224 * SAX: end of the document processing. 7225 */ 7226 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7227 (!ctxt->disableSAX)) 7228 ctxt->sax->endDocument(ctxt->userData); 7229 7230 if (! 
ctxt->wellFormed) return(-1); 7231 return(0); 7232} 7233 7234/************************************************************************ 7235 * * 7236 * Progressive parsing interfaces * 7237 * * 7238 ************************************************************************/ 7239 7240/** 7241 * xmlParseLookupSequence: 7242 * @ctxt: an XML parser context 7243 * @first: the first char to lookup 7244 * @next: the next char to lookup or zero 7245 * @third: the next char to lookup or zero 7246 * 7247 * Try to find if a sequence (first, next, third) or just (first next) or 7248 * (first) is available in the input stream. 7249 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7250 * to avoid rescanning sequences of bytes, it DOES change the state of the 7251 * parser, do not use liberally. 7252 * 7253 * Returns the index to the current parsing point if the full sequence 7254 * is available, -1 otherwise. 7255 */ 7256int 7257xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7258 xmlChar next, xmlChar third) { 7259 int base, len; 7260 xmlParserInputPtr in; 7261 const xmlChar *buf; 7262 7263 in = ctxt->input; 7264 if (in == NULL) return(-1); 7265 base = in->cur - in->base; 7266 if (base < 0) return(-1); 7267 if (ctxt->checkIndex > base) 7268 base = ctxt->checkIndex; 7269 if (in->buf == NULL) { 7270 buf = in->base; 7271 len = in->length; 7272 } else { 7273 buf = in->buf->buffer->content; 7274 len = in->buf->buffer->use; 7275 } 7276 /* take into account the sequence length */ 7277 if (third) len -= 2; 7278 else if (next) len --; 7279 for (;base < len;base++) { 7280 if (buf[base] == first) { 7281 if (third != 0) { 7282 if ((buf[base + 1] != next) || 7283 (buf[base + 2] != third)) continue; 7284 } else if (next != 0) { 7285 if (buf[base + 1] != next) continue; 7286 } 7287 ctxt->checkIndex = 0; 7288#ifdef DEBUG_PUSH 7289 if (next == 0) 7290 xmlGenericError(xmlGenericErrorContext, 7291 "PP: lookup '%c' found at %d\n", 7292 first, base); 7293 else 
if (third == 0) 7294 xmlGenericError(xmlGenericErrorContext, 7295 "PP: lookup '%c%c' found at %d\n", 7296 first, next, base); 7297 else 7298 xmlGenericError(xmlGenericErrorContext, 7299 "PP: lookup '%c%c%c' found at %d\n", 7300 first, next, third, base); 7301#endif 7302 return(base - (in->cur - in->base)); 7303 } 7304 } 7305 ctxt->checkIndex = base; 7306#ifdef DEBUG_PUSH 7307 if (next == 0) 7308 xmlGenericError(xmlGenericErrorContext, 7309 "PP: lookup '%c' failed\n", first); 7310 else if (third == 0) 7311 xmlGenericError(xmlGenericErrorContext, 7312 "PP: lookup '%c%c' failed\n", first, next); 7313 else 7314 xmlGenericError(xmlGenericErrorContext, 7315 "PP: lookup '%c%c%c' failed\n", first, next, third); 7316#endif 7317 return(-1); 7318} 7319 7320/** 7321 * xmlParseTryOrFinish: 7322 * @ctxt: an XML parser context 7323 * @terminate: last chunk indicator 7324 * 7325 * Try to progress on parsing 7326 * 7327 * Returns zero if no parsing was possible 7328 */ 7329int 7330xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7331 int ret = 0; 7332 int avail; 7333 xmlChar cur, next; 7334 7335#ifdef DEBUG_PUSH 7336 switch (ctxt->instate) { 7337 case XML_PARSER_EOF: 7338 xmlGenericError(xmlGenericErrorContext, 7339 "PP: try EOF\n"); break; 7340 case XML_PARSER_START: 7341 xmlGenericError(xmlGenericErrorContext, 7342 "PP: try START\n"); break; 7343 case XML_PARSER_MISC: 7344 xmlGenericError(xmlGenericErrorContext, 7345 "PP: try MISC\n");break; 7346 case XML_PARSER_COMMENT: 7347 xmlGenericError(xmlGenericErrorContext, 7348 "PP: try COMMENT\n");break; 7349 case XML_PARSER_PROLOG: 7350 xmlGenericError(xmlGenericErrorContext, 7351 "PP: try PROLOG\n");break; 7352 case XML_PARSER_START_TAG: 7353 xmlGenericError(xmlGenericErrorContext, 7354 "PP: try START_TAG\n");break; 7355 case XML_PARSER_CONTENT: 7356 xmlGenericError(xmlGenericErrorContext, 7357 "PP: try CONTENT\n");break; 7358 case XML_PARSER_CDATA_SECTION: 7359 xmlGenericError(xmlGenericErrorContext, 7360 "PP: try 
CDATA_SECTION\n");break; 7361 case XML_PARSER_END_TAG: 7362 xmlGenericError(xmlGenericErrorContext, 7363 "PP: try END_TAG\n");break; 7364 case XML_PARSER_ENTITY_DECL: 7365 xmlGenericError(xmlGenericErrorContext, 7366 "PP: try ENTITY_DECL\n");break; 7367 case XML_PARSER_ENTITY_VALUE: 7368 xmlGenericError(xmlGenericErrorContext, 7369 "PP: try ENTITY_VALUE\n");break; 7370 case XML_PARSER_ATTRIBUTE_VALUE: 7371 xmlGenericError(xmlGenericErrorContext, 7372 "PP: try ATTRIBUTE_VALUE\n");break; 7373 case XML_PARSER_DTD: 7374 xmlGenericError(xmlGenericErrorContext, 7375 "PP: try DTD\n");break; 7376 case XML_PARSER_EPILOG: 7377 xmlGenericError(xmlGenericErrorContext, 7378 "PP: try EPILOG\n");break; 7379 case XML_PARSER_PI: 7380 xmlGenericError(xmlGenericErrorContext, 7381 "PP: try PI\n");break; 7382 } 7383#endif 7384 7385 while (1) { 7386 /* 7387 * Pop-up of finished entities. 7388 */ 7389 while ((RAW == 0) && (ctxt->inputNr > 1)) 7390 xmlPopInput(ctxt); 7391 7392 if (ctxt->input ==NULL) break; 7393 if (ctxt->input->buf == NULL) 7394 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7395 else 7396 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7397 if (avail < 1) 7398 goto done; 7399 switch (ctxt->instate) { 7400 case XML_PARSER_EOF: 7401 /* 7402 * Document parsing is done ! 7403 */ 7404 goto done; 7405 case XML_PARSER_START: 7406 /* 7407 * Very first chars read from the document flow. 
7408 */ 7409 cur = ctxt->input->cur[0]; 7410 if (IS_BLANK(cur)) { 7411 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7412 ctxt->sax->setDocumentLocator(ctxt->userData, 7413 &xmlDefaultSAXLocator); 7414 ctxt->errNo = XML_ERR_DOCUMENT_START; 7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7416 ctxt->sax->error(ctxt->userData, 7417 "Extra spaces at the beginning of the document are not allowed\n"); 7418 ctxt->wellFormed = 0; 7419 ctxt->disableSAX = 1; 7420 SKIP_BLANKS; 7421 ret++; 7422 if (ctxt->input->buf == NULL) 7423 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7424 else 7425 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7426 } 7427 if (avail < 2) 7428 goto done; 7429 7430 cur = ctxt->input->cur[0]; 7431 next = ctxt->input->cur[1]; 7432 if (cur == 0) { 7433 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7434 ctxt->sax->setDocumentLocator(ctxt->userData, 7435 &xmlDefaultSAXLocator); 7436 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7438 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7439 ctxt->wellFormed = 0; 7440 ctxt->disableSAX = 1; 7441 ctxt->instate = XML_PARSER_EOF; 7442#ifdef DEBUG_PUSH 7443 xmlGenericError(xmlGenericErrorContext, 7444 "PP: entering EOF\n"); 7445#endif 7446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7447 ctxt->sax->endDocument(ctxt->userData); 7448 goto done; 7449 } 7450 if ((cur == '<') && (next == '?')) { 7451 /* PI or XML decl */ 7452 if (avail < 5) return(ret); 7453 if ((!terminate) && 7454 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7455 return(ret); 7456 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7457 ctxt->sax->setDocumentLocator(ctxt->userData, 7458 &xmlDefaultSAXLocator); 7459 if ((ctxt->input->cur[2] == 'x') && 7460 (ctxt->input->cur[3] == 'm') && 7461 (ctxt->input->cur[4] == 'l') && 7462 (IS_BLANK(ctxt->input->cur[5]))) { 7463 ret += 5; 7464#ifdef DEBUG_PUSH 7465 
xmlGenericError(xmlGenericErrorContext, 7466 "PP: Parsing XML Decl\n"); 7467#endif 7468 xmlParseXMLDecl(ctxt); 7469 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7470 /* 7471 * The XML REC instructs us to stop parsing right 7472 * here 7473 */ 7474 ctxt->instate = XML_PARSER_EOF; 7475 return(0); 7476 } 7477 ctxt->standalone = ctxt->input->standalone; 7478 if ((ctxt->encoding == NULL) && 7479 (ctxt->input->encoding != NULL)) 7480 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 7481 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7482 (!ctxt->disableSAX)) 7483 ctxt->sax->startDocument(ctxt->userData); 7484 ctxt->instate = XML_PARSER_MISC; 7485#ifdef DEBUG_PUSH 7486 xmlGenericError(xmlGenericErrorContext, 7487 "PP: entering MISC\n"); 7488#endif 7489 } else { 7490 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7491 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7492 (!ctxt->disableSAX)) 7493 ctxt->sax->startDocument(ctxt->userData); 7494 ctxt->instate = XML_PARSER_MISC; 7495#ifdef DEBUG_PUSH 7496 xmlGenericError(xmlGenericErrorContext, 7497 "PP: entering MISC\n"); 7498#endif 7499 } 7500 } else { 7501 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7502 ctxt->sax->setDocumentLocator(ctxt->userData, 7503 &xmlDefaultSAXLocator); 7504 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7505 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7506 (!ctxt->disableSAX)) 7507 ctxt->sax->startDocument(ctxt->userData); 7508 ctxt->instate = XML_PARSER_MISC; 7509#ifdef DEBUG_PUSH 7510 xmlGenericError(xmlGenericErrorContext, 7511 "PP: entering MISC\n"); 7512#endif 7513 } 7514 break; 7515 case XML_PARSER_MISC: 7516 SKIP_BLANKS; 7517 if (ctxt->input->buf == NULL) 7518 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7519 else 7520 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7521 if (avail < 2) 7522 goto done; 7523 cur = ctxt->input->cur[0]; 7524 next = ctxt->input->cur[1]; 7525 if ((cur == '<') && (next == 
'?')) { 7526 if ((!terminate) && 7527 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7528 goto done; 7529#ifdef DEBUG_PUSH 7530 xmlGenericError(xmlGenericErrorContext, 7531 "PP: Parsing PI\n"); 7532#endif 7533 xmlParsePI(ctxt); 7534 } else if ((cur == '<') && (next == '!') && 7535 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7536 if ((!terminate) && 7537 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7538 goto done; 7539#ifdef DEBUG_PUSH 7540 xmlGenericError(xmlGenericErrorContext, 7541 "PP: Parsing Comment\n"); 7542#endif 7543 xmlParseComment(ctxt); 7544 ctxt->instate = XML_PARSER_MISC; 7545 } else if ((cur == '<') && (next == '!') && 7546 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 7547 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 7548 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 7549 (ctxt->input->cur[8] == 'E')) { 7550 if ((!terminate) && 7551 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7552 goto done; 7553#ifdef DEBUG_PUSH 7554 xmlGenericError(xmlGenericErrorContext, 7555 "PP: Parsing internal subset\n"); 7556#endif 7557 ctxt->inSubset = 1; 7558 xmlParseDocTypeDecl(ctxt); 7559 if (RAW == '[') { 7560 ctxt->instate = XML_PARSER_DTD; 7561#ifdef DEBUG_PUSH 7562 xmlGenericError(xmlGenericErrorContext, 7563 "PP: entering DTD\n"); 7564#endif 7565 } else { 7566 /* 7567 * Create and update the external subset. 
7568 */ 7569 ctxt->inSubset = 2; 7570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7571 (ctxt->sax->externalSubset != NULL)) 7572 ctxt->sax->externalSubset(ctxt->userData, 7573 ctxt->intSubName, ctxt->extSubSystem, 7574 ctxt->extSubURI); 7575 ctxt->inSubset = 0; 7576 ctxt->instate = XML_PARSER_PROLOG; 7577#ifdef DEBUG_PUSH 7578 xmlGenericError(xmlGenericErrorContext, 7579 "PP: entering PROLOG\n"); 7580#endif 7581 } 7582 } else if ((cur == '<') && (next == '!') && 7583 (avail < 9)) { 7584 goto done; 7585 } else { 7586 ctxt->instate = XML_PARSER_START_TAG; 7587#ifdef DEBUG_PUSH 7588 xmlGenericError(xmlGenericErrorContext, 7589 "PP: entering START_TAG\n"); 7590#endif 7591 } 7592 break; 7593 case XML_PARSER_PROLOG: 7594 SKIP_BLANKS; 7595 if (ctxt->input->buf == NULL) 7596 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7597 else 7598 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7599 if (avail < 2) 7600 goto done; 7601 cur = ctxt->input->cur[0]; 7602 next = ctxt->input->cur[1]; 7603 if ((cur == '<') && (next == '?')) { 7604 if ((!terminate) && 7605 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7606 goto done; 7607#ifdef DEBUG_PUSH 7608 xmlGenericError(xmlGenericErrorContext, 7609 "PP: Parsing PI\n"); 7610#endif 7611 xmlParsePI(ctxt); 7612 } else if ((cur == '<') && (next == '!') && 7613 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7614 if ((!terminate) && 7615 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7616 goto done; 7617#ifdef DEBUG_PUSH 7618 xmlGenericError(xmlGenericErrorContext, 7619 "PP: Parsing Comment\n"); 7620#endif 7621 xmlParseComment(ctxt); 7622 ctxt->instate = XML_PARSER_PROLOG; 7623 } else if ((cur == '<') && (next == '!') && 7624 (avail < 4)) { 7625 goto done; 7626 } else { 7627 ctxt->instate = XML_PARSER_START_TAG; 7628#ifdef DEBUG_PUSH 7629 xmlGenericError(xmlGenericErrorContext, 7630 "PP: entering START_TAG\n"); 7631#endif 7632 } 7633 break; 7634 case 
XML_PARSER_EPILOG: 7635 SKIP_BLANKS; 7636 if (ctxt->input->buf == NULL) 7637 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7638 else 7639 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7640 if (avail < 2) 7641 goto done; 7642 cur = ctxt->input->cur[0]; 7643 next = ctxt->input->cur[1]; 7644 if ((cur == '<') && (next == '?')) { 7645 if ((!terminate) && 7646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7647 goto done; 7648#ifdef DEBUG_PUSH 7649 xmlGenericError(xmlGenericErrorContext, 7650 "PP: Parsing PI\n"); 7651#endif 7652 xmlParsePI(ctxt); 7653 ctxt->instate = XML_PARSER_EPILOG; 7654 } else if ((cur == '<') && (next == '!') && 7655 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7656 if ((!terminate) && 7657 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7658 goto done; 7659#ifdef DEBUG_PUSH 7660 xmlGenericError(xmlGenericErrorContext, 7661 "PP: Parsing Comment\n"); 7662#endif 7663 xmlParseComment(ctxt); 7664 ctxt->instate = XML_PARSER_EPILOG; 7665 } else if ((cur == '<') && (next == '!') && 7666 (avail < 4)) { 7667 goto done; 7668 } else { 7669 ctxt->errNo = XML_ERR_DOCUMENT_END; 7670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7671 ctxt->sax->error(ctxt->userData, 7672 "Extra content at the end of the document\n"); 7673 ctxt->wellFormed = 0; 7674 ctxt->disableSAX = 1; 7675 ctxt->instate = XML_PARSER_EOF; 7676#ifdef DEBUG_PUSH 7677 xmlGenericError(xmlGenericErrorContext, 7678 "PP: entering EOF\n"); 7679#endif 7680 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7681 (!ctxt->disableSAX)) 7682 ctxt->sax->endDocument(ctxt->userData); 7683 goto done; 7684 } 7685 break; 7686 case XML_PARSER_START_TAG: { 7687 xmlChar *name, *oldname; 7688 7689 if ((avail < 2) && (ctxt->inputNr == 1)) 7690 goto done; 7691 cur = ctxt->input->cur[0]; 7692 if (cur != '<') { 7693 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7695 
ctxt->sax->error(ctxt->userData, 7696 "Start tag expect, '<' not found\n"); 7697 ctxt->wellFormed = 0; 7698 ctxt->disableSAX = 1; 7699 ctxt->instate = XML_PARSER_EOF; 7700#ifdef DEBUG_PUSH 7701 xmlGenericError(xmlGenericErrorContext, 7702 "PP: entering EOF\n"); 7703#endif 7704 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7705 (!ctxt->disableSAX)) 7706 ctxt->sax->endDocument(ctxt->userData); 7707 goto done; 7708 } 7709 if ((!terminate) && 7710 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7711 goto done; 7712 if (ctxt->spaceNr == 0) 7713 spacePush(ctxt, -1); 7714 else 7715 spacePush(ctxt, *ctxt->space); 7716 name = xmlParseStartTag(ctxt); 7717 if (name == NULL) { 7718 spacePop(ctxt); 7719 ctxt->instate = XML_PARSER_EOF; 7720#ifdef DEBUG_PUSH 7721 xmlGenericError(xmlGenericErrorContext, 7722 "PP: entering EOF\n"); 7723#endif 7724 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7725 (!ctxt->disableSAX)) 7726 ctxt->sax->endDocument(ctxt->userData); 7727 goto done; 7728 } 7729 namePush(ctxt, xmlStrdup(name)); 7730 7731 /* 7732 * [ VC: Root Element Type ] 7733 * The Name in the document type declaration must match 7734 * the element type of the root element. 7735 */ 7736 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7737 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7738 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7739 7740 /* 7741 * Check for an Empty Element. 
7742 */ 7743 if ((RAW == '/') && (NXT(1) == '>')) { 7744 SKIP(2); 7745 if ((ctxt->sax != NULL) && 7746 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 7747 ctxt->sax->endElement(ctxt->userData, name); 7748 xmlFree(name); 7749 oldname = namePop(ctxt); 7750 spacePop(ctxt); 7751 if (oldname != NULL) { 7752#ifdef DEBUG_STACK 7753 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7754#endif 7755 xmlFree(oldname); 7756 } 7757 if (ctxt->name == NULL) { 7758 ctxt->instate = XML_PARSER_EPILOG; 7759#ifdef DEBUG_PUSH 7760 xmlGenericError(xmlGenericErrorContext, 7761 "PP: entering EPILOG\n"); 7762#endif 7763 } else { 7764 ctxt->instate = XML_PARSER_CONTENT; 7765#ifdef DEBUG_PUSH 7766 xmlGenericError(xmlGenericErrorContext, 7767 "PP: entering CONTENT\n"); 7768#endif 7769 } 7770 break; 7771 } 7772 if (RAW == '>') { 7773 NEXT; 7774 } else { 7775 ctxt->errNo = XML_ERR_GT_REQUIRED; 7776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7777 ctxt->sax->error(ctxt->userData, 7778 "Couldn't find end of Start Tag %s\n", 7779 name); 7780 ctxt->wellFormed = 0; 7781 ctxt->disableSAX = 1; 7782 7783 /* 7784 * end of parsing of this node. 
7785 */ 7786 nodePop(ctxt); 7787 oldname = namePop(ctxt); 7788 spacePop(ctxt); 7789 if (oldname != NULL) { 7790#ifdef DEBUG_STACK 7791 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7792#endif 7793 xmlFree(oldname); 7794 } 7795 } 7796 xmlFree(name); 7797 ctxt->instate = XML_PARSER_CONTENT; 7798#ifdef DEBUG_PUSH 7799 xmlGenericError(xmlGenericErrorContext, 7800 "PP: entering CONTENT\n"); 7801#endif 7802 break; 7803 } 7804 case XML_PARSER_CONTENT: { 7805 const xmlChar *test; 7806 int cons; 7807 xmlChar tok; 7808 7809 /* 7810 * Handle preparsed entities and charRef 7811 */ 7812 if (ctxt->token != 0) { 7813 xmlChar cur[2] = { 0 , 0 } ; 7814 7815 cur[0] = (xmlChar) ctxt->token; 7816 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7817 (ctxt->sax->characters != NULL)) 7818 ctxt->sax->characters(ctxt->userData, cur, 1); 7819 ctxt->token = 0; 7820 } 7821 if ((avail < 2) && (ctxt->inputNr == 1)) 7822 goto done; 7823 cur = ctxt->input->cur[0]; 7824 next = ctxt->input->cur[1]; 7825 7826 test = CUR_PTR; 7827 cons = ctxt->input->consumed; 7828 tok = ctxt->token; 7829 if ((cur == '<') && (next == '?')) { 7830 if ((!terminate) && 7831 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7832 goto done; 7833#ifdef DEBUG_PUSH 7834 xmlGenericError(xmlGenericErrorContext, 7835 "PP: Parsing PI\n"); 7836#endif 7837 xmlParsePI(ctxt); 7838 } else if ((cur == '<') && (next == '!') && 7839 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7840 if ((!terminate) && 7841 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7842 goto done; 7843#ifdef DEBUG_PUSH 7844 xmlGenericError(xmlGenericErrorContext, 7845 "PP: Parsing Comment\n"); 7846#endif 7847 xmlParseComment(ctxt); 7848 ctxt->instate = XML_PARSER_CONTENT; 7849 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 7850 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 7851 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 7852 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 7853 
(ctxt->input->cur[8] == '[')) { 7854 SKIP(9); 7855 ctxt->instate = XML_PARSER_CDATA_SECTION; 7856#ifdef DEBUG_PUSH 7857 xmlGenericError(xmlGenericErrorContext, 7858 "PP: entering CDATA_SECTION\n"); 7859#endif 7860 break; 7861 } else if ((cur == '<') && (next == '!') && 7862 (avail < 9)) { 7863 goto done; 7864 } else if ((cur == '<') && (next == '/')) { 7865 ctxt->instate = XML_PARSER_END_TAG; 7866#ifdef DEBUG_PUSH 7867 xmlGenericError(xmlGenericErrorContext, 7868 "PP: entering END_TAG\n"); 7869#endif 7870 break; 7871 } else if (cur == '<') { 7872 ctxt->instate = XML_PARSER_START_TAG; 7873#ifdef DEBUG_PUSH 7874 xmlGenericError(xmlGenericErrorContext, 7875 "PP: entering START_TAG\n"); 7876#endif 7877 break; 7878 } else if (cur == '&') { 7879 if ((!terminate) && 7880 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 7881 goto done; 7882#ifdef DEBUG_PUSH 7883 xmlGenericError(xmlGenericErrorContext, 7884 "PP: Parsing Reference\n"); 7885#endif 7886 xmlParseReference(ctxt); 7887 } else { 7888 /* TODO Avoid the extra copy, handle directly !!! */ 7889 /* 7890 * Goal of the following test is: 7891 * - minimize calls to the SAX 'character' callback 7892 * when they are mergeable 7893 * - handle an problem for isBlank when we only parse 7894 * a sequence of blank chars and the next one is 7895 * not available to check against '<' presence. 7896 * - tries to homogenize the differences in SAX 7897 * callbacks beween the push and pull versions 7898 * of the parser. 7899 */ 7900 if ((ctxt->inputNr == 1) && 7901 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 7902 if ((!terminate) && 7903 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 7904 goto done; 7905 } 7906 ctxt->checkIndex = 0; 7907#ifdef DEBUG_PUSH 7908 xmlGenericError(xmlGenericErrorContext, 7909 "PP: Parsing char data\n"); 7910#endif 7911 xmlParseCharData(ctxt, 0); 7912 } 7913 /* 7914 * Pop-up of finished entities. 
7915 */ 7916 while ((RAW == 0) && (ctxt->inputNr > 1)) 7917 xmlPopInput(ctxt); 7918 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 7919 (tok == ctxt->token)) { 7920 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 7921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7922 ctxt->sax->error(ctxt->userData, 7923 "detected an error in element content\n"); 7924 ctxt->wellFormed = 0; 7925 ctxt->disableSAX = 1; 7926 ctxt->instate = XML_PARSER_EOF; 7927 break; 7928 } 7929 break; 7930 } 7931 case XML_PARSER_CDATA_SECTION: { 7932 /* 7933 * The Push mode need to have the SAX callback for 7934 * cdataBlock merge back contiguous callbacks. 7935 */ 7936 int base; 7937 7938 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 7939 if (base < 0) { 7940 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 7941 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 7942 if (ctxt->sax->cdataBlock != NULL) 7943 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 7944 XML_PARSER_BIG_BUFFER_SIZE); 7945 } 7946 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 7947 ctxt->checkIndex = 0; 7948 } 7949 goto done; 7950 } else { 7951 if ((ctxt->sax != NULL) && (base > 0) && 7952 (!ctxt->disableSAX)) { 7953 if (ctxt->sax->cdataBlock != NULL) 7954 ctxt->sax->cdataBlock(ctxt->userData, 7955 ctxt->input->cur, base); 7956 } 7957 SKIP(base + 3); 7958 ctxt->checkIndex = 0; 7959 ctxt->instate = XML_PARSER_CONTENT; 7960#ifdef DEBUG_PUSH 7961 xmlGenericError(xmlGenericErrorContext, 7962 "PP: entering CONTENT\n"); 7963#endif 7964 } 7965 break; 7966 } 7967 case XML_PARSER_END_TAG: 7968 if (avail < 2) 7969 goto done; 7970 if ((!terminate) && 7971 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7972 goto done; 7973 xmlParseEndTag(ctxt); 7974 if (ctxt->name == NULL) { 7975 ctxt->instate = XML_PARSER_EPILOG; 7976#ifdef DEBUG_PUSH 7977 xmlGenericError(xmlGenericErrorContext, 7978 "PP: entering EPILOG\n"); 7979#endif 7980 } else { 7981 ctxt->instate = XML_PARSER_CONTENT; 7982#ifdef DEBUG_PUSH 7983 
xmlGenericError(xmlGenericErrorContext, 7984 "PP: entering CONTENT\n"); 7985#endif 7986 } 7987 break; 7988 case XML_PARSER_DTD: { 7989 /* 7990 * Sorry but progressive parsing of the internal subset 7991 * is not expected to be supported. We first check that 7992 * the full content of the internal subset is available and 7993 * the parsing is launched only at that point. 7994 * Internal subset ends up with "']' S? '>'" in an unescaped 7995 * section and not in a ']]>' sequence which are conditional 7996 * sections (whoever argued to keep that crap in XML deserve 7997 * a place in hell !). 7998 */ 7999 int base, i; 8000 xmlChar *buf; 8001 xmlChar quote = 0; 8002 8003 base = ctxt->input->cur - ctxt->input->base; 8004 if (base < 0) return(0); 8005 if (ctxt->checkIndex > base) 8006 base = ctxt->checkIndex; 8007 buf = ctxt->input->buf->buffer->content; 8008 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8009 base++) { 8010 if (quote != 0) { 8011 if (buf[base] == quote) 8012 quote = 0; 8013 continue; 8014 } 8015 if (buf[base] == '"') { 8016 quote = '"'; 8017 continue; 8018 } 8019 if (buf[base] == '\'') { 8020 quote = '\''; 8021 continue; 8022 } 8023 if (buf[base] == ']') { 8024 if ((unsigned int) base +1 >= 8025 ctxt->input->buf->buffer->use) 8026 break; 8027 if (buf[base + 1] == ']') { 8028 /* conditional crap, skip both ']' ! 
*/ 8029 base++; 8030 continue; 8031 } 8032 for (i = 0; 8033 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8034 i++) { 8035 if (buf[base + i] == '>') 8036 goto found_end_int_subset; 8037 } 8038 break; 8039 } 8040 } 8041 /* 8042 * We didn't found the end of the Internal subset 8043 */ 8044 if (quote == 0) 8045 ctxt->checkIndex = base; 8046#ifdef DEBUG_PUSH 8047 if (next == 0) 8048 xmlGenericError(xmlGenericErrorContext, 8049 "PP: lookup of int subset end filed\n"); 8050#endif 8051 goto done; 8052 8053found_end_int_subset: 8054 xmlParseInternalSubset(ctxt); 8055 ctxt->inSubset = 2; 8056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8057 (ctxt->sax->externalSubset != NULL)) 8058 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8059 ctxt->extSubSystem, ctxt->extSubURI); 8060 ctxt->inSubset = 0; 8061 ctxt->instate = XML_PARSER_PROLOG; 8062 ctxt->checkIndex = 0; 8063#ifdef DEBUG_PUSH 8064 xmlGenericError(xmlGenericErrorContext, 8065 "PP: entering PROLOG\n"); 8066#endif 8067 break; 8068 } 8069 case XML_PARSER_COMMENT: 8070 xmlGenericError(xmlGenericErrorContext, 8071 "PP: internal error, state == COMMENT\n"); 8072 ctxt->instate = XML_PARSER_CONTENT; 8073#ifdef DEBUG_PUSH 8074 xmlGenericError(xmlGenericErrorContext, 8075 "PP: entering CONTENT\n"); 8076#endif 8077 break; 8078 case XML_PARSER_PI: 8079 xmlGenericError(xmlGenericErrorContext, 8080 "PP: internal error, state == PI\n"); 8081 ctxt->instate = XML_PARSER_CONTENT; 8082#ifdef DEBUG_PUSH 8083 xmlGenericError(xmlGenericErrorContext, 8084 "PP: entering CONTENT\n"); 8085#endif 8086 break; 8087 case XML_PARSER_ENTITY_DECL: 8088 xmlGenericError(xmlGenericErrorContext, 8089 "PP: internal error, state == ENTITY_DECL\n"); 8090 ctxt->instate = XML_PARSER_DTD; 8091#ifdef DEBUG_PUSH 8092 xmlGenericError(xmlGenericErrorContext, 8093 "PP: entering DTD\n"); 8094#endif 8095 break; 8096 case XML_PARSER_ENTITY_VALUE: 8097 xmlGenericError(xmlGenericErrorContext, 8098 "PP: internal error, state == 
ENTITY_VALUE\n"); 8099 ctxt->instate = XML_PARSER_CONTENT; 8100#ifdef DEBUG_PUSH 8101 xmlGenericError(xmlGenericErrorContext, 8102 "PP: entering DTD\n"); 8103#endif 8104 break; 8105 case XML_PARSER_ATTRIBUTE_VALUE: 8106 xmlGenericError(xmlGenericErrorContext, 8107 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8108 ctxt->instate = XML_PARSER_START_TAG; 8109#ifdef DEBUG_PUSH 8110 xmlGenericError(xmlGenericErrorContext, 8111 "PP: entering START_TAG\n"); 8112#endif 8113 break; 8114 case XML_PARSER_SYSTEM_LITERAL: 8115 xmlGenericError(xmlGenericErrorContext, 8116 "PP: internal error, state == SYSTEM_LITERAL\n"); 8117 ctxt->instate = XML_PARSER_START_TAG; 8118#ifdef DEBUG_PUSH 8119 xmlGenericError(xmlGenericErrorContext, 8120 "PP: entering START_TAG\n"); 8121#endif 8122 break; 8123 } 8124 } 8125done: 8126#ifdef DEBUG_PUSH 8127 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8128#endif 8129 return(ret); 8130} 8131 8132/** 8133 * xmlParseTry: 8134 * @ctxt: an XML parser context 8135 * 8136 * Try to progress on parsing 8137 * 8138 * Returns zero if no parsing was possible 8139 */ 8140int 8141xmlParseTry(xmlParserCtxtPtr ctxt) { 8142 return(xmlParseTryOrFinish(ctxt, 0)); 8143} 8144 8145/** 8146 * xmlParseChunk: 8147 * @ctxt: an XML parser context 8148 * @chunk: an char array 8149 * @size: the size in byte of the chunk 8150 * @terminate: last chunk indicator 8151 * 8152 * Parse a Chunk of memory 8153 * 8154 * Returns zero if no error, the xmlParserErrors otherwise. 
8155 */ 8156int 8157xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8158 int terminate) { 8159 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8160 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8161 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8162 int cur = ctxt->input->cur - ctxt->input->base; 8163 8164 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8165 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8166 ctxt->input->cur = ctxt->input->base + cur; 8167#ifdef DEBUG_PUSH 8168 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8169#endif 8170 8171 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8172 xmlParseTryOrFinish(ctxt, terminate); 8173 } else if (ctxt->instate != XML_PARSER_EOF) 8174 xmlParseTryOrFinish(ctxt, terminate); 8175 if (terminate) { 8176 /* 8177 * Check for termination 8178 */ 8179 if ((ctxt->instate != XML_PARSER_EOF) && 8180 (ctxt->instate != XML_PARSER_EPILOG)) { 8181 ctxt->errNo = XML_ERR_DOCUMENT_END; 8182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8183 ctxt->sax->error(ctxt->userData, 8184 "Extra content at the end of the document\n"); 8185 ctxt->wellFormed = 0; 8186 ctxt->disableSAX = 1; 8187 } 8188 if (ctxt->instate != XML_PARSER_EOF) { 8189 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8190 (!ctxt->disableSAX)) 8191 ctxt->sax->endDocument(ctxt->userData); 8192 } 8193 ctxt->instate = XML_PARSER_EOF; 8194 } 8195 return((xmlParserErrors) ctxt->errNo); 8196} 8197 8198/************************************************************************ 8199 * * 8200 * I/O front end functions to the parser * 8201 * * 8202 ************************************************************************/ 8203 8204/** 8205 * xmlStopParser: 8206 * @ctxt: an XML parser context 8207 * 8208 * Blocks further parser processing 8209 */ 8210void 8211xmlStopParser(xmlParserCtxtPtr ctxt) { 8212 ctxt->instate = XML_PARSER_EOF; 8213 if 
(ctxt->input != NULL) 8214 ctxt->input->cur = BAD_CAST""; 8215} 8216 8217/** 8218 * xmlCreatePushParserCtxt: 8219 * @sax: a SAX handler 8220 * @user_data: The user data returned on SAX callbacks 8221 * @chunk: a pointer to an array of chars 8222 * @size: number of chars in the array 8223 * @filename: an optional file name or URI 8224 * 8225 * Create a parser context for using the XML parser in push mode 8226 * To allow content encoding detection, @size should be >= 4 8227 * The value of @filename is used for fetching external entities 8228 * and error/warning reports. 8229 * 8230 * Returns the new parser context or NULL 8231 */ 8232xmlParserCtxtPtr 8233xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8234 const char *chunk, int size, const char *filename) { 8235 xmlParserCtxtPtr ctxt; 8236 xmlParserInputPtr inputStream; 8237 xmlParserInputBufferPtr buf; 8238 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8239 8240 /* 8241 * plug some encoding conversion routines 8242 */ 8243 if ((chunk != NULL) && (size >= 4)) 8244 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8245 8246 buf = xmlAllocParserInputBuffer(enc); 8247 if (buf == NULL) return(NULL); 8248 8249 ctxt = xmlNewParserCtxt(); 8250 if (ctxt == NULL) { 8251 xmlFree(buf); 8252 return(NULL); 8253 } 8254 if (sax != NULL) { 8255 if (ctxt->sax != &xmlDefaultSAXHandler) 8256 xmlFree(ctxt->sax); 8257 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8258 if (ctxt->sax == NULL) { 8259 xmlFree(buf); 8260 xmlFree(ctxt); 8261 return(NULL); 8262 } 8263 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8264 if (user_data != NULL) 8265 ctxt->userData = user_data; 8266 } 8267 if (filename == NULL) { 8268 ctxt->directory = NULL; 8269 } else { 8270 ctxt->directory = xmlParserGetDirectory(filename); 8271 } 8272 8273 inputStream = xmlNewInputStream(ctxt); 8274 if (inputStream == NULL) { 8275 xmlFreeParserCtxt(ctxt); 8276 return(NULL); 8277 } 8278 8279 if (filename == NULL) 8280 
inputStream->filename = NULL; 8281 else 8282 inputStream->filename = xmlMemStrdup(filename); 8283 inputStream->buf = buf; 8284 inputStream->base = inputStream->buf->buffer->content; 8285 inputStream->cur = inputStream->buf->buffer->content; 8286 if (enc != XML_CHAR_ENCODING_NONE) { 8287 xmlSwitchEncoding(ctxt, enc); 8288 } 8289 8290 inputPush(ctxt, inputStream); 8291 8292 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8293 (ctxt->input->buf != NULL)) { 8294 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8295#ifdef DEBUG_PUSH 8296 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8297#endif 8298 } 8299 8300 return(ctxt); 8301} 8302 8303/** 8304 * xmlCreateIOParserCtxt: 8305 * @sax: a SAX handler 8306 * @user_data: The user data returned on SAX callbacks 8307 * @ioread: an I/O read function 8308 * @ioclose: an I/O close function 8309 * @ioctx: an I/O handler 8310 * @enc: the charset encoding if known 8311 * 8312 * Create a parser context for using the XML parser with an existing 8313 * I/O stream 8314 * 8315 * Returns the new parser context or NULL 8316 */ 8317xmlParserCtxtPtr 8318xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8319 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8320 void *ioctx, xmlCharEncoding enc) { 8321 xmlParserCtxtPtr ctxt; 8322 xmlParserInputPtr inputStream; 8323 xmlParserInputBufferPtr buf; 8324 8325 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8326 if (buf == NULL) return(NULL); 8327 8328 ctxt = xmlNewParserCtxt(); 8329 if (ctxt == NULL) { 8330 xmlFree(buf); 8331 return(NULL); 8332 } 8333 if (sax != NULL) { 8334 if (ctxt->sax != &xmlDefaultSAXHandler) 8335 xmlFree(ctxt->sax); 8336 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8337 if (ctxt->sax == NULL) { 8338 xmlFree(buf); 8339 xmlFree(ctxt); 8340 return(NULL); 8341 } 8342 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8343 if (user_data != NULL) 8344 ctxt->userData = user_data; 8345 } 
8346 8347 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8348 if (inputStream == NULL) { 8349 xmlFreeParserCtxt(ctxt); 8350 return(NULL); 8351 } 8352 inputPush(ctxt, inputStream); 8353 8354 return(ctxt); 8355} 8356 8357/************************************************************************ 8358 * * 8359 * Front ends when parsing a Dtd * 8360 * * 8361 ************************************************************************/ 8362 8363/** 8364 * xmlSAXParseDTD: 8365 * @sax: the SAX handler block 8366 * @ExternalID: a NAME* containing the External ID of the DTD 8367 * @SystemID: a NAME* containing the URL to the DTD 8368 * 8369 * Load and parse an external subset. 8370 * 8371 * Returns the resulting xmlDtdPtr or NULL in case of error. 8372 */ 8373 8374xmlDtdPtr 8375xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 8376 const xmlChar *SystemID) { 8377 xmlDtdPtr ret = NULL; 8378 xmlParserCtxtPtr ctxt; 8379 xmlParserInputPtr input = NULL; 8380 xmlCharEncoding enc; 8381 8382 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 8383 8384 ctxt = xmlNewParserCtxt(); 8385 if (ctxt == NULL) { 8386 return(NULL); 8387 } 8388 8389 /* 8390 * Set-up the SAX context 8391 */ 8392 if (sax != NULL) { 8393 if (ctxt->sax != NULL) 8394 xmlFree(ctxt->sax); 8395 ctxt->sax = sax; 8396 ctxt->userData = NULL; 8397 } 8398 8399 /* 8400 * Ask the Entity resolver to load the damn thing 8401 */ 8402 8403 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8404 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8405 if (input == NULL) { 8406 if (sax != NULL) ctxt->sax = NULL; 8407 xmlFreeParserCtxt(ctxt); 8408 return(NULL); 8409 } 8410 8411 /* 8412 * plug some encoding conversion routines here. 
8413 */ 8414 xmlPushInput(ctxt, input); 8415 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 8416 xmlSwitchEncoding(ctxt, enc); 8417 8418 if (input->filename == NULL) 8419 input->filename = (char *) xmlStrdup(SystemID); 8420 input->line = 1; 8421 input->col = 1; 8422 input->base = ctxt->input->cur; 8423 input->cur = ctxt->input->cur; 8424 input->free = NULL; 8425 8426 /* 8427 * let's parse that entity knowing it's an external subset. 8428 */ 8429 ctxt->inSubset = 2; 8430 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8431 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8432 ExternalID, SystemID); 8433 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8434 8435 if (ctxt->myDoc != NULL) { 8436 if (ctxt->wellFormed) { 8437 ret = ctxt->myDoc->extSubset; 8438 ctxt->myDoc->extSubset = NULL; 8439 } else { 8440 ret = NULL; 8441 } 8442 xmlFreeDoc(ctxt->myDoc); 8443 ctxt->myDoc = NULL; 8444 } 8445 if (sax != NULL) ctxt->sax = NULL; 8446 xmlFreeParserCtxt(ctxt); 8447 8448 return(ret); 8449} 8450 8451/** 8452 * xmlParseDTD: 8453 * @ExternalID: a NAME* containing the External ID of the DTD 8454 * @SystemID: a NAME* containing the URL to the DTD 8455 * 8456 * Load and parse an external subset. 8457 * 8458 * Returns the resulting xmlDtdPtr or NULL in case of error. 
8459 */ 8460 8461xmlDtdPtr 8462xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 8463 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 8464} 8465 8466/************************************************************************ 8467 * * 8468 * Front ends when parsing an Entity * 8469 * * 8470 ************************************************************************/ 8471 8472/** 8473 * xmlSAXParseBalancedChunk: 8474 * @ctx: an XML parser context (possibly NULL) 8475 * @sax: the SAX handler bloc (possibly NULL) 8476 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8477 * @input: a parser input stream 8478 * @enc: the encoding 8479 * 8480 * Parse a well-balanced chunk of an XML document 8481 * The user has to provide SAX callback block whose routines will be 8482 * called by the parser 8483 * The allowed sequence for the Well Balanced Chunk is the one defined by 8484 * the content production in the XML grammar: 8485 * 8486 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8487 * 8488 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 8489 * the error code otherwise 8490 */ 8491 8492int 8493xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax, 8494 void *user_data, xmlParserInputPtr input, 8495 xmlCharEncoding enc) { 8496 xmlParserCtxtPtr ctxt; 8497 int ret; 8498 8499 if (input == NULL) return(-1); 8500 8501 if (ctx != NULL) 8502 ctxt = ctx; 8503 else { 8504 ctxt = xmlNewParserCtxt(); 8505 if (ctxt == NULL) 8506 return(-1); 8507 if (sax == NULL) 8508 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8509 } 8510 8511 /* 8512 * Set-up the SAX context 8513 */ 8514 if (sax != NULL) { 8515 if (ctxt->sax != NULL) 8516 xmlFree(ctxt->sax); 8517 ctxt->sax = sax; 8518 ctxt->userData = user_data; 8519 } 8520 8521 /* 8522 * plug some encoding conversion routines here. 
8523 */ 8524 xmlPushInput(ctxt, input); 8525 if (enc != XML_CHAR_ENCODING_NONE) 8526 xmlSwitchEncoding(ctxt, enc); 8527 8528 /* 8529 * let's parse that entity knowing it's an external subset. 8530 */ 8531 xmlParseContent(ctxt); 8532 ret = ctxt->errNo; 8533 8534 if (ctx == NULL) { 8535 if (sax != NULL) 8536 ctxt->sax = NULL; 8537 else 8538 xmlFreeDoc(ctxt->myDoc); 8539 xmlFreeParserCtxt(ctxt); 8540 } 8541 return(ret); 8542} 8543 8544/** 8545 * xmlParseCtxtExternalEntity: 8546 * @ctx: the existing parsing context 8547 * @URL: the URL for the entity to load 8548 * @ID: the System ID for the entity to load 8549 * @list: the return value for the set of parsed nodes 8550 * 8551 * Parse an external general entity within an existing parsing context 8552 * An external general parsed entity is well-formed if it matches the 8553 * production labeled extParsedEnt. 8554 * 8555 * [78] extParsedEnt ::= TextDecl? content 8556 * 8557 * Returns 0 if the entity is well formed, -1 in case of args problem and 8558 * the parser error code otherwise 8559 */ 8560 8561int 8562xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 8563 const xmlChar *ID, xmlNodePtr *list) { 8564 xmlParserCtxtPtr ctxt; 8565 xmlDocPtr newDoc; 8566 xmlSAXHandlerPtr oldsax = NULL; 8567 int ret = 0; 8568 8569 if (ctx->depth > 40) { 8570 return(XML_ERR_ENTITY_LOOP); 8571 } 8572 8573 if (list != NULL) 8574 *list = NULL; 8575 if ((URL == NULL) && (ID == NULL)) 8576 return(-1); 8577 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 8578 return(-1); 8579 8580 8581 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8582 if (ctxt == NULL) return(-1); 8583 ctxt->userData = ctxt; 8584 oldsax = ctxt->sax; 8585 ctxt->sax = ctx->sax; 8586 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8587 if (newDoc == NULL) { 8588 xmlFreeParserCtxt(ctxt); 8589 return(-1); 8590 } 8591 if (ctx->myDoc != NULL) { 8592 newDoc->intSubset = ctx->myDoc->intSubset; 8593 newDoc->extSubset = ctx->myDoc->extSubset; 8594 } 8595 if 
(ctx->myDoc->URL != NULL) { 8596 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 8597 } 8598 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8599 if (newDoc->children == NULL) { 8600 ctxt->sax = oldsax; 8601 xmlFreeParserCtxt(ctxt); 8602 newDoc->intSubset = NULL; 8603 newDoc->extSubset = NULL; 8604 xmlFreeDoc(newDoc); 8605 return(-1); 8606 } 8607 nodePush(ctxt, newDoc->children); 8608 if (ctx->myDoc == NULL) { 8609 ctxt->myDoc = newDoc; 8610 } else { 8611 ctxt->myDoc = ctx->myDoc; 8612 newDoc->children->doc = ctx->myDoc; 8613 } 8614 8615 /* 8616 * Parse a possible text declaration first 8617 */ 8618 GROW; 8619 if ((RAW == '<') && (NXT(1) == '?') && 8620 (NXT(2) == 'x') && (NXT(3) == 'm') && 8621 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8622 xmlParseTextDecl(ctxt); 8623 } 8624 8625 /* 8626 * Doing validity checking on chunk doesn't make sense 8627 */ 8628 ctxt->instate = XML_PARSER_CONTENT; 8629 ctxt->validate = ctx->validate; 8630 ctxt->depth = ctx->depth + 1; 8631 ctxt->replaceEntities = ctx->replaceEntities; 8632 if (ctxt->validate) { 8633 ctxt->vctxt.error = ctx->vctxt.error; 8634 ctxt->vctxt.warning = ctx->vctxt.warning; 8635 /* Allocate the Node stack */ 8636 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); 8637 if (ctxt->vctxt.nodeTab == NULL) { 8638 xmlGenericError(xmlGenericErrorContext, 8639 "xmlParseCtxtExternalEntity: out of memory\n"); 8640 ctxt->validate = 0; 8641 ctxt->vctxt.error = NULL; 8642 ctxt->vctxt.warning = NULL; 8643 } else { 8644 ctxt->vctxt.nodeNr = 0; 8645 ctxt->vctxt.nodeMax = 4; 8646 ctxt->vctxt.node = NULL; 8647 } 8648 } else { 8649 ctxt->vctxt.error = NULL; 8650 ctxt->vctxt.warning = NULL; 8651 } 8652 8653 xmlParseContent(ctxt); 8654 8655 if ((RAW == '<') && (NXT(1) == '/')) { 8656 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8658 ctxt->sax->error(ctxt->userData, 8659 "chunk is not well balanced\n"); 8660 ctxt->wellFormed = 0; 8661 
ctxt->disableSAX = 1; 8662 } else if (RAW != 0) { 8663 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8665 ctxt->sax->error(ctxt->userData, 8666 "extra content at the end of well balanced chunk\n"); 8667 ctxt->wellFormed = 0; 8668 ctxt->disableSAX = 1; 8669 } 8670 if (ctxt->node != newDoc->children) { 8671 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8673 ctxt->sax->error(ctxt->userData, 8674 "chunk is not well balanced\n"); 8675 ctxt->wellFormed = 0; 8676 ctxt->disableSAX = 1; 8677 } 8678 8679 if (!ctxt->wellFormed) { 8680 if (ctxt->errNo == 0) 8681 ret = 1; 8682 else 8683 ret = ctxt->errNo; 8684 } else { 8685 if (list != NULL) { 8686 xmlNodePtr cur; 8687 8688 /* 8689 * Return the newly created nodeset after unlinking it from 8690 * they pseudo parent. 8691 */ 8692 cur = newDoc->children->children; 8693 *list = cur; 8694 while (cur != NULL) { 8695 cur->parent = NULL; 8696 cur = cur->next; 8697 } 8698 newDoc->children->children = NULL; 8699 } 8700 ret = 0; 8701 } 8702 ctxt->sax = oldsax; 8703 xmlFreeParserCtxt(ctxt); 8704 newDoc->intSubset = NULL; 8705 newDoc->extSubset = NULL; 8706 xmlFreeDoc(newDoc); 8707 8708 return(ret); 8709} 8710 8711/** 8712 * xmlParseExternalEntity: 8713 * @doc: the document the chunk pertains to 8714 * @sax: the SAX handler bloc (possibly NULL) 8715 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8716 * @depth: Used for loop detection, use 0 8717 * @URL: the URL for the entity to load 8718 * @ID: the System ID for the entity to load 8719 * @list: the return value for the set of parsed nodes 8720 * 8721 * Parse an external general entity 8722 * An external general parsed entity is well-formed if it matches the 8723 * production labeled extParsedEnt. 8724 * 8725 * [78] extParsedEnt ::= TextDecl? 
content 8726 * 8727 * Returns 0 if the entity is well formed, -1 in case of args problem and 8728 * the parser error code otherwise 8729 */ 8730 8731int 8732xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 8733 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) { 8734 xmlParserCtxtPtr ctxt; 8735 xmlDocPtr newDoc; 8736 xmlSAXHandlerPtr oldsax = NULL; 8737 int ret = 0; 8738 8739 if (depth > 40) { 8740 return(XML_ERR_ENTITY_LOOP); 8741 } 8742 8743 8744 8745 if (list != NULL) 8746 *list = NULL; 8747 if ((URL == NULL) && (ID == NULL)) 8748 return(-1); 8749 if (doc == NULL) /* @@ relax but check for dereferences */ 8750 return(-1); 8751 8752 8753 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8754 if (ctxt == NULL) return(-1); 8755 ctxt->userData = ctxt; 8756 if (sax != NULL) { 8757 oldsax = ctxt->sax; 8758 ctxt->sax = sax; 8759 if (user_data != NULL) 8760 ctxt->userData = user_data; 8761 } 8762 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8763 if (newDoc == NULL) { 8764 xmlFreeParserCtxt(ctxt); 8765 return(-1); 8766 } 8767 if (doc != NULL) { 8768 newDoc->intSubset = doc->intSubset; 8769 newDoc->extSubset = doc->extSubset; 8770 } 8771 if (doc->URL != NULL) { 8772 newDoc->URL = xmlStrdup(doc->URL); 8773 } 8774 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8775 if (newDoc->children == NULL) { 8776 if (sax != NULL) 8777 ctxt->sax = oldsax; 8778 xmlFreeParserCtxt(ctxt); 8779 newDoc->intSubset = NULL; 8780 newDoc->extSubset = NULL; 8781 xmlFreeDoc(newDoc); 8782 return(-1); 8783 } 8784 nodePush(ctxt, newDoc->children); 8785 if (doc == NULL) { 8786 ctxt->myDoc = newDoc; 8787 } else { 8788 ctxt->myDoc = doc; 8789 newDoc->children->doc = doc; 8790 } 8791 8792 /* 8793 * Parse a possible text declaration first 8794 */ 8795 GROW; 8796 if ((RAW == '<') && (NXT(1) == '?') && 8797 (NXT(2) == 'x') && (NXT(3) == 'm') && 8798 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8799 xmlParseTextDecl(ctxt); 8800 } 8801 8802 /* 8803 * 
Doing validity checking on chunk doesn't make sense 8804 */ 8805 ctxt->instate = XML_PARSER_CONTENT; 8806 ctxt->validate = 0; 8807 ctxt->depth = depth; 8808 8809 xmlParseContent(ctxt); 8810 8811 if ((RAW == '<') && (NXT(1) == '/')) { 8812 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8814 ctxt->sax->error(ctxt->userData, 8815 "chunk is not well balanced\n"); 8816 ctxt->wellFormed = 0; 8817 ctxt->disableSAX = 1; 8818 } else if (RAW != 0) { 8819 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8821 ctxt->sax->error(ctxt->userData, 8822 "extra content at the end of well balanced chunk\n"); 8823 ctxt->wellFormed = 0; 8824 ctxt->disableSAX = 1; 8825 } 8826 if (ctxt->node != newDoc->children) { 8827 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8829 ctxt->sax->error(ctxt->userData, 8830 "chunk is not well balanced\n"); 8831 ctxt->wellFormed = 0; 8832 ctxt->disableSAX = 1; 8833 } 8834 8835 if (!ctxt->wellFormed) { 8836 if (ctxt->errNo == 0) 8837 ret = 1; 8838 else 8839 ret = ctxt->errNo; 8840 } else { 8841 if (list != NULL) { 8842 xmlNodePtr cur; 8843 8844 /* 8845 * Return the newly created nodeset after unlinking it from 8846 * they pseudo parent. 
8847 */ 8848 cur = newDoc->children->children; 8849 *list = cur; 8850 while (cur != NULL) { 8851 cur->parent = NULL; 8852 cur = cur->next; 8853 } 8854 newDoc->children->children = NULL; 8855 } 8856 ret = 0; 8857 } 8858 if (sax != NULL) 8859 ctxt->sax = oldsax; 8860 xmlFreeParserCtxt(ctxt); 8861 newDoc->intSubset = NULL; 8862 newDoc->extSubset = NULL; 8863 xmlFreeDoc(newDoc); 8864 8865 return(ret); 8866} 8867 8868/** 8869 * xmlParseBalancedChunk: 8870 * @doc: the document the chunk pertains to 8871 * @sax: the SAX handler bloc (possibly NULL) 8872 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8873 * @depth: Used for loop detection, use 0 8874 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 8875 * @list: the return value for the set of parsed nodes 8876 * 8877 * Parse a well-balanced chunk of an XML document 8878 * called by the parser 8879 * The allowed sequence for the Well Balanced Chunk is the one defined by 8880 * the content production in the XML grammar: 8881 * 8882 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8883 * 8884 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 8885 * the parser error code otherwise 8886 */ 8887 8888int 8889xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 8890 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) { 8891 xmlParserCtxtPtr ctxt; 8892 xmlDocPtr newDoc; 8893 xmlSAXHandlerPtr oldsax = NULL; 8894 int size; 8895 int ret = 0; 8896 8897 if (depth > 40) { 8898 return(XML_ERR_ENTITY_LOOP); 8899 } 8900 8901 8902 if (list != NULL) 8903 *list = NULL; 8904 if (string == NULL) 8905 return(-1); 8906 8907 size = xmlStrlen(string); 8908 8909 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 8910 if (ctxt == NULL) return(-1); 8911 ctxt->userData = ctxt; 8912 if (sax != NULL) { 8913 oldsax = ctxt->sax; 8914 ctxt->sax = sax; 8915 if (user_data != NULL) 8916 ctxt->userData = user_data; 8917 } 8918 newDoc = 
xmlNewDoc(BAD_CAST "1.0"); 8919 if (newDoc == NULL) { 8920 xmlFreeParserCtxt(ctxt); 8921 return(-1); 8922 } 8923 if (doc != NULL) { 8924 newDoc->intSubset = doc->intSubset; 8925 newDoc->extSubset = doc->extSubset; 8926 } 8927 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8928 if (newDoc->children == NULL) { 8929 if (sax != NULL) 8930 ctxt->sax = oldsax; 8931 xmlFreeParserCtxt(ctxt); 8932 newDoc->intSubset = NULL; 8933 newDoc->extSubset = NULL; 8934 xmlFreeDoc(newDoc); 8935 return(-1); 8936 } 8937 nodePush(ctxt, newDoc->children); 8938 if (doc == NULL) { 8939 ctxt->myDoc = newDoc; 8940 } else { 8941 ctxt->myDoc = doc; 8942 newDoc->children->doc = doc; 8943 } 8944 ctxt->instate = XML_PARSER_CONTENT; 8945 ctxt->depth = depth; 8946 8947 /* 8948 * Doing validity checking on chunk doesn't make sense 8949 */ 8950 ctxt->validate = 0; 8951 8952 xmlParseContent(ctxt); 8953 8954 if ((RAW == '<') && (NXT(1) == '/')) { 8955 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8957 ctxt->sax->error(ctxt->userData, 8958 "chunk is not well balanced\n"); 8959 ctxt->wellFormed = 0; 8960 ctxt->disableSAX = 1; 8961 } else if (RAW != 0) { 8962 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8964 ctxt->sax->error(ctxt->userData, 8965 "extra content at the end of well balanced chunk\n"); 8966 ctxt->wellFormed = 0; 8967 ctxt->disableSAX = 1; 8968 } 8969 if (ctxt->node != newDoc->children) { 8970 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8972 ctxt->sax->error(ctxt->userData, 8973 "chunk is not well balanced\n"); 8974 ctxt->wellFormed = 0; 8975 ctxt->disableSAX = 1; 8976 } 8977 8978 if (!ctxt->wellFormed) { 8979 if (ctxt->errNo == 0) 8980 ret = 1; 8981 else 8982 ret = ctxt->errNo; 8983 } else { 8984 if (list != NULL) { 8985 xmlNodePtr cur; 8986 8987 /* 8988 * Return the newly created nodeset after unlinking 
it from 8989 * they pseudo parent. 8990 */ 8991 cur = newDoc->children->children; 8992 *list = cur; 8993 while (cur != NULL) { 8994 cur->parent = NULL; 8995 cur = cur->next; 8996 } 8997 newDoc->children->children = NULL; 8998 } 8999 ret = 0; 9000 } 9001 if (sax != NULL) 9002 ctxt->sax = oldsax; 9003 xmlFreeParserCtxt(ctxt); 9004 newDoc->intSubset = NULL; 9005 newDoc->extSubset = NULL; 9006 xmlFreeDoc(newDoc); 9007 9008 return(ret); 9009} 9010 9011/** 9012 * xmlSAXParseEntity: 9013 * @sax: the SAX handler block 9014 * @filename: the filename 9015 * 9016 * parse an XML external entity out of context and build a tree. 9017 * It use the given SAX function block to handle the parsing callback. 9018 * If sax is NULL, fallback to the default DOM tree building routines. 9019 * 9020 * [78] extParsedEnt ::= TextDecl? content 9021 * 9022 * This correspond to a "Well Balanced" chunk 9023 * 9024 * Returns the resulting document tree 9025 */ 9026 9027xmlDocPtr 9028xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9029 xmlDocPtr ret; 9030 xmlParserCtxtPtr ctxt; 9031 char *directory = NULL; 9032 9033 ctxt = xmlCreateFileParserCtxt(filename); 9034 if (ctxt == NULL) { 9035 return(NULL); 9036 } 9037 if (sax != NULL) { 9038 if (ctxt->sax != NULL) 9039 xmlFree(ctxt->sax); 9040 ctxt->sax = sax; 9041 ctxt->userData = NULL; 9042 } 9043 9044 if ((ctxt->directory == NULL) && (directory == NULL)) 9045 directory = xmlParserGetDirectory(filename); 9046 9047 xmlParseExtParsedEnt(ctxt); 9048 9049 if (ctxt->wellFormed) 9050 ret = ctxt->myDoc; 9051 else { 9052 ret = NULL; 9053 xmlFreeDoc(ctxt->myDoc); 9054 ctxt->myDoc = NULL; 9055 } 9056 if (sax != NULL) 9057 ctxt->sax = NULL; 9058 xmlFreeParserCtxt(ctxt); 9059 9060 return(ret); 9061} 9062 9063/** 9064 * xmlParseEntity: 9065 * @filename: the filename 9066 * 9067 * parse an XML external entity out of context and build a tree. 9068 * 9069 * [78] extParsedEnt ::= TextDecl? 
content 9070 * 9071 * This correspond to a "Well Balanced" chunk 9072 * 9073 * Returns the resulting document tree 9074 */ 9075 9076xmlDocPtr 9077xmlParseEntity(const char *filename) { 9078 return(xmlSAXParseEntity(NULL, filename)); 9079} 9080 9081/** 9082 * xmlCreateEntityParserCtxt: 9083 * @URL: the entity URL 9084 * @ID: the entity PUBLIC ID 9085 * @base: a posible base for the target URI 9086 * 9087 * Create a parser context for an external entity 9088 * Automatic support for ZLIB/Compress compressed document is provided 9089 * by default if found at compile-time. 9090 * 9091 * Returns the new parser context or NULL 9092 */ 9093xmlParserCtxtPtr 9094xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9095 const xmlChar *base) { 9096 xmlParserCtxtPtr ctxt; 9097 xmlParserInputPtr inputStream; 9098 char *directory = NULL; 9099 xmlChar *uri; 9100 9101 ctxt = xmlNewParserCtxt(); 9102 if (ctxt == NULL) { 9103 return(NULL); 9104 } 9105 9106 uri = xmlBuildURI(URL, base); 9107 9108 if (uri == NULL) { 9109 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9110 if (inputStream == NULL) { 9111 xmlFreeParserCtxt(ctxt); 9112 return(NULL); 9113 } 9114 9115 inputPush(ctxt, inputStream); 9116 9117 if ((ctxt->directory == NULL) && (directory == NULL)) 9118 directory = xmlParserGetDirectory((char *)URL); 9119 if ((ctxt->directory == NULL) && (directory != NULL)) 9120 ctxt->directory = directory; 9121 } else { 9122 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9123 if (inputStream == NULL) { 9124 xmlFreeParserCtxt(ctxt); 9125 return(NULL); 9126 } 9127 9128 inputPush(ctxt, inputStream); 9129 9130 if ((ctxt->directory == NULL) && (directory == NULL)) 9131 directory = xmlParserGetDirectory((char *)uri); 9132 if ((ctxt->directory == NULL) && (directory != NULL)) 9133 ctxt->directory = directory; 9134 xmlFree(uri); 9135 } 9136 9137 return(ctxt); 9138} 9139 9140/************************************************************************ 
9141 * * 9142 * Front ends when parsing from a file * 9143 * * 9144 ************************************************************************/ 9145 9146/** 9147 * xmlCreateFileParserCtxt: 9148 * @filename: the filename 9149 * 9150 * Create a parser context for a file content. 9151 * Automatic support for ZLIB/Compress compressed document is provided 9152 * by default if found at compile-time. 9153 * 9154 * Returns the new parser context or NULL 9155 */ 9156xmlParserCtxtPtr 9157xmlCreateFileParserCtxt(const char *filename) 9158{ 9159 xmlParserCtxtPtr ctxt; 9160 xmlParserInputPtr inputStream; 9161 xmlParserInputBufferPtr buf; 9162 char *directory = NULL; 9163 9164 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 9165 if (buf == NULL) { 9166 return(NULL); 9167 } 9168 9169 ctxt = xmlNewParserCtxt(); 9170 if (ctxt == NULL) { 9171 if (xmlDefaultSAXHandler.error != NULL) { 9172 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9173 } 9174 return(NULL); 9175 } 9176 9177 inputStream = xmlNewInputStream(ctxt); 9178 if (inputStream == NULL) { 9179 xmlFreeParserCtxt(ctxt); 9180 return(NULL); 9181 } 9182 9183 inputStream->filename = xmlMemStrdup(filename); 9184 inputStream->buf = buf; 9185 inputStream->base = inputStream->buf->buffer->content; 9186 inputStream->cur = inputStream->buf->buffer->content; 9187 9188 inputPush(ctxt, inputStream); 9189 if ((ctxt->directory == NULL) && (directory == NULL)) 9190 directory = xmlParserGetDirectory(filename); 9191 if ((ctxt->directory == NULL) && (directory != NULL)) 9192 ctxt->directory = directory; 9193 9194 return(ctxt); 9195} 9196 9197/** 9198 * xmlSAXParseFile: 9199 * @sax: the SAX handler block 9200 * @filename: the filename 9201 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9202 * documents 9203 * 9204 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9205 * compressed document is provided by default if found at compile-time. 
9206 * It use the given SAX function block to handle the parsing callback. 9207 * If sax is NULL, fallback to the default DOM tree building routines. 9208 * 9209 * Returns the resulting document tree 9210 */ 9211 9212xmlDocPtr 9213xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9214 int recovery) { 9215 xmlDocPtr ret; 9216 xmlParserCtxtPtr ctxt; 9217 char *directory = NULL; 9218 9219 ctxt = xmlCreateFileParserCtxt(filename); 9220 if (ctxt == NULL) { 9221 return(NULL); 9222 } 9223 if (sax != NULL) { 9224 if (ctxt->sax != NULL) 9225 xmlFree(ctxt->sax); 9226 ctxt->sax = sax; 9227 ctxt->userData = NULL; 9228 } 9229 9230 if ((ctxt->directory == NULL) && (directory == NULL)) 9231 directory = xmlParserGetDirectory(filename); 9232 if ((ctxt->directory == NULL) && (directory != NULL)) 9233 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9234 9235 xmlParseDocument(ctxt); 9236 9237 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9238 else { 9239 ret = NULL; 9240 xmlFreeDoc(ctxt->myDoc); 9241 ctxt->myDoc = NULL; 9242 } 9243 if (sax != NULL) 9244 ctxt->sax = NULL; 9245 xmlFreeParserCtxt(ctxt); 9246 9247 return(ret); 9248} 9249 9250/** 9251 * xmlRecoverDoc: 9252 * @cur: a pointer to an array of xmlChar 9253 * 9254 * parse an XML in-memory document and build a tree. 9255 * In the case the document is not Well Formed, a tree is built anyway 9256 * 9257 * Returns the resulting document tree 9258 */ 9259 9260xmlDocPtr 9261xmlRecoverDoc(xmlChar *cur) { 9262 return(xmlSAXParseDoc(NULL, cur, 1)); 9263} 9264 9265/** 9266 * xmlParseFile: 9267 * @filename: the filename 9268 * 9269 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9270 * compressed document is provided by default if found at compile-time. 
9271 * 9272 * Returns the resulting document tree 9273 */ 9274 9275xmlDocPtr 9276xmlParseFile(const char *filename) { 9277 return(xmlSAXParseFile(NULL, filename, 0)); 9278} 9279 9280/** 9281 * xmlRecoverFile: 9282 * @filename: the filename 9283 * 9284 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9285 * compressed document is provided by default if found at compile-time. 9286 * In the case the document is not Well Formed, a tree is built anyway 9287 * 9288 * Returns the resulting document tree 9289 */ 9290 9291xmlDocPtr 9292xmlRecoverFile(const char *filename) { 9293 return(xmlSAXParseFile(NULL, filename, 1)); 9294} 9295 9296 9297/** 9298 * xmlSetupParserForBuffer: 9299 * @ctxt: an XML parser context 9300 * @buffer: a xmlChar * buffer 9301 * @filename: a file name 9302 * 9303 * Setup the parser context to parse a new buffer; Clears any prior 9304 * contents from the parser context. The buffer parameter must not be 9305 * NULL, but the filename parameter can be 9306 */ 9307void 9308xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9309 const char* filename) 9310{ 9311 xmlParserInputPtr input; 9312 9313 input = xmlNewInputStream(ctxt); 9314 if (input == NULL) { 9315 perror("malloc"); 9316 xmlFree(ctxt); 9317 return; 9318 } 9319 9320 xmlClearParserCtxt(ctxt); 9321 if (filename != NULL) 9322 input->filename = xmlMemStrdup(filename); 9323 input->base = buffer; 9324 input->cur = buffer; 9325 inputPush(ctxt, input); 9326} 9327 9328/** 9329 * xmlSAXUserParseFile: 9330 * @sax: a SAX handler 9331 * @user_data: The user data returned on SAX callbacks 9332 * @filename: a file name 9333 * 9334 * parse an XML file and call the given SAX handler routines. 
9335 * Automatic support for ZLIB/Compress compressed document is provided 9336 * 9337 * Returns 0 in case of success or a error number otherwise 9338 */ 9339int 9340xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 9341 const char *filename) { 9342 int ret = 0; 9343 xmlParserCtxtPtr ctxt; 9344 9345 ctxt = xmlCreateFileParserCtxt(filename); 9346 if (ctxt == NULL) return -1; 9347 if (ctxt->sax != &xmlDefaultSAXHandler) 9348 xmlFree(ctxt->sax); 9349 ctxt->sax = sax; 9350 if (user_data != NULL) 9351 ctxt->userData = user_data; 9352 9353 xmlParseDocument(ctxt); 9354 9355 if (ctxt->wellFormed) 9356 ret = 0; 9357 else { 9358 if (ctxt->errNo != 0) 9359 ret = ctxt->errNo; 9360 else 9361 ret = -1; 9362 } 9363 if (sax != NULL) 9364 ctxt->sax = NULL; 9365 xmlFreeParserCtxt(ctxt); 9366 9367 return ret; 9368} 9369 9370/************************************************************************ 9371 * * 9372 * Front ends when parsing from memory * 9373 * * 9374 ************************************************************************/ 9375 9376/** 9377 * xmlCreateMemoryParserCtxt: 9378 * @buffer: a pointer to a char array 9379 * @size: the size of the array 9380 * 9381 * Create a parser context for an XML in-memory document. 
9382 * 9383 * Returns the new parser context or NULL 9384 */ 9385xmlParserCtxtPtr 9386xmlCreateMemoryParserCtxt(char *buffer, int size) { 9387 xmlParserCtxtPtr ctxt; 9388 xmlParserInputPtr input; 9389 xmlParserInputBufferPtr buf; 9390 9391 if (buffer == NULL) 9392 return(NULL); 9393 if (size <= 0) 9394 return(NULL); 9395 9396 ctxt = xmlNewParserCtxt(); 9397 if (ctxt == NULL) 9398 return(NULL); 9399 9400 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 9401 if (buf == NULL) return(NULL); 9402 9403 input = xmlNewInputStream(ctxt); 9404 if (input == NULL) { 9405 xmlFreeParserCtxt(ctxt); 9406 return(NULL); 9407 } 9408 9409 input->filename = NULL; 9410 input->buf = buf; 9411 input->base = input->buf->buffer->content; 9412 input->cur = input->buf->buffer->content; 9413 9414 inputPush(ctxt, input); 9415 return(ctxt); 9416} 9417 9418/** 9419 * xmlSAXParseMemory: 9420 * @sax: the SAX handler block 9421 * @buffer: an pointer to a char array 9422 * @size: the size of the array 9423 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 9424 * documents 9425 * 9426 * parse an XML in-memory block and use the given SAX function block 9427 * to handle the parsing callback. If sax is NULL, fallback to the default 9428 * DOM tree building routines. 
9429 * 9430 * Returns the resulting document tree 9431 */ 9432xmlDocPtr 9433xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) { 9434 xmlDocPtr ret; 9435 xmlParserCtxtPtr ctxt; 9436 9437 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9438 if (ctxt == NULL) return(NULL); 9439 if (sax != NULL) { 9440 ctxt->sax = sax; 9441 ctxt->userData = NULL; 9442 } 9443 9444 xmlParseDocument(ctxt); 9445 9446 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9447 else { 9448 ret = NULL; 9449 xmlFreeDoc(ctxt->myDoc); 9450 ctxt->myDoc = NULL; 9451 } 9452 if (sax != NULL) 9453 ctxt->sax = NULL; 9454 xmlFreeParserCtxt(ctxt); 9455 9456 return(ret); 9457} 9458 9459/** 9460 * xmlParseMemory: 9461 * @buffer: an pointer to a char array 9462 * @size: the size of the array 9463 * 9464 * parse an XML in-memory block and build a tree. 9465 * 9466 * Returns the resulting document tree 9467 */ 9468 9469xmlDocPtr xmlParseMemory(char *buffer, int size) { 9470 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 9471} 9472 9473/** 9474 * xmlRecoverMemory: 9475 * @buffer: an pointer to a char array 9476 * @size: the size of the array 9477 * 9478 * parse an XML in-memory block and build a tree. 9479 * In the case the document is not Well Formed, a tree is built anyway 9480 * 9481 * Returns the resulting document tree 9482 */ 9483 9484xmlDocPtr xmlRecoverMemory(char *buffer, int size) { 9485 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 9486} 9487 9488/** 9489 * xmlSAXUserParseMemory: 9490 * @sax: a SAX handler 9491 * @user_data: The user data returned on SAX callbacks 9492 * @buffer: an in-memory XML document input 9493 * @size: the length of the XML document in bytes 9494 * 9495 * A better SAX parsing routine. 9496 * parse an XML in-memory buffer and call the given SAX handler routines. 
9497 * 9498 * Returns 0 in case of success or a error number otherwise 9499 */ 9500int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 9501 char *buffer, int size) { 9502 int ret = 0; 9503 xmlParserCtxtPtr ctxt; 9504 xmlSAXHandlerPtr oldsax = NULL; 9505 9506 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9507 if (ctxt == NULL) return -1; 9508 if (sax != NULL) { 9509 oldsax = ctxt->sax; 9510 ctxt->sax = sax; 9511 } 9512 ctxt->userData = user_data; 9513 9514 xmlParseDocument(ctxt); 9515 9516 if (ctxt->wellFormed) 9517 ret = 0; 9518 else { 9519 if (ctxt->errNo != 0) 9520 ret = ctxt->errNo; 9521 else 9522 ret = -1; 9523 } 9524 if (sax != NULL) { 9525 ctxt->sax = oldsax; 9526 } 9527 xmlFreeParserCtxt(ctxt); 9528 9529 return ret; 9530} 9531 9532/** 9533 * xmlCreateDocParserCtxt: 9534 * @cur: a pointer to an array of xmlChar 9535 * 9536 * Creates a parser context for an XML in-memory document. 9537 * 9538 * Returns the new parser context or NULL 9539 */ 9540xmlParserCtxtPtr 9541xmlCreateDocParserCtxt(xmlChar *cur) { 9542 int len; 9543 9544 if (cur == NULL) 9545 return(NULL); 9546 len = xmlStrlen(cur); 9547 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 9548} 9549 9550/** 9551 * xmlSAXParseDoc: 9552 * @sax: the SAX handler block 9553 * @cur: a pointer to an array of xmlChar 9554 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9555 * documents 9556 * 9557 * parse an XML in-memory document and build a tree. 9558 * It use the given SAX function block to handle the parsing callback. 9559 * If sax is NULL, fallback to the default DOM tree building routines. 
9560 * 9561 * Returns the resulting document tree 9562 */ 9563 9564xmlDocPtr 9565xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 9566 xmlDocPtr ret; 9567 xmlParserCtxtPtr ctxt; 9568 9569 if (cur == NULL) return(NULL); 9570 9571 9572 ctxt = xmlCreateDocParserCtxt(cur); 9573 if (ctxt == NULL) return(NULL); 9574 if (sax != NULL) { 9575 ctxt->sax = sax; 9576 ctxt->userData = NULL; 9577 } 9578 9579 xmlParseDocument(ctxt); 9580 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9581 else { 9582 ret = NULL; 9583 xmlFreeDoc(ctxt->myDoc); 9584 ctxt->myDoc = NULL; 9585 } 9586 if (sax != NULL) 9587 ctxt->sax = NULL; 9588 xmlFreeParserCtxt(ctxt); 9589 9590 return(ret); 9591} 9592 9593/** 9594 * xmlParseDoc: 9595 * @cur: a pointer to an array of xmlChar 9596 * 9597 * parse an XML in-memory document and build a tree. 9598 * 9599 * Returns the resulting document tree 9600 */ 9601 9602xmlDocPtr 9603xmlParseDoc(xmlChar *cur) { 9604 return(xmlSAXParseDoc(NULL, cur, 0)); 9605} 9606 9607 9608/************************************************************************ 9609 * * 9610 * Miscellaneous * 9611 * * 9612 ************************************************************************/ 9613 9614#ifdef LIBXML_XPATH_ENABLED 9615#include <libxml/xpath.h> 9616#endif 9617 9618static int xmlParserInitialized = 0; 9619 9620/** 9621 * xmlInitParser: 9622 * 9623 * Initialization function for the XML parser. 9624 * This is not reentrant. Call once before processing in case of 9625 * use in multithreaded programs. 
9626 */ 9627 9628void 9629xmlInitParser(void) { 9630 if (xmlParserInitialized) return; 9631 9632 xmlInitCharEncodingHandlers(); 9633 xmlInitializePredefinedEntities(); 9634 xmlDefaultSAXHandlerInit(); 9635 xmlRegisterDefaultInputCallbacks(); 9636 xmlRegisterDefaultOutputCallbacks(); 9637#ifdef LIBXML_HTML_ENABLED 9638 htmlInitAutoClose(); 9639 htmlDefaultSAXHandlerInit(); 9640#endif 9641#ifdef LIBXML_XPATH_ENABLED 9642 xmlXPathInit(); 9643#endif 9644 xmlParserInitialized = 1; 9645} 9646 9647/** 9648 * xmlCleanupParser: 9649 * 9650 * Cleanup function for the XML parser. It tries to reclaim all 9651 * parsing related global memory allocated for the parser processing. 9652 * It doesn't deallocate any document related memory. Calling this 9653 * function should not prevent reusing the parser. 9654 */ 9655 9656void 9657xmlCleanupParser(void) { 9658 xmlParserInitialized = 0; 9659 xmlCleanupCharEncodingHandlers(); 9660 xmlCleanupPredefinedEntities(); 9661} 9662 9663/** 9664 * xmlPedanticParserDefault: 9665 * @val: int 0 or 1 9666 * 9667 * Set and return the previous value for enabling pedantic warnings. 9668 * 9669 * Returns the last value for 0 for no substitution, 1 for substitution. 9670 */ 9671 9672int 9673xmlPedanticParserDefault(int val) { 9674 int old = xmlPedanticParserDefaultValue; 9675 9676 xmlPedanticParserDefaultValue = val; 9677 return(old); 9678} 9679 9680/** 9681 * xmlSubstituteEntitiesDefault: 9682 * @val: int 0 or 1 9683 * 9684 * Set and return the previous value for default entity support. 9685 * Initially the parser always keep entity references instead of substituting 9686 * entity values in the output. This function has to be used to change the 9687 * default parser behaviour 9688 * SAX::subtituteEntities() has to be used for changing that on a file by 9689 * file basis. 9690 * 9691 * Returns the last value for 0 for no substitution, 1 for substitution. 
9692 */ 9693 9694int 9695xmlSubstituteEntitiesDefault(int val) { 9696 int old = xmlSubstituteEntitiesDefaultValue; 9697 9698 xmlSubstituteEntitiesDefaultValue = val; 9699 return(old); 9700} 9701 9702/** 9703 * xmlKeepBlanksDefault: 9704 * @val: int 0 or 1 9705 * 9706 * Set and return the previous value for default blanks text nodes support. 9707 * The 1.x version of the parser used an heuristic to try to detect 9708 * ignorable white spaces. As a result the SAX callback was generating 9709 * ignorableWhitespace() callbacks instead of characters() one, and when 9710 * using the DOM output text nodes containing those blanks were not generated. 9711 * The 2.x and later version will switch to the XML standard way and 9712 * ignorableWhitespace() are only generated when running the parser in 9713 * validating mode and when the current element doesn't allow CDATA or 9714 * mixed content. 9715 * This function is provided as a way to force the standard behaviour 9716 * on 1.X libs and to switch back to the old mode for compatibility when 9717 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 9718 * by using xmlIsBlankNode() commodity function to detect the "empty" 9719 * nodes generated. 9720 * This value also affect autogeneration of indentation when saving code 9721 * if blanks sections are kept, indentation is not generated. 9722 * 9723 * Returns the last value for 0 for no substitution, 1 for substitution. 9724 */ 9725 9726int 9727xmlKeepBlanksDefault(int val) { 9728 int old = xmlKeepBlanksDefaultValue; 9729 9730 xmlKeepBlanksDefaultValue = val; 9731 xmlIndentTreeOutput = !val; 9732 return(old); 9733} 9734 9735