/* parser.c revision f6ed8bc7b2bd2d3d98ee37b4972b996142d5c89b */
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscelaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalones functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#include "libxml.h" 34 35#ifdef WIN32 36#define XML_DIR_SEP '\\' 37#else 38#define XML_DIR_SEP '/' 39#endif 40 41#include <stdlib.h> 42#include <string.h> 43#include <libxml/xmlmemory.h> 44#include <libxml/tree.h> 45#include <libxml/parser.h> 46#include <libxml/parserInternals.h> 47#include <libxml/valid.h> 48#include <libxml/entities.h> 49#include <libxml/xmlerror.h> 50#include <libxml/encoding.h> 51#include <libxml/xmlIO.h> 52#include <libxml/uri.h> 53#ifdef LIBXML_CATALOG_ENABLED 54#include <libxml/catalog.h> 55#endif 56 57#ifdef HAVE_CTYPE_H 58#include <ctype.h> 59#endif 60#ifdef HAVE_STDLIB_H 61#include <stdlib.h> 62#endif 63#ifdef HAVE_SYS_STAT_H 64#include <sys/stat.h> 65#endif 66#ifdef HAVE_FCNTL_H 67#include <fcntl.h> 68#endif 69#ifdef HAVE_UNISTD_H 70#include <unistd.h> 71#endif 72#ifdef HAVE_ZLIB_H 73#include <zlib.h> 74#endif 75 76 77#define XML_PARSER_BIG_BUFFER_SIZE 300 78#define XML_PARSER_BUFFER_SIZE 100 79 80/* 81 * Various global defaults for parsing 82 */ 83int xmlParserDebugEntities = 0; 84 85/* 86 * List of XML prefixed PI allowed by W3C specs 87 */ 88 89const char *xmlW3CPIs[] = { 90 "xml-stylesheet", 91 NULL 92}; 93 94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 95void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 96xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 97 const xmlChar **str); 98 99static int 100xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 101 xmlSAXHandlerPtr sax, 102 void *user_data, int depth, const xmlChar *URL, 103 const xmlChar *ID, xmlNodePtr *list); 104 105/************************************************************************ 106 * * 107 * Parser stacks related functions and macros * 108 * * 109 ************************************************************************/ 110 111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 112 const xmlChar ** str); 113 114/* 115 * Generic function for accessing stacks in 
the Parser Context 116 */ 117 118#define PUSH_AND_POP(scope, type, name) \ 119scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 120 if (ctxt->name##Nr >= ctxt->name##Max) { \ 121 ctxt->name##Max *= 2; \ 122 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 123 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 124 if (ctxt->name##Tab == NULL) { \ 125 xmlGenericError(xmlGenericErrorContext, \ 126 "realloc failed !\n"); \ 127 return(0); \ 128 } \ 129 } \ 130 ctxt->name##Tab[ctxt->name##Nr] = value; \ 131 ctxt->name = value; \ 132 return(ctxt->name##Nr++); \ 133} \ 134scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 135 type ret; \ 136 if (ctxt->name##Nr <= 0) return(0); \ 137 ctxt->name##Nr--; \ 138 if (ctxt->name##Nr > 0) \ 139 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 140 else \ 141 ctxt->name = NULL; \ 142 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 143 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 144 return(ret); \ 145} \ 146 147/** 148 * inputPop: 149 * @ctxt: an XML parser context 150 * 151 * Pops the top parser input from the input stack 152 * 153 * Returns the input just removed 154 */ 155/** 156 * inputPush: 157 * @ctxt: an XML parser context 158 * @input: the parser input 159 * 160 * Pushes a new parser input on top of the input stack 161 */ 162/** 163 * namePop: 164 * @ctxt: an XML parser context 165 * 166 * Pops the top element name from the name stack 167 * 168 * Returns the name just removed 169 */ 170/** 171 * namePush: 172 * @ctxt: an XML parser context 173 * @name: the element name 174 * 175 * Pushes a new element name on top of the name stack 176 */ 177/** 178 * nodePop: 179 * @ctxt: an XML parser context 180 * 181 * Pops the top element node from the node stack 182 * 183 * Returns the node just removed 184 */ 185/** 186 * nodePush: 187 * @ctxt: an XML parser context 188 * @node: the element node 189 * 190 * Pushes a new element node on top of the node stack 191 */ 192/* 193 * Those macros actually generate the functions 194 */ 
195PUSH_AND_POP(extern, xmlParserInputPtr, input) 196PUSH_AND_POP(extern, xmlNodePtr, node) 197PUSH_AND_POP(extern, xmlChar*, name) 198 199static int spacePush(xmlParserCtxtPtr ctxt, int val) { 200 if (ctxt->spaceNr >= ctxt->spaceMax) { 201 ctxt->spaceMax *= 2; 202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 204 if (ctxt->spaceTab == NULL) { 205 xmlGenericError(xmlGenericErrorContext, 206 "realloc failed !\n"); 207 return(0); 208 } 209 } 210 ctxt->spaceTab[ctxt->spaceNr] = val; 211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 212 return(ctxt->spaceNr++); 213} 214 215static int spacePop(xmlParserCtxtPtr ctxt) { 216 int ret; 217 if (ctxt->spaceNr <= 0) return(0); 218 ctxt->spaceNr--; 219 if (ctxt->spaceNr > 0) 220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 221 else 222 ctxt->space = NULL; 223 ret = ctxt->spaceTab[ctxt->spaceNr]; 224 ctxt->spaceTab[ctxt->spaceNr] = -1; 225 return(ret); 226} 227 228/* 229 * Macros for accessing the content. Those should be used only by the parser, 230 * and not exported. 231 * 232 * Dirty macros, i.e. one often need to make assumption on the context to 233 * use them 234 * 235 * CUR_PTR return the current pointer to the xmlChar to be parsed. 236 * To be used with extreme caution since operations consuming 237 * characters may move the input buffer to a different location ! 238 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 239 * This should be used internally by the parser 240 * only to compare to ASCII values otherwise it would break when 241 * running with UTF-8 encoding. 242 * RAW same as CUR but in the input buffer, bypass any token 243 * extraction that may have been done 244 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 245 * to compare on ASCII based substring. 246 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 247 * strings within the parser. 
248 * 249 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 250 * 251 * NEXT Skip to the next character, this does the proper decoding 252 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 253 * NEXTL(l) Skip l xmlChars in the input buffer 254 * CUR_CHAR(l) returns the current unicode character (int), set l 255 * to the number of xmlChars used for the encoding [0-5]. 256 * CUR_SCHAR same but operate on a string instead of the context 257 * COPY_BUF copy the current unicode char to the target buffer, increment 258 * the index 259 * GROW, SHRINK handling of input buffers 260 */ 261 262#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) 263#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur)) 264#define NXT(val) ctxt->input->cur[(val)] 265#define CUR_PTR ctxt->input->cur 266 267#define SKIP(val) do { \ 268 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 269 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 270 if ((*ctxt->input->cur == 0) && \ 271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 272 xmlPopInput(ctxt); \ 273 } while (0) 274 275#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\ 276 xmlParserInputShrink(ctxt->input); \ 277 if ((*ctxt->input->cur == 0) && \ 278 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 279 xmlPopInput(ctxt); \ 280 } 281 282#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \ 283 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 284 if ((*ctxt->input->cur == 0) && \ 285 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 286 xmlPopInput(ctxt); \ 287 } 288 289#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 290 291#define NEXT xmlNextChar(ctxt) 292 293#define NEXT1 { \ 294 ctxt->input->cur++; \ 295 ctxt->nbChars++; \ 296 if (*ctxt->input->cur == 0) \ 297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 298 } 299 300#define NEXTL(l) do { \ 301 if (*(ctxt->input->cur) == '\n') { \ 302 ctxt->input->line++; ctxt->input->col 
= 1; \ 303 } else ctxt->input->col++; \ 304 ctxt->token = 0; ctxt->input->cur += l; \ 305 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 306 } while (0) 307 308#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 309#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 310 311#define COPY_BUF(l,b,i,v) \ 312 if (l == 1) b[i++] = (xmlChar) v; \ 313 else i += xmlCopyCharMultiByte(&b[i],v) 314 315/** 316 * xmlSkipBlankChars: 317 * @ctxt: the XML parser context 318 * 319 * skip all blanks character found at that point in the input streams. 320 * It pops up finished entities in the process if allowable at that point. 321 * 322 * Returns the number of space chars skipped 323 */ 324 325int 326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 327 int res = 0; 328 329 if (ctxt->token != 0) { 330 if (!IS_BLANK(ctxt->token)) 331 return(0); 332 ctxt->token = 0; 333 res++; 334 } 335 /* 336 * It's Okay to use CUR/NEXT here since all the blanks are on 337 * the ASCII range. 338 */ 339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 340 const xmlChar *cur; 341 /* 342 * if we are in the document content, go really fast 343 */ 344 cur = ctxt->input->cur; 345 while (IS_BLANK(*cur)) { 346 if (*cur == '\n') { 347 ctxt->input->line++; ctxt->input->col = 1; 348 } 349 cur++; 350 res++; 351 if (*cur == 0) { 352 ctxt->input->cur = cur; 353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 354 cur = ctxt->input->cur; 355 } 356 } 357 ctxt->input->cur = cur; 358 } else { 359 int cur; 360 do { 361 cur = CUR; 362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 363 NEXT; 364 cur = CUR; 365 res++; 366 } 367 while ((cur == 0) && (ctxt->inputNr > 1) && 368 (ctxt->instate != XML_PARSER_COMMENT)) { 369 xmlPopInput(ctxt); 370 cur = CUR; 371 } 372 /* 373 * Need to handle support of entities branching here 374 */ 375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 377 } 378 return(res); 379} 380 
381/************************************************************************ 382 * * 383 * Commodity functions to handle entities * 384 * * 385 ************************************************************************/ 386 387/** 388 * xmlPopInput: 389 * @ctxt: an XML parser context 390 * 391 * xmlPopInput: the current input pointed by ctxt->input came to an end 392 * pop it and return the next char. 393 * 394 * Returns the current xmlChar in the parser context 395 */ 396xmlChar 397xmlPopInput(xmlParserCtxtPtr ctxt) { 398 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 399 if (xmlParserDebugEntities) 400 xmlGenericError(xmlGenericErrorContext, 401 "Popping input %d\n", ctxt->inputNr); 402 xmlFreeInputStream(inputPop(ctxt)); 403 if ((*ctxt->input->cur == 0) && 404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 405 return(xmlPopInput(ctxt)); 406 return(CUR); 407} 408 409/** 410 * xmlPushInput: 411 * @ctxt: an XML parser context 412 * @input: an XML parser input fragment (entity, XML fragment ...). 413 * 414 * xmlPushInput: switch to a new input stream which is stacked on top 415 * of the previous one(s). 416 */ 417void 418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 419 if (input == NULL) return; 420 421 if (xmlParserDebugEntities) { 422 if ((ctxt->input != NULL) && (ctxt->input->filename)) 423 xmlGenericError(xmlGenericErrorContext, 424 "%s(%d): ", ctxt->input->filename, 425 ctxt->input->line); 426 xmlGenericError(xmlGenericErrorContext, 427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 428 } 429 inputPush(ctxt, input); 430 GROW; 431} 432 433/** 434 * xmlParseCharRef: 435 * @ctxt: an XML parser context 436 * 437 * parse Reference declarations 438 * 439 * [66] CharRef ::= '&#' [0-9]+ ';' | 440 * '&#x' [0-9a-fA-F]+ ';' 441 * 442 * [ WFC: Legal Character ] 443 * Characters referred to using character references must match the 444 * production for Char. 
445 * 446 * Returns the value parsed (as an int), 0 in case of error 447 */ 448int 449xmlParseCharRef(xmlParserCtxtPtr ctxt) { 450 unsigned int val = 0; 451 int count = 0; 452 453 if (ctxt->token != 0) { 454 val = ctxt->token; 455 ctxt->token = 0; 456 return(val); 457 } 458 /* 459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 460 */ 461 if ((RAW == '&') && (NXT(1) == '#') && 462 (NXT(2) == 'x')) { 463 SKIP(3); 464 GROW; 465 while (RAW != ';') { /* loop blocked by count */ 466 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 467 val = val * 16 + (CUR - '0'); 468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 469 val = val * 16 + (CUR - 'a') + 10; 470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 471 val = val * 16 + (CUR - 'A') + 10; 472 else { 473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 475 ctxt->sax->error(ctxt->userData, 476 "xmlParseCharRef: invalid hexadecimal value\n"); 477 ctxt->wellFormed = 0; 478 ctxt->disableSAX = 1; 479 val = 0; 480 break; 481 } 482 NEXT; 483 count++; 484 } 485 if (RAW == ';') { 486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 487 ctxt->nbChars ++; 488 ctxt->input->cur++; 489 } 490 } else if ((RAW == '&') && (NXT(1) == '#')) { 491 SKIP(2); 492 GROW; 493 while (RAW != ';') { /* loop blocked by count */ 494 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 495 val = val * 10 + (CUR - '0'); 496 else { 497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 499 ctxt->sax->error(ctxt->userData, 500 "xmlParseCharRef: invalid decimal value\n"); 501 ctxt->wellFormed = 0; 502 ctxt->disableSAX = 1; 503 val = 0; 504 break; 505 } 506 NEXT; 507 count++; 508 } 509 if (RAW == ';') { 510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 511 ctxt->nbChars ++; 512 ctxt->input->cur++; 513 } 514 } else { 515 ctxt->errNo = XML_ERR_INVALID_CHARREF; 516 if ((ctxt->sax != 
NULL) && (ctxt->sax->error != NULL)) 517 ctxt->sax->error(ctxt->userData, 518 "xmlParseCharRef: invalid value\n"); 519 ctxt->wellFormed = 0; 520 ctxt->disableSAX = 1; 521 } 522 523 /* 524 * [ WFC: Legal Character ] 525 * Characters referred to using character references must match the 526 * production for Char. 527 */ 528 if (IS_CHAR(val)) { 529 return(val); 530 } else { 531 ctxt->errNo = XML_ERR_INVALID_CHAR; 532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 534 val); 535 ctxt->wellFormed = 0; 536 ctxt->disableSAX = 1; 537 } 538 return(0); 539} 540 541/** 542 * xmlParseStringCharRef: 543 * @ctxt: an XML parser context 544 * @str: a pointer to an index in the string 545 * 546 * parse Reference declarations, variant parsing from a string rather 547 * than an an input flow. 548 * 549 * [66] CharRef ::= '&#' [0-9]+ ';' | 550 * '&#x' [0-9a-fA-F]+ ';' 551 * 552 * [ WFC: Legal Character ] 553 * Characters referred to using character references must match the 554 * production for Char. 
555 * 556 * Returns the value parsed (as an int), 0 in case of error, str will be 557 * updated to the current value of the index 558 */ 559static int 560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 561 const xmlChar *ptr; 562 xmlChar cur; 563 int val = 0; 564 565 if ((str == NULL) || (*str == NULL)) return(0); 566 ptr = *str; 567 cur = *ptr; 568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 569 ptr += 3; 570 cur = *ptr; 571 while (cur != ';') { /* Non input consuming loop */ 572 if ((cur >= '0') && (cur <= '9')) 573 val = val * 16 + (cur - '0'); 574 else if ((cur >= 'a') && (cur <= 'f')) 575 val = val * 16 + (cur - 'a') + 10; 576 else if ((cur >= 'A') && (cur <= 'F')) 577 val = val * 16 + (cur - 'A') + 10; 578 else { 579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 581 ctxt->sax->error(ctxt->userData, 582 "xmlParseStringCharRef: invalid hexadecimal value\n"); 583 ctxt->wellFormed = 0; 584 ctxt->disableSAX = 1; 585 val = 0; 586 break; 587 } 588 ptr++; 589 cur = *ptr; 590 } 591 if (cur == ';') 592 ptr++; 593 } else if ((cur == '&') && (ptr[1] == '#')){ 594 ptr += 2; 595 cur = *ptr; 596 while (cur != ';') { /* Non input consuming loops */ 597 if ((cur >= '0') && (cur <= '9')) 598 val = val * 10 + (cur - '0'); 599 else { 600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 602 ctxt->sax->error(ctxt->userData, 603 "xmlParseStringCharRef: invalid decimal value\n"); 604 ctxt->wellFormed = 0; 605 ctxt->disableSAX = 1; 606 val = 0; 607 break; 608 } 609 ptr++; 610 cur = *ptr; 611 } 612 if (cur == ';') 613 ptr++; 614 } else { 615 ctxt->errNo = XML_ERR_INVALID_CHARREF; 616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 617 ctxt->sax->error(ctxt->userData, 618 "xmlParseCharRef: invalid value\n"); 619 ctxt->wellFormed = 0; 620 ctxt->disableSAX = 1; 621 return(0); 622 } 623 *str = ptr; 624 625 /* 626 * [ WFC: Legal Character ] 
627 * Characters referred to using character references must match the 628 * production for Char. 629 */ 630 if (IS_CHAR(val)) { 631 return(val); 632 } else { 633 ctxt->errNo = XML_ERR_INVALID_CHAR; 634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 635 ctxt->sax->error(ctxt->userData, 636 "CharRef: invalid xmlChar value %d\n", val); 637 ctxt->wellFormed = 0; 638 ctxt->disableSAX = 1; 639 } 640 return(0); 641} 642 643/** 644 * xmlParserHandlePEReference: 645 * @ctxt: the parser context 646 * 647 * [69] PEReference ::= '%' Name ';' 648 * 649 * [ WFC: No Recursion ] 650 * A parsed entity must not contain a recursive 651 * reference to itself, either directly or indirectly. 652 * 653 * [ WFC: Entity Declared ] 654 * In a document without any DTD, a document with only an internal DTD 655 * subset which contains no parameter entity references, or a document 656 * with "standalone='yes'", ... ... The declaration of a parameter 657 * entity must precede any reference to it... 658 * 659 * [ VC: Entity Declared ] 660 * In a document with an external subset or external parameter entities 661 * with "standalone='no'", ... ... The declaration of a parameter entity 662 * must precede any reference to it... 663 * 664 * [ WFC: In DTD ] 665 * Parameter-entity references may only appear in the DTD. 666 * NOTE: misleading but this is handled. 667 * 668 * A PEReference may have been detected in the current input stream 669 * the handling is done accordingly to 670 * http://www.w3.org/TR/REC-xml#entproc 671 * i.e. 
672 * - Included in literal in entity values 673 * - Included as Paraemeter Entity reference within DTDs 674 */ 675void 676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 677 xmlChar *name; 678 xmlEntityPtr entity = NULL; 679 xmlParserInputPtr input; 680 681 if (ctxt->token != 0) { 682 return; 683 } 684 if (RAW != '%') return; 685 switch(ctxt->instate) { 686 case XML_PARSER_CDATA_SECTION: 687 return; 688 case XML_PARSER_COMMENT: 689 return; 690 case XML_PARSER_START_TAG: 691 return; 692 case XML_PARSER_END_TAG: 693 return; 694 case XML_PARSER_EOF: 695 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 698 ctxt->wellFormed = 0; 699 ctxt->disableSAX = 1; 700 return; 701 case XML_PARSER_PROLOG: 702 case XML_PARSER_START: 703 case XML_PARSER_MISC: 704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 707 ctxt->wellFormed = 0; 708 ctxt->disableSAX = 1; 709 return; 710 case XML_PARSER_ENTITY_DECL: 711 case XML_PARSER_CONTENT: 712 case XML_PARSER_ATTRIBUTE_VALUE: 713 case XML_PARSER_PI: 714 case XML_PARSER_SYSTEM_LITERAL: 715 /* we just ignore it there */ 716 return; 717 case XML_PARSER_EPILOG: 718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 721 ctxt->wellFormed = 0; 722 ctxt->disableSAX = 1; 723 return; 724 case XML_PARSER_ENTITY_VALUE: 725 /* 726 * NOTE: in the case of entity values, we don't do the 727 * substitution here since we need the literal 728 * entity value to be able to save the internal 729 * subset of the document. 
730 * This will be handled by xmlStringDecodeEntities 731 */ 732 return; 733 case XML_PARSER_DTD: 734 /* 735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 736 * In the internal DTD subset, parameter-entity references 737 * can occur only where markup declarations can occur, not 738 * within markup declarations. 739 * In that case this is handled in xmlParseMarkupDecl 740 */ 741 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 742 return; 743 break; 744 case XML_PARSER_IGNORE: 745 return; 746 } 747 748 NEXT; 749 name = xmlParseName(ctxt); 750 if (xmlParserDebugEntities) 751 xmlGenericError(xmlGenericErrorContext, 752 "PE Reference: %s\n", name); 753 if (name == NULL) { 754 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 757 ctxt->wellFormed = 0; 758 ctxt->disableSAX = 1; 759 } else { 760 if (RAW == ';') { 761 NEXT; 762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 764 if (entity == NULL) { 765 766 /* 767 * [ WFC: Entity Declared ] 768 * In a document without any DTD, a document with only an 769 * internal DTD subset which contains no parameter entity 770 * references, or a document with "standalone='yes'", ... 771 * ... The declaration of a parameter entity must precede 772 * any reference to it... 773 */ 774 if ((ctxt->standalone == 1) || 775 ((ctxt->hasExternalSubset == 0) && 776 (ctxt->hasPErefs == 0))) { 777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 778 ctxt->sax->error(ctxt->userData, 779 "PEReference: %%%s; not found\n", name); 780 ctxt->wellFormed = 0; 781 ctxt->disableSAX = 1; 782 } else { 783 /* 784 * [ VC: Entity Declared ] 785 * In a document with an external subset or external 786 * parameter entities with "standalone='no'", ... 787 * ... The declaration of a parameter entity must precede 788 * any reference to it... 
789 */ 790 if ((!ctxt->disableSAX) && 791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 792 ctxt->vctxt.error(ctxt->vctxt.userData, 793 "PEReference: %%%s; not found\n", name); 794 } else if ((!ctxt->disableSAX) && 795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 796 ctxt->sax->warning(ctxt->userData, 797 "PEReference: %%%s; not found\n", name); 798 ctxt->valid = 0; 799 } 800 } else { 801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 803 xmlChar start[4]; 804 xmlCharEncoding enc; 805 806 /* 807 * handle the extra spaces added before and after 808 * c.f. http://www.w3.org/TR/REC-xml#as-PE 809 * this is done independantly. 810 */ 811 input = xmlNewEntityInputStream(ctxt, entity); 812 xmlPushInput(ctxt, input); 813 814 /* 815 * Get the 4 first bytes and decode the charset 816 * if enc != XML_CHAR_ENCODING_NONE 817 * plug some encoding conversion routines. 818 */ 819 GROW 820 start[0] = RAW; 821 start[1] = NXT(1); 822 start[2] = NXT(2); 823 start[3] = NXT(3); 824 enc = xmlDetectCharEncoding(start, 4); 825 if (enc != XML_CHAR_ENCODING_NONE) { 826 xmlSwitchEncoding(ctxt, enc); 827 } 828 829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 830 (RAW == '<') && (NXT(1) == '?') && 831 (NXT(2) == 'x') && (NXT(3) == 'm') && 832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 833 xmlParseTextDecl(ctxt); 834 } 835 if (ctxt->token == 0) 836 ctxt->token = ' '; 837 } else { 838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 839 ctxt->sax->error(ctxt->userData, 840 "xmlHandlePEReference: %s is not a parameter entity\n", 841 name); 842 ctxt->wellFormed = 0; 843 ctxt->disableSAX = 1; 844 } 845 } 846 } else { 847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 849 ctxt->sax->error(ctxt->userData, 850 "xmlHandlePEReference: expecting ';'\n"); 851 ctxt->wellFormed = 0; 852 ctxt->disableSAX = 1; 853 } 854 xmlFree(name); 855 } 856} 857 858/* 859 * 
Macro used to grow the current buffer. 860 */ 861#define growBuffer(buffer) { \ 862 buffer##_size *= 2; \ 863 buffer = (xmlChar *) \ 864 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 865 if (buffer == NULL) { \ 866 perror("realloc failed"); \ 867 return(NULL); \ 868 } \ 869} 870 871/** 872 * xmlStringDecodeEntities: 873 * @ctxt: the parser context 874 * @str: the input string 875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 876 * @end: an end marker xmlChar, 0 if none 877 * @end2: an end marker xmlChar, 0 if none 878 * @end3: an end marker xmlChar, 0 if none 879 * 880 * Takes a entity string content and process to do the adequate subtitutions. 881 * 882 * [67] Reference ::= EntityRef | CharRef 883 * 884 * [69] PEReference ::= '%' Name ';' 885 * 886 * Returns A newly allocated string with the substitution done. The caller 887 * must deallocate it ! 888 */ 889xmlChar * 890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 891 xmlChar end, xmlChar end2, xmlChar end3) { 892 xmlChar *buffer = NULL; 893 int buffer_size = 0; 894 895 xmlChar *current = NULL; 896 xmlEntityPtr ent; 897 int c,l; 898 int nbchars = 0; 899 900 if (str == NULL) 901 return(NULL); 902 903 if (ctxt->depth > 40) { 904 ctxt->errNo = XML_ERR_ENTITY_LOOP; 905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 906 ctxt->sax->error(ctxt->userData, 907 "Detected entity reference loop\n"); 908 ctxt->wellFormed = 0; 909 ctxt->disableSAX = 1; 910 return(NULL); 911 } 912 913 /* 914 * allocate a translation buffer. 915 */ 916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 918 if (buffer == NULL) { 919 perror("xmlDecodeEntities: malloc failed"); 920 return(NULL); 921 } 922 923 /* 924 * Ok loop until we reach one of the ending char or a size limit. 925 * we are operating on already parsed values. 
926 */ 927 c = CUR_SCHAR(str, l); 928 while ((c != 0) && (c != end) && /* non input consuming loop */ 929 (c != end2) && (c != end3)) { 930 931 if (c == 0) break; 932 if ((c == '&') && (str[1] == '#')) { 933 int val = xmlParseStringCharRef(ctxt, &str); 934 if (val != 0) { 935 COPY_BUF(0,buffer,nbchars,val); 936 } 937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 938 if (xmlParserDebugEntities) 939 xmlGenericError(xmlGenericErrorContext, 940 "String decoding Entity Reference: %.30s\n", 941 str); 942 ent = xmlParseStringEntityRef(ctxt, &str); 943 if ((ent != NULL) && 944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 945 if (ent->content != NULL) { 946 COPY_BUF(0,buffer,nbchars,ent->content[0]); 947 } else { 948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 949 ctxt->sax->error(ctxt->userData, 950 "internal error entity has no content\n"); 951 } 952 } else if ((ent != NULL) && (ent->content != NULL)) { 953 xmlChar *rep; 954 955 ctxt->depth++; 956 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 957 0, 0, 0); 958 ctxt->depth--; 959 if (rep != NULL) { 960 current = rep; 961 while (*current != 0) { /* non input consuming loop */ 962 buffer[nbchars++] = *current++; 963 if (nbchars > 964 buffer_size - XML_PARSER_BUFFER_SIZE) { 965 growBuffer(buffer); 966 } 967 } 968 xmlFree(rep); 969 } 970 } else if (ent != NULL) { 971 int i = xmlStrlen(ent->name); 972 const xmlChar *cur = ent->name; 973 974 buffer[nbchars++] = '&'; 975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 976 growBuffer(buffer); 977 } 978 for (;i > 0;i--) 979 buffer[nbchars++] = *cur++; 980 buffer[nbchars++] = ';'; 981 } 982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 983 if (xmlParserDebugEntities) 984 xmlGenericError(xmlGenericErrorContext, 985 "String decoding PE Reference: %.30s\n", str); 986 ent = xmlParseStringPEReference(ctxt, &str); 987 if (ent != NULL) { 988 xmlChar *rep; 989 990 ctxt->depth++; 991 rep = xmlStringDecodeEntities(ctxt, ent->content, 
what, 992 0, 0, 0); 993 ctxt->depth--; 994 if (rep != NULL) { 995 current = rep; 996 while (*current != 0) { /* non input consuming loop */ 997 buffer[nbchars++] = *current++; 998 if (nbchars > 999 buffer_size - XML_PARSER_BUFFER_SIZE) { 1000 growBuffer(buffer); 1001 } 1002 } 1003 xmlFree(rep); 1004 } 1005 } 1006 } else { 1007 COPY_BUF(l,buffer,nbchars,c); 1008 str += l; 1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1010 growBuffer(buffer); 1011 } 1012 } 1013 c = CUR_SCHAR(str, l); 1014 } 1015 buffer[nbchars++] = 0; 1016 return(buffer); 1017} 1018 1019 1020/************************************************************************ 1021 * * 1022 * Commodity functions to handle xmlChars * 1023 * * 1024 ************************************************************************/ 1025 1026/** 1027 * xmlStrndup: 1028 * @cur: the input xmlChar * 1029 * @len: the len of @cur 1030 * 1031 * a strndup for array of xmlChar's 1032 * 1033 * Returns a new xmlChar * or NULL 1034 */ 1035xmlChar * 1036xmlStrndup(const xmlChar *cur, int len) { 1037 xmlChar *ret; 1038 1039 if ((cur == NULL) || (len < 0)) return(NULL); 1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1041 if (ret == NULL) { 1042 xmlGenericError(xmlGenericErrorContext, 1043 "malloc of %ld byte failed\n", 1044 (len + 1) * (long)sizeof(xmlChar)); 1045 return(NULL); 1046 } 1047 memcpy(ret, cur, len * sizeof(xmlChar)); 1048 ret[len] = 0; 1049 return(ret); 1050} 1051 1052/** 1053 * xmlStrdup: 1054 * @cur: the input xmlChar * 1055 * 1056 * a strdup for array of xmlChar's. Since they are supposed to be 1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1058 * a termination mark of '0'. 
1059 * 1060 * Returns a new xmlChar * or NULL 1061 */ 1062xmlChar * 1063xmlStrdup(const xmlChar *cur) { 1064 const xmlChar *p = cur; 1065 1066 if (cur == NULL) return(NULL); 1067 while (*p != 0) p++; /* non input consuming */ 1068 return(xmlStrndup(cur, p - cur)); 1069} 1070 1071/** 1072 * xmlCharStrndup: 1073 * @cur: the input char * 1074 * @len: the len of @cur 1075 * 1076 * a strndup for char's to xmlChar's 1077 * 1078 * Returns a new xmlChar * or NULL 1079 */ 1080 1081xmlChar * 1082xmlCharStrndup(const char *cur, int len) { 1083 int i; 1084 xmlChar *ret; 1085 1086 if ((cur == NULL) || (len < 0)) return(NULL); 1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1088 if (ret == NULL) { 1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1090 (len + 1) * (long)sizeof(xmlChar)); 1091 return(NULL); 1092 } 1093 for (i = 0;i < len;i++) 1094 ret[i] = (xmlChar) cur[i]; 1095 ret[len] = 0; 1096 return(ret); 1097} 1098 1099/** 1100 * xmlCharStrdup: 1101 * @cur: the input char * 1102 * @len: the len of @cur 1103 * 1104 * a strdup for char's to xmlChar's 1105 * 1106 * Returns a new xmlChar * or NULL 1107 */ 1108 1109xmlChar * 1110xmlCharStrdup(const char *cur) { 1111 const char *p = cur; 1112 1113 if (cur == NULL) return(NULL); 1114 while (*p != '\0') p++; /* non input consuming */ 1115 return(xmlCharStrndup(cur, p - cur)); 1116} 1117 1118/** 1119 * xmlStrcmp: 1120 * @str1: the first xmlChar * 1121 * @str2: the second xmlChar * 1122 * 1123 * a strcmp for xmlChar's 1124 * 1125 * Returns the integer result of the comparison 1126 */ 1127 1128int 1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1130 register int tmp; 1131 1132 if (str1 == str2) return(0); 1133 if (str1 == NULL) return(-1); 1134 if (str2 == NULL) return(1); 1135 do { 1136 tmp = *str1++ - *str2; 1137 if (tmp != 0) return(tmp); 1138 } while (*str2++ != 0); 1139 return 0; 1140} 1141 1142/** 1143 * xmlStrEqual: 1144 * @str1: the first xmlChar * 1145 * @str2: the second 
xmlChar * 1146 * 1147 * Check if both string are equal of have same content 1148 * Should be a bit more readable and faster than xmlStrEqual() 1149 * 1150 * Returns 1 if they are equal, 0 if they are different 1151 */ 1152 1153int 1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1155 if (str1 == str2) return(1); 1156 if (str1 == NULL) return(0); 1157 if (str2 == NULL) return(0); 1158 do { 1159 if (*str1++ != *str2) return(0); 1160 } while (*str2++); 1161 return(1); 1162} 1163 1164/** 1165 * xmlStrncmp: 1166 * @str1: the first xmlChar * 1167 * @str2: the second xmlChar * 1168 * @len: the max comparison length 1169 * 1170 * a strncmp for xmlChar's 1171 * 1172 * Returns the integer result of the comparison 1173 */ 1174 1175int 1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1177 register int tmp; 1178 1179 if (len <= 0) return(0); 1180 if (str1 == str2) return(0); 1181 if (str1 == NULL) return(-1); 1182 if (str2 == NULL) return(1); 1183 do { 1184 tmp = *str1++ - *str2; 1185 if (tmp != 0 || --len == 0) return(tmp); 1186 } while (*str2++ != 0); 1187 return 0; 1188} 1189 1190static xmlChar casemap[256] = { 1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1209 
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1223}; 1224 1225/** 1226 * xmlStrcasecmp: 1227 * @str1: the first xmlChar * 1228 * @str2: the second xmlChar * 1229 * 1230 * a strcasecmp for xmlChar's 1231 * 1232 * Returns the integer result of the comparison 1233 */ 1234 1235int 1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1237 register int tmp; 1238 1239 if (str1 == str2) return(0); 1240 if (str1 == NULL) return(-1); 1241 if (str2 == NULL) return(1); 1242 do { 1243 tmp = casemap[*str1++] - casemap[*str2]; 1244 if (tmp != 0) return(tmp); 1245 } while (*str2++ != 0); 1246 return 0; 1247} 1248 1249/** 1250 * xmlStrncasecmp: 1251 * @str1: the first xmlChar * 1252 * @str2: the second xmlChar * 1253 * @len: the max comparison length 1254 * 1255 * a strncasecmp for xmlChar's 1256 * 1257 * Returns the integer result of the comparison 1258 */ 1259 1260int 1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1262 register int tmp; 1263 1264 if (len <= 0) return(0); 1265 if (str1 == str2) return(0); 1266 if (str1 == NULL) return(-1); 1267 if (str2 == NULL) return(1); 1268 do { 1269 tmp = casemap[*str1++] - casemap[*str2]; 1270 if (tmp != 0 || --len == 0) return(tmp); 1271 } while (*str2++ != 0); 1272 return 0; 1273} 1274 1275/** 1276 * xmlStrchr: 1277 * @str: the xmlChar * array 1278 * @val: the xmlChar to search 1279 * 1280 * a strchr for xmlChar's 1281 * 1282 
* Returns the xmlChar * for the first occurence or NULL. 1283 */ 1284 1285const xmlChar * 1286xmlStrchr(const xmlChar *str, xmlChar val) { 1287 if (str == NULL) return(NULL); 1288 while (*str != 0) { /* non input consuming */ 1289 if (*str == val) return((xmlChar *) str); 1290 str++; 1291 } 1292 return(NULL); 1293} 1294 1295/** 1296 * xmlStrstr: 1297 * @str: the xmlChar * array (haystack) 1298 * @val: the xmlChar to search (needle) 1299 * 1300 * a strstr for xmlChar's 1301 * 1302 * Returns the xmlChar * for the first occurence or NULL. 1303 */ 1304 1305const xmlChar * 1306xmlStrstr(const xmlChar *str, const xmlChar *val) { 1307 int n; 1308 1309 if (str == NULL) return(NULL); 1310 if (val == NULL) return(NULL); 1311 n = xmlStrlen(val); 1312 1313 if (n == 0) return(str); 1314 while (*str != 0) { /* non input consuming */ 1315 if (*str == *val) { 1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1317 } 1318 str++; 1319 } 1320 return(NULL); 1321} 1322 1323/** 1324 * xmlStrcasestr: 1325 * @str: the xmlChar * array (haystack) 1326 * @val: the xmlChar to search (needle) 1327 * 1328 * a case-ignoring strstr for xmlChar's 1329 * 1330 * Returns the xmlChar * for the first occurence or NULL. 1331 */ 1332 1333const xmlChar * 1334xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1335 int n; 1336 1337 if (str == NULL) return(NULL); 1338 if (val == NULL) return(NULL); 1339 n = xmlStrlen(val); 1340 1341 if (n == 0) return(str); 1342 while (*str != 0) { /* non input consuming */ 1343 if (casemap[*str] == casemap[*val]) 1344 if (!xmlStrncasecmp(str, val, n)) return(str); 1345 str++; 1346 } 1347 return(NULL); 1348} 1349 1350/** 1351 * xmlStrsub: 1352 * @str: the xmlChar * array (haystack) 1353 * @start: the index of the first char (zero based) 1354 * @len: the length of the substring 1355 * 1356 * Extract a substring of a given string 1357 * 1358 * Returns the xmlChar * for the first occurence or NULL. 
1359 */ 1360 1361xmlChar * 1362xmlStrsub(const xmlChar *str, int start, int len) { 1363 int i; 1364 1365 if (str == NULL) return(NULL); 1366 if (start < 0) return(NULL); 1367 if (len < 0) return(NULL); 1368 1369 for (i = 0;i < start;i++) { 1370 if (*str == 0) return(NULL); 1371 str++; 1372 } 1373 if (*str == 0) return(NULL); 1374 return(xmlStrndup(str, len)); 1375} 1376 1377/** 1378 * xmlStrlen: 1379 * @str: the xmlChar * array 1380 * 1381 * length of a xmlChar's string 1382 * 1383 * Returns the number of xmlChar contained in the ARRAY. 1384 */ 1385 1386int 1387xmlStrlen(const xmlChar *str) { 1388 int len = 0; 1389 1390 if (str == NULL) return(0); 1391 while (*str != 0) { /* non input consuming */ 1392 str++; 1393 len++; 1394 } 1395 return(len); 1396} 1397 1398/** 1399 * xmlStrncat: 1400 * @cur: the original xmlChar * array 1401 * @add: the xmlChar * array added 1402 * @len: the length of @add 1403 * 1404 * a strncat for array of xmlChar's, it will extend cur with the len 1405 * first bytes of @add. 1406 * 1407 * Returns a new xmlChar *, the original @cur is reallocated if needed 1408 * and should not be freed 1409 */ 1410 1411xmlChar * 1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1413 int size; 1414 xmlChar *ret; 1415 1416 if ((add == NULL) || (len == 0)) 1417 return(cur); 1418 if (cur == NULL) 1419 return(xmlStrndup(add, len)); 1420 1421 size = xmlStrlen(cur); 1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1423 if (ret == NULL) { 1424 xmlGenericError(xmlGenericErrorContext, 1425 "xmlStrncat: realloc of %ld byte failed\n", 1426 (size + len + 1) * (long)sizeof(xmlChar)); 1427 return(cur); 1428 } 1429 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1430 ret[size + len] = 0; 1431 return(ret); 1432} 1433 1434/** 1435 * xmlStrcat: 1436 * @cur: the original xmlChar * array 1437 * @add: the xmlChar * array added 1438 * 1439 * a strcat for array of xmlChar's. 
Since they are supposed to be 1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1441 * a termination mark of '0'. 1442 * 1443 * Returns a new xmlChar * containing the concatenated string. 1444 */ 1445xmlChar * 1446xmlStrcat(xmlChar *cur, const xmlChar *add) { 1447 const xmlChar *p = add; 1448 1449 if (add == NULL) return(cur); 1450 if (cur == NULL) 1451 return(xmlStrdup(add)); 1452 1453 while (*p != 0) p++; /* non input consuming */ 1454 return(xmlStrncat(cur, add, p - add)); 1455} 1456 1457/************************************************************************ 1458 * * 1459 * Commodity functions, cleanup needed ? * 1460 * * 1461 ************************************************************************/ 1462 1463/** 1464 * areBlanks: 1465 * @ctxt: an XML parser context 1466 * @str: a xmlChar * 1467 * @len: the size of @str 1468 * 1469 * Is this a sequence of blank chars that one can ignore ? 1470 * 1471 * Returns 1 if ignorable 0 otherwise. 1472 */ 1473 1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1475 int i, ret; 1476 xmlNodePtr lastChild; 1477 1478 /* 1479 * Don't spend time trying to differentiate them, the same callback is 1480 * used ! 1481 */ 1482 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 1483 return(0); 1484 1485 /* 1486 * Check for xml:space value. 
1487 */ 1488 if (*(ctxt->space) == 1) 1489 return(0); 1490 1491 /* 1492 * Check that the string is made of blanks 1493 */ 1494 for (i = 0;i < len;i++) 1495 if (!(IS_BLANK(str[i]))) return(0); 1496 1497 /* 1498 * Look if the element is mixed content in the Dtd if available 1499 */ 1500 if (ctxt->node == NULL) return(0); 1501 if (ctxt->myDoc != NULL) { 1502 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1503 if (ret == 0) return(1); 1504 if (ret == 1) return(0); 1505 } 1506 1507 /* 1508 * Otherwise, heuristic :-\ 1509 */ 1510 if (RAW != '<') return(0); 1511 if ((ctxt->node->children == NULL) && 1512 (RAW == '<') && (NXT(1) == '/')) return(0); 1513 1514 lastChild = xmlGetLastChild(ctxt->node); 1515 if (lastChild == NULL) { 1516 if ((ctxt->node->type != XML_ELEMENT_NODE) && 1517 (ctxt->node->content != NULL)) return(0); 1518 } else if (xmlNodeIsText(lastChild)) 1519 return(0); 1520 else if ((ctxt->node->children != NULL) && 1521 (xmlNodeIsText(ctxt->node->children))) 1522 return(0); 1523 return(1); 1524} 1525 1526/* 1527 * Forward definition for recusive behaviour. 1528 */ 1529void xmlParsePEReference(xmlParserCtxtPtr ctxt); 1530void xmlParseReference(xmlParserCtxtPtr ctxt); 1531 1532/************************************************************************ 1533 * * 1534 * Extra stuff for namespace support * 1535 * Relates to http://www.w3.org/TR/WD-xml-names * 1536 * * 1537 ************************************************************************/ 1538 1539/** 1540 * xmlSplitQName: 1541 * @ctxt: an XML parser context 1542 * @name: an XML parser context 1543 * @prefix: a xmlChar ** 1544 * 1545 * parse an UTF8 encoded XML qualified name string 1546 * 1547 * [NS 5] QName ::= (Prefix ':')? LocalPart 1548 * 1549 * [NS 6] Prefix ::= NCName 1550 * 1551 * [NS 7] LocalPart ::= NCName 1552 * 1553 * Returns the local part, and prefix is updated 1554 * to get the Prefix if any. 
1555 */ 1556 1557xmlChar * 1558xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1559 xmlChar buf[XML_MAX_NAMELEN + 5]; 1560 xmlChar *buffer = NULL; 1561 int len = 0; 1562 int max = XML_MAX_NAMELEN; 1563 xmlChar *ret = NULL; 1564 const xmlChar *cur = name; 1565 int c; 1566 1567 *prefix = NULL; 1568 1569#ifndef XML_XML_NAMESPACE 1570 /* xml: prefix is not really a namespace */ 1571 if ((cur[0] == 'x') && (cur[1] == 'm') && 1572 (cur[2] == 'l') && (cur[3] == ':')) 1573 return(xmlStrdup(name)); 1574#endif 1575 1576 /* nasty but valid */ 1577 if (cur[0] == ':') 1578 return(xmlStrdup(name)); 1579 1580 c = *cur++; 1581 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1582 buf[len++] = c; 1583 c = *cur++; 1584 } 1585 if (len >= max) { 1586 /* 1587 * Okay someone managed to make a huge name, so he's ready to pay 1588 * for the processing speed. 1589 */ 1590 max = len * 2; 1591 1592 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1593 if (buffer == NULL) { 1594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1595 ctxt->sax->error(ctxt->userData, 1596 "xmlSplitQName: out of memory\n"); 1597 return(NULL); 1598 } 1599 memcpy(buffer, buf, len); 1600 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1601 if (len + 10 > max) { 1602 max *= 2; 1603 buffer = (xmlChar *) xmlRealloc(buffer, 1604 max * sizeof(xmlChar)); 1605 if (buffer == NULL) { 1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1607 ctxt->sax->error(ctxt->userData, 1608 "xmlSplitQName: out of memory\n"); 1609 return(NULL); 1610 } 1611 } 1612 buffer[len++] = c; 1613 c = *cur++; 1614 } 1615 buffer[len] = 0; 1616 } 1617 1618 if (buffer == NULL) 1619 ret = xmlStrndup(buf, len); 1620 else { 1621 ret = buffer; 1622 buffer = NULL; 1623 max = XML_MAX_NAMELEN; 1624 } 1625 1626 1627 if (c == ':') { 1628 c = *cur++; 1629 if (c == 0) return(ret); 1630 *prefix = ret; 1631 len = 0; 1632 1633 while ((c != 0) && (len < max)) { /* tested bigname2.xml 
*/ 1634 buf[len++] = c; 1635 c = *cur++; 1636 } 1637 if (len >= max) { 1638 /* 1639 * Okay someone managed to make a huge name, so he's ready to pay 1640 * for the processing speed. 1641 */ 1642 max = len * 2; 1643 1644 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1645 if (buffer == NULL) { 1646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1647 ctxt->sax->error(ctxt->userData, 1648 "xmlSplitQName: out of memory\n"); 1649 return(NULL); 1650 } 1651 memcpy(buffer, buf, len); 1652 while (c != 0) { /* tested bigname2.xml */ 1653 if (len + 10 > max) { 1654 max *= 2; 1655 buffer = (xmlChar *) xmlRealloc(buffer, 1656 max * sizeof(xmlChar)); 1657 if (buffer == NULL) { 1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1659 ctxt->sax->error(ctxt->userData, 1660 "xmlSplitQName: out of memory\n"); 1661 return(NULL); 1662 } 1663 } 1664 buffer[len++] = c; 1665 c = *cur++; 1666 } 1667 buffer[len] = 0; 1668 } 1669 1670 if (buffer == NULL) 1671 ret = xmlStrndup(buf, len); 1672 else { 1673 ret = buffer; 1674 } 1675 } 1676 1677 return(ret); 1678} 1679 1680/************************************************************************ 1681 * * 1682 * The parser itself * 1683 * Relates to http://www.w3.org/TR/REC-xml * 1684 * * 1685 ************************************************************************/ 1686 1687static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1688/** 1689 * xmlParseName: 1690 * @ctxt: an XML parser context 1691 * 1692 * parse an XML name. 1693 * 1694 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1695 * CombiningChar | Extender 1696 * 1697 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1698 * 1699 * [6] Names ::= Name (S Name)* 1700 * 1701 * Returns the Name parsed or NULL 1702 */ 1703 1704xmlChar * 1705xmlParseName(xmlParserCtxtPtr ctxt) { 1706 const xmlChar *in; 1707 xmlChar *ret; 1708 int count = 0; 1709 1710 GROW; 1711 1712 /* 1713 * Accelerator for simple ASCII names 1714 */ 1715 in = ctxt->input->cur; 1716 if (((*in >= 0x61) && (*in <= 0x7A)) || 1717 ((*in >= 0x41) && (*in <= 0x5A)) || 1718 (*in == '_') || (*in == ':')) { 1719 in++; 1720 while (((*in >= 0x61) && (*in <= 0x7A)) || 1721 ((*in >= 0x41) && (*in <= 0x5A)) || 1722 ((*in >= 0x30) && (*in <= 0x39)) || 1723 (*in == '_') || (*in == '-') || 1724 (*in == ':') || (*in == '.')) 1725 in++; 1726 if ((*in > 0) && (*in < 0x80)) { 1727 count = in - ctxt->input->cur; 1728 ret = xmlStrndup(ctxt->input->cur, count); 1729 ctxt->input->cur = in; 1730 return(ret); 1731 } 1732 } 1733 return(xmlParseNameComplex(ctxt)); 1734} 1735 1736static xmlChar * 1737xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1738 xmlChar buf[XML_MAX_NAMELEN + 5]; 1739 int len = 0, l; 1740 int c; 1741 int count = 0; 1742 1743 /* 1744 * Handler for more complex cases 1745 */ 1746 GROW; 1747 c = CUR_CHAR(l); 1748 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1749 (!IS_LETTER(c) && (c != '_') && 1750 (c != ':'))) { 1751 return(NULL); 1752 } 1753 1754 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1755 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1756 (c == '.') || (c == '-') || 1757 (c == '_') || (c == ':') || 1758 (IS_COMBINING(c)) || 1759 (IS_EXTENDER(c)))) { 1760 if (count++ > 100) { 1761 count = 0; 1762 GROW; 1763 } 1764 COPY_BUF(l,buf,len,c); 1765 NEXTL(l); 1766 c = CUR_CHAR(l); 1767 if (len >= XML_MAX_NAMELEN) { 1768 /* 1769 * Okay someone managed to make a huge name, so he's ready to pay 1770 * for the processing speed. 
1771 */ 1772 xmlChar *buffer; 1773 int max = len * 2; 1774 1775 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1776 if (buffer == NULL) { 1777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1778 ctxt->sax->error(ctxt->userData, 1779 "xmlParseNameComplex: out of memory\n"); 1780 return(NULL); 1781 } 1782 memcpy(buffer, buf, len); 1783 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1784 (c == '.') || (c == '-') || 1785 (c == '_') || (c == ':') || 1786 (IS_COMBINING(c)) || 1787 (IS_EXTENDER(c))) { 1788 if (count++ > 100) { 1789 count = 0; 1790 GROW; 1791 } 1792 if (len + 10 > max) { 1793 max *= 2; 1794 buffer = (xmlChar *) xmlRealloc(buffer, 1795 max * sizeof(xmlChar)); 1796 if (buffer == NULL) { 1797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1798 ctxt->sax->error(ctxt->userData, 1799 "xmlParseNameComplex: out of memory\n"); 1800 return(NULL); 1801 } 1802 } 1803 COPY_BUF(l,buffer,len,c); 1804 NEXTL(l); 1805 c = CUR_CHAR(l); 1806 } 1807 buffer[len] = 0; 1808 return(buffer); 1809 } 1810 } 1811 return(xmlStrndup(buf, len)); 1812} 1813 1814/** 1815 * xmlParseStringName: 1816 * @ctxt: an XML parser context 1817 * @str: a pointer to the string pointer (IN/OUT) 1818 * 1819 * parse an XML name. 1820 * 1821 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1822 * CombiningChar | Extender 1823 * 1824 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1825 * 1826 * [6] Names ::= Name (S Name)* 1827 * 1828 * Returns the Name parsed or NULL. The str pointer 1829 * is updated to the current location in the string. 
1830 */ 1831 1832static xmlChar * 1833xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1834 xmlChar buf[XML_MAX_NAMELEN + 5]; 1835 const xmlChar *cur = *str; 1836 int len = 0, l; 1837 int c; 1838 1839 c = CUR_SCHAR(cur, l); 1840 if (!IS_LETTER(c) && (c != '_') && 1841 (c != ':')) { 1842 return(NULL); 1843 } 1844 1845 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1846 (c == '.') || (c == '-') || 1847 (c == '_') || (c == ':') || 1848 (IS_COMBINING(c)) || 1849 (IS_EXTENDER(c))) { 1850 COPY_BUF(l,buf,len,c); 1851 cur += l; 1852 c = CUR_SCHAR(cur, l); 1853 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1854 /* 1855 * Okay someone managed to make a huge name, so he's ready to pay 1856 * for the processing speed. 1857 */ 1858 xmlChar *buffer; 1859 int max = len * 2; 1860 1861 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1862 if (buffer == NULL) { 1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1864 ctxt->sax->error(ctxt->userData, 1865 "xmlParseStringName: out of memory\n"); 1866 return(NULL); 1867 } 1868 memcpy(buffer, buf, len); 1869 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1870 (c == '.') || (c == '-') || 1871 (c == '_') || (c == ':') || 1872 (IS_COMBINING(c)) || 1873 (IS_EXTENDER(c))) { 1874 if (len + 10 > max) { 1875 max *= 2; 1876 buffer = (xmlChar *) xmlRealloc(buffer, 1877 max * sizeof(xmlChar)); 1878 if (buffer == NULL) { 1879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1880 ctxt->sax->error(ctxt->userData, 1881 "xmlParseStringName: out of memory\n"); 1882 return(NULL); 1883 } 1884 } 1885 COPY_BUF(l,buffer,len,c); 1886 cur += l; 1887 c = CUR_SCHAR(cur, l); 1888 } 1889 buffer[len] = 0; 1890 *str = cur; 1891 return(buffer); 1892 } 1893 } 1894 *str = cur; 1895 return(xmlStrndup(buf, len)); 1896} 1897 1898/** 1899 * xmlParseNmtoken: 1900 * @ctxt: an XML parser context 1901 * 1902 * parse an XML Nmtoken. 
1903 * 1904 * [7] Nmtoken ::= (NameChar)+ 1905 * 1906 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1907 * 1908 * Returns the Nmtoken parsed or NULL 1909 */ 1910 1911xmlChar * 1912xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1913 xmlChar buf[XML_MAX_NAMELEN + 5]; 1914 int len = 0, l; 1915 int c; 1916 int count = 0; 1917 1918 GROW; 1919 c = CUR_CHAR(l); 1920 1921 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1922 (c == '.') || (c == '-') || 1923 (c == '_') || (c == ':') || 1924 (IS_COMBINING(c)) || 1925 (IS_EXTENDER(c))) { 1926 if (count++ > 100) { 1927 count = 0; 1928 GROW; 1929 } 1930 COPY_BUF(l,buf,len,c); 1931 NEXTL(l); 1932 c = CUR_CHAR(l); 1933 if (len >= XML_MAX_NAMELEN) { 1934 /* 1935 * Okay someone managed to make a huge token, so he's ready to pay 1936 * for the processing speed. 1937 */ 1938 xmlChar *buffer; 1939 int max = len * 2; 1940 1941 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1942 if (buffer == NULL) { 1943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1944 ctxt->sax->error(ctxt->userData, 1945 "xmlParseNmtoken: out of memory\n"); 1946 return(NULL); 1947 } 1948 memcpy(buffer, buf, len); 1949 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1950 (c == '.') || (c == '-') || 1951 (c == '_') || (c == ':') || 1952 (IS_COMBINING(c)) || 1953 (IS_EXTENDER(c))) { 1954 if (count++ > 100) { 1955 count = 0; 1956 GROW; 1957 } 1958 if (len + 10 > max) { 1959 max *= 2; 1960 buffer = (xmlChar *) xmlRealloc(buffer, 1961 max * sizeof(xmlChar)); 1962 if (buffer == NULL) { 1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1964 ctxt->sax->error(ctxt->userData, 1965 "xmlParseNameComplex: out of memory\n"); 1966 return(NULL); 1967 } 1968 } 1969 COPY_BUF(l,buffer,len,c); 1970 NEXTL(l); 1971 c = CUR_CHAR(l); 1972 } 1973 buffer[len] = 0; 1974 return(buffer); 1975 } 1976 } 1977 if (len == 0) 1978 return(NULL); 1979 return(xmlStrndup(buf, len)); 1980} 1981 1982/** 1983 * xmlParseEntityValue: 1984 * @ctxt: an XML 
parser context 1985 * @orig: if non-NULL store a copy of the original entity value 1986 * 1987 * parse a value for ENTITY declarations 1988 * 1989 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 1990 * "'" ([^%&'] | PEReference | Reference)* "'" 1991 * 1992 * Returns the EntityValue parsed with reference substitued or NULL 1993 */ 1994 1995xmlChar * 1996xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 1997 xmlChar *buf = NULL; 1998 int len = 0; 1999 int size = XML_PARSER_BUFFER_SIZE; 2000 int c, l; 2001 xmlChar stop; 2002 xmlChar *ret = NULL; 2003 const xmlChar *cur = NULL; 2004 xmlParserInputPtr input; 2005 2006 if (RAW == '"') stop = '"'; 2007 else if (RAW == '\'') stop = '\''; 2008 else { 2009 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2011 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2012 ctxt->wellFormed = 0; 2013 ctxt->disableSAX = 1; 2014 return(NULL); 2015 } 2016 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2017 if (buf == NULL) { 2018 xmlGenericError(xmlGenericErrorContext, 2019 "malloc of %d byte failed\n", size); 2020 return(NULL); 2021 } 2022 2023 /* 2024 * The content of the entity definition is copied in a buffer. 2025 */ 2026 2027 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2028 input = ctxt->input; 2029 GROW; 2030 NEXT; 2031 c = CUR_CHAR(l); 2032 /* 2033 * NOTE: 4.4.5 Included in Literal 2034 * When a parameter entity reference appears in a literal entity 2035 * value, ... a single or double quote character in the replacement 2036 * text is always treated as a normal data character and will not 2037 * terminate the literal. 
2038 * In practice it means we stop the loop only when back at parsing 2039 * the initial entity and the quote is found 2040 */ 2041 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2042 (ctxt->input != input))) { 2043 if (len + 5 >= size) { 2044 size *= 2; 2045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2046 if (buf == NULL) { 2047 xmlGenericError(xmlGenericErrorContext, 2048 "realloc of %d byte failed\n", size); 2049 return(NULL); 2050 } 2051 } 2052 COPY_BUF(l,buf,len,c); 2053 NEXTL(l); 2054 /* 2055 * Pop-up of finished entities. 2056 */ 2057 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2058 xmlPopInput(ctxt); 2059 2060 GROW; 2061 c = CUR_CHAR(l); 2062 if (c == 0) { 2063 GROW; 2064 c = CUR_CHAR(l); 2065 } 2066 } 2067 buf[len] = 0; 2068 2069 /* 2070 * Raise problem w.r.t. '&' and '%' being used in non-entities 2071 * reference constructs. Note Charref will be handled in 2072 * xmlStringDecodeEntities() 2073 */ 2074 cur = buf; 2075 while (*cur != 0) { /* non input consuming */ 2076 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2077 xmlChar *name; 2078 xmlChar tmp = *cur; 2079 2080 cur++; 2081 name = xmlParseStringName(ctxt, &cur); 2082 if ((name == NULL) || (*cur != ';')) { 2083 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2085 ctxt->sax->error(ctxt->userData, 2086 "EntityValue: '%c' forbidden except for entities references\n", 2087 tmp); 2088 ctxt->wellFormed = 0; 2089 ctxt->disableSAX = 1; 2090 } 2091 if ((ctxt->inSubset == 1) && (tmp == '%')) { 2092 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2094 ctxt->sax->error(ctxt->userData, 2095 "EntityValue: PEReferences forbidden in internal subset\n", 2096 tmp); 2097 ctxt->wellFormed = 0; 2098 ctxt->disableSAX = 1; 2099 } 2100 if (name != NULL) 2101 xmlFree(name); 2102 } 2103 cur++; 2104 } 2105 2106 /* 2107 * Then PEReference entities are substituted. 
2108 */ 2109 if (c != stop) { 2110 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2112 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2113 ctxt->wellFormed = 0; 2114 ctxt->disableSAX = 1; 2115 xmlFree(buf); 2116 } else { 2117 NEXT; 2118 /* 2119 * NOTE: 4.4.7 Bypassed 2120 * When a general entity reference appears in the EntityValue in 2121 * an entity declaration, it is bypassed and left as is. 2122 * so XML_SUBSTITUTE_REF is not set here. 2123 */ 2124 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2125 0, 0, 0); 2126 if (orig != NULL) 2127 *orig = buf; 2128 else 2129 xmlFree(buf); 2130 } 2131 2132 return(ret); 2133} 2134 2135/** 2136 * xmlParseAttValue: 2137 * @ctxt: an XML parser context 2138 * 2139 * parse a value for an attribute 2140 * Note: the parser won't do substitution of entities here, this 2141 * will be handled later in xmlStringGetNodeList 2142 * 2143 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2144 * "'" ([^<&'] | Reference)* "'" 2145 * 2146 * 3.3.3 Attribute-Value Normalization: 2147 * Before the value of an attribute is passed to the application or 2148 * checked for validity, the XML processor must normalize it as follows: 2149 * - a character reference is processed by appending the referenced 2150 * character to the attribute value 2151 * - an entity reference is processed by recursively processing the 2152 * replacement text of the entity 2153 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2154 * appending #x20 to the normalized value, except that only a single 2155 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2156 * parsed entity or the literal entity value of an internal parsed entity 2157 * - other characters are processed by appending them to the normalized value 2158 * If the declared value is not CDATA, then the XML processor must further 2159 * process the normalized attribute value by discarding any 
leading and 2160 * trailing space (#x20) characters, and by replacing sequences of space 2161 * (#x20) characters by a single space (#x20) character. 2162 * All attributes for which no declaration has been read should be treated 2163 * by a non-validating parser as if declared CDATA. 2164 * 2165 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2166 */ 2167 2168xmlChar * 2169xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2170 xmlChar limit = 0; 2171 xmlChar *buf = NULL; 2172 int len = 0; 2173 int buf_size = 0; 2174 int c, l; 2175 xmlChar *current = NULL; 2176 xmlEntityPtr ent; 2177 2178 2179 SHRINK; 2180 if (NXT(0) == '"') { 2181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2182 limit = '"'; 2183 NEXT; 2184 } else if (NXT(0) == '\'') { 2185 limit = '\''; 2186 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2187 NEXT; 2188 } else { 2189 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2191 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2192 ctxt->wellFormed = 0; 2193 ctxt->disableSAX = 1; 2194 return(NULL); 2195 } 2196 2197 /* 2198 * allocate a translation buffer. 2199 */ 2200 buf_size = XML_PARSER_BUFFER_SIZE; 2201 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2202 if (buf == NULL) { 2203 perror("xmlParseAttValue: malloc failed"); 2204 return(NULL); 2205 } 2206 2207 /* 2208 * Ok loop until we reach one of the ending char or a size limit. 
2209 */ 2210 c = CUR_CHAR(l); 2211 while (((NXT(0) != limit) && /* checked */ 2212 (c != '<')) || (ctxt->token != 0)) { 2213 if (c == 0) break; 2214 if (ctxt->token == '&') { 2215 if (ctxt->replaceEntities) { 2216 if (len > buf_size - 10) { 2217 growBuffer(buf); 2218 } 2219 buf[len++] = '&'; 2220 } else { 2221 /* 2222 * The reparsing will be done in xmlStringGetNodeList() 2223 * called by the attribute() function in SAX.c 2224 */ 2225 static xmlChar buffer[6] = "&"; 2226 2227 if (len > buf_size - 10) { 2228 growBuffer(buf); 2229 } 2230 current = &buffer[0]; 2231 while (*current != 0) { /* non input consuming */ 2232 buf[len++] = *current++; 2233 } 2234 ctxt->token = 0; 2235 } 2236 } else if (c == '&') { 2237 if (NXT(1) == '#') { 2238 int val = xmlParseCharRef(ctxt); 2239 if (val == '&') { 2240 if (ctxt->replaceEntities) { 2241 if (len > buf_size - 10) { 2242 growBuffer(buf); 2243 } 2244 buf[len++] = '&'; 2245 } else { 2246 /* 2247 * The reparsing will be done in xmlStringGetNodeList() 2248 * called by the attribute() function in SAX.c 2249 */ 2250 static xmlChar buffer[6] = "&"; 2251 2252 if (len > buf_size - 10) { 2253 growBuffer(buf); 2254 } 2255 current = &buffer[0]; 2256 while (*current != 0) { /* non input consuming */ 2257 buf[len++] = *current++; 2258 } 2259 } 2260 } else { 2261 if (len > buf_size - 10) { 2262 growBuffer(buf); 2263 } 2264 len += xmlCopyChar(0, &buf[len], val); 2265 } 2266 } else { 2267 ent = xmlParseEntityRef(ctxt); 2268 if ((ent != NULL) && 2269 (ctxt->replaceEntities != 0)) { 2270 xmlChar *rep; 2271 2272 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2273 rep = xmlStringDecodeEntities(ctxt, ent->content, 2274 XML_SUBSTITUTE_REF, 0, 0, 0); 2275 if (rep != NULL) { 2276 current = rep; 2277 while (*current != 0) { /* non input consuming */ 2278 buf[len++] = *current++; 2279 if (len > buf_size - 10) { 2280 growBuffer(buf); 2281 } 2282 } 2283 xmlFree(rep); 2284 } 2285 } else { 2286 if (len > buf_size - 10) { 2287 growBuffer(buf); 2288 } 
2289 if (ent->content != NULL) 2290 buf[len++] = ent->content[0]; 2291 } 2292 } else if (ent != NULL) { 2293 int i = xmlStrlen(ent->name); 2294 const xmlChar *cur = ent->name; 2295 2296 /* 2297 * This may look absurd but is needed to detect 2298 * entities problems 2299 */ 2300 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2301 (ent->content != NULL)) { 2302 xmlChar *rep; 2303 rep = xmlStringDecodeEntities(ctxt, ent->content, 2304 XML_SUBSTITUTE_REF, 0, 0, 0); 2305 if (rep != NULL) 2306 xmlFree(rep); 2307 } 2308 2309 /* 2310 * Just output the reference 2311 */ 2312 buf[len++] = '&'; 2313 if (len > buf_size - i - 10) { 2314 growBuffer(buf); 2315 } 2316 for (;i > 0;i--) 2317 buf[len++] = *cur++; 2318 buf[len++] = ';'; 2319 } 2320 } 2321 } else { 2322 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2323 COPY_BUF(l,buf,len,0x20); 2324 if (len > buf_size - 10) { 2325 growBuffer(buf); 2326 } 2327 } else { 2328 COPY_BUF(l,buf,len,c); 2329 if (len > buf_size - 10) { 2330 growBuffer(buf); 2331 } 2332 } 2333 NEXTL(l); 2334 } 2335 GROW; 2336 c = CUR_CHAR(l); 2337 } 2338 buf[len++] = 0; 2339 if (RAW == '<') { 2340 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2342 ctxt->sax->error(ctxt->userData, 2343 "Unescaped '<' not allowed in attributes values\n"); 2344 ctxt->wellFormed = 0; 2345 ctxt->disableSAX = 1; 2346 } else if (RAW != limit) { 2347 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2349 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2350 ctxt->wellFormed = 0; 2351 ctxt->disableSAX = 1; 2352 } else 2353 NEXT; 2354 return(buf); 2355} 2356 2357/** 2358 * xmlParseSystemLiteral: 2359 * @ctxt: an XML parser context 2360 * 2361 * parse an XML Literal 2362 * 2363 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2364 * 2365 * Returns the SystemLiteral parsed or NULL 2366 */ 2367 2368xmlChar * 
2369xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2370 xmlChar *buf = NULL; 2371 int len = 0; 2372 int size = XML_PARSER_BUFFER_SIZE; 2373 int cur, l; 2374 xmlChar stop; 2375 int state = ctxt->instate; 2376 int count = 0; 2377 2378 SHRINK; 2379 if (RAW == '"') { 2380 NEXT; 2381 stop = '"'; 2382 } else if (RAW == '\'') { 2383 NEXT; 2384 stop = '\''; 2385 } else { 2386 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2388 ctxt->sax->error(ctxt->userData, 2389 "SystemLiteral \" or ' expected\n"); 2390 ctxt->wellFormed = 0; 2391 ctxt->disableSAX = 1; 2392 return(NULL); 2393 } 2394 2395 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2396 if (buf == NULL) { 2397 xmlGenericError(xmlGenericErrorContext, 2398 "malloc of %d byte failed\n", size); 2399 return(NULL); 2400 } 2401 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2402 cur = CUR_CHAR(l); 2403 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2404 if (len + 5 >= size) { 2405 size *= 2; 2406 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2407 if (buf == NULL) { 2408 xmlGenericError(xmlGenericErrorContext, 2409 "realloc of %d byte failed\n", size); 2410 ctxt->instate = (xmlParserInputState) state; 2411 return(NULL); 2412 } 2413 } 2414 count++; 2415 if (count > 50) { 2416 GROW; 2417 count = 0; 2418 } 2419 COPY_BUF(l,buf,len,cur); 2420 NEXTL(l); 2421 cur = CUR_CHAR(l); 2422 if (cur == 0) { 2423 GROW; 2424 SHRINK; 2425 cur = CUR_CHAR(l); 2426 } 2427 } 2428 buf[len] = 0; 2429 ctxt->instate = (xmlParserInputState) state; 2430 if (!IS_CHAR(cur)) { 2431 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2433 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2434 ctxt->wellFormed = 0; 2435 ctxt->disableSAX = 1; 2436 } else { 2437 NEXT; 2438 } 2439 return(buf); 2440} 2441 2442/** 2443 * xmlParsePubidLiteral: 2444 * @ctxt: an XML parser context 2445 * 2446 * parse an XML public literal 
2447 * 2448 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2449 * 2450 * Returns the PubidLiteral parsed or NULL. 2451 */ 2452 2453xmlChar * 2454xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2455 xmlChar *buf = NULL; 2456 int len = 0; 2457 int size = XML_PARSER_BUFFER_SIZE; 2458 xmlChar cur; 2459 xmlChar stop; 2460 int count = 0; 2461 2462 SHRINK; 2463 if (RAW == '"') { 2464 NEXT; 2465 stop = '"'; 2466 } else if (RAW == '\'') { 2467 NEXT; 2468 stop = '\''; 2469 } else { 2470 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2472 ctxt->sax->error(ctxt->userData, 2473 "SystemLiteral \" or ' expected\n"); 2474 ctxt->wellFormed = 0; 2475 ctxt->disableSAX = 1; 2476 return(NULL); 2477 } 2478 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2479 if (buf == NULL) { 2480 xmlGenericError(xmlGenericErrorContext, 2481 "malloc of %d byte failed\n", size); 2482 return(NULL); 2483 } 2484 cur = CUR; 2485 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2486 if (len + 1 >= size) { 2487 size *= 2; 2488 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2489 if (buf == NULL) { 2490 xmlGenericError(xmlGenericErrorContext, 2491 "realloc of %d byte failed\n", size); 2492 return(NULL); 2493 } 2494 } 2495 buf[len++] = cur; 2496 count++; 2497 if (count > 50) { 2498 GROW; 2499 count = 0; 2500 } 2501 NEXT; 2502 cur = CUR; 2503 if (cur == 0) { 2504 GROW; 2505 SHRINK; 2506 cur = CUR; 2507 } 2508 } 2509 buf[len] = 0; 2510 if (cur != stop) { 2511 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2513 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2514 ctxt->wellFormed = 0; 2515 ctxt->disableSAX = 1; 2516 } else { 2517 NEXT; 2518 } 2519 return(buf); 2520} 2521 2522void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2523/** 2524 * xmlParseCharData: 2525 * @ctxt: an XML parser context 2526 * @cdata: int indicating 
whether we are within a CDATA section 2527 * 2528 * parse a CharData section. 2529 * if we are within a CDATA section ']]>' marks an end of section. 2530 * 2531 * The right angle bracket (>) may be represented using the string ">", 2532 * and must, for compatibility, be escaped using ">" or a character 2533 * reference when it appears in the string "]]>" in content, when that 2534 * string is not marking the end of a CDATA section. 2535 * 2536 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2537 */ 2538 2539void 2540xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2541 const xmlChar *in; 2542 int nbchar = 0; 2543 int line = ctxt->input->line; 2544 int col = ctxt->input->col; 2545 2546 SHRINK; 2547 GROW; 2548 /* 2549 * Accelerated common case where input don't need to be 2550 * modified before passing it to the handler. 2551 */ 2552 if ((ctxt->token == 0) && (!cdata)) { 2553 in = ctxt->input->cur; 2554 do { 2555get_more: 2556 while (((*in >= 0x20) && (*in != '<') && 2557 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2558 in++; 2559 if (*in == 0xA) { 2560 ctxt->input->line++; 2561 in++; 2562 while (*in == 0xA) { 2563 ctxt->input->line++; 2564 in++; 2565 } 2566 goto get_more; 2567 } 2568 nbchar = in - ctxt->input->cur; 2569 if (nbchar > 0) { 2570 if (IS_BLANK(*ctxt->input->cur)) { 2571 const xmlChar *tmp = ctxt->input->cur; 2572 ctxt->input->cur = in; 2573 if (areBlanks(ctxt, tmp, nbchar)) { 2574 if (ctxt->sax->ignorableWhitespace != NULL) 2575 ctxt->sax->ignorableWhitespace(ctxt->userData, 2576 tmp, nbchar); 2577 } else { 2578 if (ctxt->sax->characters != NULL) 2579 ctxt->sax->characters(ctxt->userData, 2580 tmp, nbchar); 2581 } 2582 line = ctxt->input->line; 2583 col = ctxt->input->col; 2584 } else { 2585 if (ctxt->sax->characters != NULL) 2586 ctxt->sax->characters(ctxt->userData, 2587 ctxt->input->cur, nbchar); 2588 line = ctxt->input->line; 2589 col = ctxt->input->col; 2590 } 2591 } 2592 ctxt->input->cur = in; 2593 if (*in == 0xD) { 2594 in++; 2595 if 
(*in == 0xA) { 2596 ctxt->input->cur = in; 2597 in++; 2598 ctxt->input->line++; 2599 continue; /* while */ 2600 } 2601 in--; 2602 } 2603 if (*in == '<') { 2604 return; 2605 } 2606 if (*in == '&') { 2607 return; 2608 } 2609 SHRINK; 2610 GROW; 2611 in = ctxt->input->cur; 2612 } while ((*in >= 0x20) && (*in <= 0x7F)); 2613 nbchar = 0; 2614 } 2615 ctxt->input->line = line; 2616 ctxt->input->col = col; 2617 xmlParseCharDataComplex(ctxt, cdata); 2618} 2619 2620void 2621xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2622 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2623 int nbchar = 0; 2624 int cur, l; 2625 int count = 0; 2626 2627 SHRINK; 2628 GROW; 2629 cur = CUR_CHAR(l); 2630 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2631 ((cur != '&') || (ctxt->token == '&')) && 2632 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2633 if ((cur == ']') && (NXT(1) == ']') && 2634 (NXT(2) == '>')) { 2635 if (cdata) break; 2636 else { 2637 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2639 ctxt->sax->error(ctxt->userData, 2640 "Sequence ']]>' not allowed in content\n"); 2641 /* Should this be relaxed ??? I see a "must here */ 2642 ctxt->wellFormed = 0; 2643 ctxt->disableSAX = 1; 2644 } 2645 } 2646 COPY_BUF(l,buf,nbchar,cur); 2647 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2648 /* 2649 * Ok the segment is to be consumed as chars. 
2650 */ 2651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2652 if (areBlanks(ctxt, buf, nbchar)) { 2653 if (ctxt->sax->ignorableWhitespace != NULL) 2654 ctxt->sax->ignorableWhitespace(ctxt->userData, 2655 buf, nbchar); 2656 } else { 2657 if (ctxt->sax->characters != NULL) 2658 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2659 } 2660 } 2661 nbchar = 0; 2662 } 2663 count++; 2664 if (count > 50) { 2665 GROW; 2666 count = 0; 2667 } 2668 NEXTL(l); 2669 cur = CUR_CHAR(l); 2670 } 2671 if (nbchar != 0) { 2672 /* 2673 * Ok the segment is to be consumed as chars. 2674 */ 2675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2676 if (areBlanks(ctxt, buf, nbchar)) { 2677 if (ctxt->sax->ignorableWhitespace != NULL) 2678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2679 } else { 2680 if (ctxt->sax->characters != NULL) 2681 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2682 } 2683 } 2684 } 2685} 2686 2687/** 2688 * xmlParseExternalID: 2689 * @ctxt: an XML parser context 2690 * @publicID: a xmlChar** receiving PubidLiteral 2691 * @strict: indicate whether we should restrict parsing to only 2692 * production [75], see NOTE below 2693 * 2694 * Parse an External ID or a Public ID 2695 * 2696 * NOTE: Productions [75] and [83] interract badly since [75] can generate 2697 * 'PUBLIC' S PubidLiteral S SystemLiteral 2698 * 2699 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2700 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2701 * 2702 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2703 * 2704 * Returns the function returns SystemLiteral and in the second 2705 * case publicID receives PubidLiteral, is strict is off 2706 * it is possible to return NULL and have publicID set. 
2707 */ 2708 2709xmlChar * 2710xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2711 xmlChar *URI = NULL; 2712 2713 SHRINK; 2714 2715 *publicID = NULL; 2716 if ((RAW == 'S') && (NXT(1) == 'Y') && 2717 (NXT(2) == 'S') && (NXT(3) == 'T') && 2718 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2719 SKIP(6); 2720 if (!IS_BLANK(CUR)) { 2721 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2723 ctxt->sax->error(ctxt->userData, 2724 "Space required after 'SYSTEM'\n"); 2725 ctxt->wellFormed = 0; 2726 ctxt->disableSAX = 1; 2727 } 2728 SKIP_BLANKS; 2729 URI = xmlParseSystemLiteral(ctxt); 2730 if (URI == NULL) { 2731 ctxt->errNo = XML_ERR_URI_REQUIRED; 2732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2733 ctxt->sax->error(ctxt->userData, 2734 "xmlParseExternalID: SYSTEM, no URI\n"); 2735 ctxt->wellFormed = 0; 2736 ctxt->disableSAX = 1; 2737 } 2738 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2739 (NXT(2) == 'B') && (NXT(3) == 'L') && 2740 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2741 SKIP(6); 2742 if (!IS_BLANK(CUR)) { 2743 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2745 ctxt->sax->error(ctxt->userData, 2746 "Space required after 'PUBLIC'\n"); 2747 ctxt->wellFormed = 0; 2748 ctxt->disableSAX = 1; 2749 } 2750 SKIP_BLANKS; 2751 *publicID = xmlParsePubidLiteral(ctxt); 2752 if (*publicID == NULL) { 2753 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2755 ctxt->sax->error(ctxt->userData, 2756 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2757 ctxt->wellFormed = 0; 2758 ctxt->disableSAX = 1; 2759 } 2760 if (strict) { 2761 /* 2762 * We don't handle [83] so "S SystemLiteral" is required. 
2763 */ 2764 if (!IS_BLANK(CUR)) { 2765 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2767 ctxt->sax->error(ctxt->userData, 2768 "Space required after the Public Identifier\n"); 2769 ctxt->wellFormed = 0; 2770 ctxt->disableSAX = 1; 2771 } 2772 } else { 2773 /* 2774 * We handle [83] so we return immediately, if 2775 * "S SystemLiteral" is not detected. From a purely parsing 2776 * point of view that's a nice mess. 2777 */ 2778 const xmlChar *ptr; 2779 GROW; 2780 2781 ptr = CUR_PTR; 2782 if (!IS_BLANK(*ptr)) return(NULL); 2783 2784 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2785 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2786 } 2787 SKIP_BLANKS; 2788 URI = xmlParseSystemLiteral(ctxt); 2789 if (URI == NULL) { 2790 ctxt->errNo = XML_ERR_URI_REQUIRED; 2791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2792 ctxt->sax->error(ctxt->userData, 2793 "xmlParseExternalID: PUBLIC, no URI\n"); 2794 ctxt->wellFormed = 0; 2795 ctxt->disableSAX = 1; 2796 } 2797 } 2798 return(URI); 2799} 2800 2801/** 2802 * xmlParseComment: 2803 * @ctxt: an XML parser context 2804 * 2805 * Skip an XML (SGML) comment <!-- .... --> 2806 * The spec says that "For compatibility, the string "--" (double-hyphen) 2807 * must not occur within comments. " 2808 * 2809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2810 */ 2811void 2812xmlParseComment(xmlParserCtxtPtr ctxt) { 2813 xmlChar *buf = NULL; 2814 int len; 2815 int size = XML_PARSER_BUFFER_SIZE; 2816 int q, ql; 2817 int r, rl; 2818 int cur, l; 2819 xmlParserInputState state; 2820 xmlParserInputPtr input = ctxt->input; 2821 int count = 0; 2822 2823 /* 2824 * Check that there is a comment right here. 
2825 */ 2826 if ((RAW != '<') || (NXT(1) != '!') || 2827 (NXT(2) != '-') || (NXT(3) != '-')) return; 2828 2829 state = ctxt->instate; 2830 ctxt->instate = XML_PARSER_COMMENT; 2831 SHRINK; 2832 SKIP(4); 2833 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2834 if (buf == NULL) { 2835 xmlGenericError(xmlGenericErrorContext, 2836 "malloc of %d byte failed\n", size); 2837 ctxt->instate = state; 2838 return; 2839 } 2840 q = CUR_CHAR(ql); 2841 NEXTL(ql); 2842 r = CUR_CHAR(rl); 2843 NEXTL(rl); 2844 cur = CUR_CHAR(l); 2845 len = 0; 2846 while (IS_CHAR(cur) && /* checked */ 2847 ((cur != '>') || 2848 (r != '-') || (q != '-'))) { 2849 if ((r == '-') && (q == '-') && (len > 1)) { 2850 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2852 ctxt->sax->error(ctxt->userData, 2853 "Comment must not contain '--' (double-hyphen)`\n"); 2854 ctxt->wellFormed = 0; 2855 ctxt->disableSAX = 1; 2856 } 2857 if (len + 5 >= size) { 2858 size *= 2; 2859 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2860 if (buf == NULL) { 2861 xmlGenericError(xmlGenericErrorContext, 2862 "realloc of %d byte failed\n", size); 2863 ctxt->instate = state; 2864 return; 2865 } 2866 } 2867 COPY_BUF(ql,buf,len,q); 2868 q = r; 2869 ql = rl; 2870 r = cur; 2871 rl = l; 2872 2873 count++; 2874 if (count > 50) { 2875 GROW; 2876 count = 0; 2877 } 2878 NEXTL(l); 2879 cur = CUR_CHAR(l); 2880 if (cur == 0) { 2881 SHRINK; 2882 GROW; 2883 cur = CUR_CHAR(l); 2884 } 2885 } 2886 buf[len] = 0; 2887 if (!IS_CHAR(cur)) { 2888 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2890 ctxt->sax->error(ctxt->userData, 2891 "Comment not terminated \n<!--%.50s\n", buf); 2892 ctxt->wellFormed = 0; 2893 ctxt->disableSAX = 1; 2894 xmlFree(buf); 2895 } else { 2896 if (input != ctxt->input) { 2897 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2899 
ctxt->sax->error(ctxt->userData, 2900"Comment doesn't start and stop in the same entity\n"); 2901 ctxt->wellFormed = 0; 2902 ctxt->disableSAX = 1; 2903 } 2904 NEXT; 2905 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2906 (!ctxt->disableSAX)) 2907 ctxt->sax->comment(ctxt->userData, buf); 2908 xmlFree(buf); 2909 } 2910 ctxt->instate = state; 2911} 2912 2913/** 2914 * xmlParsePITarget: 2915 * @ctxt: an XML parser context 2916 * 2917 * parse the name of a PI 2918 * 2919 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2920 * 2921 * Returns the PITarget name or NULL 2922 */ 2923 2924xmlChar * 2925xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2926 xmlChar *name; 2927 2928 name = xmlParseName(ctxt); 2929 if ((name != NULL) && 2930 ((name[0] == 'x') || (name[0] == 'X')) && 2931 ((name[1] == 'm') || (name[1] == 'M')) && 2932 ((name[2] == 'l') || (name[2] == 'L'))) { 2933 int i; 2934 if ((name[0] == 'x') && (name[1] == 'm') && 2935 (name[2] == 'l') && (name[3] == 0)) { 2936 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2938 ctxt->sax->error(ctxt->userData, 2939 "XML declaration allowed only at the start of the document\n"); 2940 ctxt->wellFormed = 0; 2941 ctxt->disableSAX = 1; 2942 return(name); 2943 } else if (name[3] == 0) { 2944 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2946 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2947 ctxt->wellFormed = 0; 2948 ctxt->disableSAX = 1; 2949 return(name); 2950 } 2951 for (i = 0;;i++) { 2952 if (xmlW3CPIs[i] == NULL) break; 2953 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2954 return(name); 2955 } 2956 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2957 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2958 ctxt->sax->warning(ctxt->userData, 2959 "xmlParsePItarget: invalid name prefix 'xml'\n"); 2960 } 2961 } 2962 return(name); 2963} 2964 2965#ifdef LIBXML_CATALOG_ENABLED 
2966/** 2967 * xmlParseCatalogPI: 2968 * @ctxt: an XML parser context 2969 * @catalog: the PI value string 2970 * 2971 * parse an XML Catalog Processing Instruction. 2972 * 2973 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 2974 * 2975 * Occurs only if allowed by the user and if happening in the Misc 2976 * part of the document before any doctype informations 2977 * This will add the given catalog to the parsing context in order 2978 * to be used if there is a resolution need further down in the document 2979 */ 2980 2981static void 2982xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 2983 xmlChar *URL = NULL; 2984 const xmlChar *tmp, *base; 2985 xmlChar marker; 2986 2987 tmp = catalog; 2988 while (IS_BLANK(*tmp)) tmp++; 2989 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 2990 goto error; 2991 tmp += 7; 2992 while (IS_BLANK(*tmp)) tmp++; 2993 if (*tmp != '=') { 2994 return; 2995 } 2996 tmp++; 2997 while (IS_BLANK(*tmp)) tmp++; 2998 marker = *tmp; 2999 if ((marker != '\'') && (marker != '"')) 3000 goto error; 3001 tmp++; 3002 base = tmp; 3003 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3004 if (*tmp == 0) 3005 goto error; 3006 URL = xmlStrndup(base, tmp - base); 3007 tmp++; 3008 while (IS_BLANK(*tmp)) tmp++; 3009 if (*tmp != 0) 3010 goto error; 3011 3012 if (URL != NULL) { 3013 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3014 xmlFree(URL); 3015 } 3016 return; 3017 3018error: 3019 ctxt->errNo = XML_WAR_CATALOG_PI; 3020 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3021 ctxt->sax->warning(ctxt->userData, 3022 "Catalog PI syntax error: %s\n", catalog); 3023 if (URL != NULL) 3024 xmlFree(URL); 3025} 3026#endif 3027 3028/** 3029 * xmlParsePI: 3030 * @ctxt: an XML parser context 3031 * 3032 * parse an XML Processing Instruction. 3033 * 3034 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3035 * 3036 * The processing is transfered to SAX once parsed. 
3037 */ 3038 3039void 3040xmlParsePI(xmlParserCtxtPtr ctxt) { 3041 xmlChar *buf = NULL; 3042 int len = 0; 3043 int size = XML_PARSER_BUFFER_SIZE; 3044 int cur, l; 3045 xmlChar *target; 3046 xmlParserInputState state; 3047 int count = 0; 3048 3049 if ((RAW == '<') && (NXT(1) == '?')) { 3050 xmlParserInputPtr input = ctxt->input; 3051 state = ctxt->instate; 3052 ctxt->instate = XML_PARSER_PI; 3053 /* 3054 * this is a Processing Instruction. 3055 */ 3056 SKIP(2); 3057 SHRINK; 3058 3059 /* 3060 * Parse the target name and check for special support like 3061 * namespace. 3062 */ 3063 target = xmlParsePITarget(ctxt); 3064 if (target != NULL) { 3065 if ((RAW == '?') && (NXT(1) == '>')) { 3066 if (input != ctxt->input) { 3067 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3069 ctxt->sax->error(ctxt->userData, 3070 "PI declaration doesn't start and stop in the same entity\n"); 3071 ctxt->wellFormed = 0; 3072 ctxt->disableSAX = 1; 3073 } 3074 SKIP(2); 3075 3076 /* 3077 * SAX: PI detected. 
3078 */ 3079 if ((ctxt->sax) && (!ctxt->disableSAX) && 3080 (ctxt->sax->processingInstruction != NULL)) 3081 ctxt->sax->processingInstruction(ctxt->userData, 3082 target, NULL); 3083 ctxt->instate = state; 3084 xmlFree(target); 3085 return; 3086 } 3087 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3088 if (buf == NULL) { 3089 xmlGenericError(xmlGenericErrorContext, 3090 "malloc of %d byte failed\n", size); 3091 ctxt->instate = state; 3092 return; 3093 } 3094 cur = CUR; 3095 if (!IS_BLANK(cur)) { 3096 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3098 ctxt->sax->error(ctxt->userData, 3099 "xmlParsePI: PI %s space expected\n", target); 3100 ctxt->wellFormed = 0; 3101 ctxt->disableSAX = 1; 3102 } 3103 SKIP_BLANKS; 3104 cur = CUR_CHAR(l); 3105 while (IS_CHAR(cur) && /* checked */ 3106 ((cur != '?') || (NXT(1) != '>'))) { 3107 if (len + 5 >= size) { 3108 size *= 2; 3109 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3110 if (buf == NULL) { 3111 xmlGenericError(xmlGenericErrorContext, 3112 "realloc of %d byte failed\n", size); 3113 ctxt->instate = state; 3114 return; 3115 } 3116 } 3117 count++; 3118 if (count > 50) { 3119 GROW; 3120 count = 0; 3121 } 3122 COPY_BUF(l,buf,len,cur); 3123 NEXTL(l); 3124 cur = CUR_CHAR(l); 3125 if (cur == 0) { 3126 SHRINK; 3127 GROW; 3128 cur = CUR_CHAR(l); 3129 } 3130 } 3131 buf[len] = 0; 3132 if (cur != '?') { 3133 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3135 ctxt->sax->error(ctxt->userData, 3136 "xmlParsePI: PI %s never end ...\n", target); 3137 ctxt->wellFormed = 0; 3138 ctxt->disableSAX = 1; 3139 } else { 3140 if (input != ctxt->input) { 3141 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3143 ctxt->sax->error(ctxt->userData, 3144 "PI declaration doesn't start and stop in the same entity\n"); 3145 ctxt->wellFormed = 0; 3146 ctxt->disableSAX = 1; 3147 } 3148 
SKIP(2); 3149 3150#ifdef LIBXML_CATALOG_ENABLED 3151 if (((state == XML_PARSER_MISC) || 3152 (state == XML_PARSER_START)) && 3153 (xmlStrEqual(target, XML_CATALOG_PI))) { 3154 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3155 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3156 (allow == XML_CATA_ALLOW_ALL)) 3157 xmlParseCatalogPI(ctxt, buf); 3158 } 3159#endif 3160 3161 3162 /* 3163 * SAX: PI detected. 3164 */ 3165 if ((ctxt->sax) && (!ctxt->disableSAX) && 3166 (ctxt->sax->processingInstruction != NULL)) 3167 ctxt->sax->processingInstruction(ctxt->userData, 3168 target, buf); 3169 } 3170 xmlFree(buf); 3171 xmlFree(target); 3172 } else { 3173 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3175 ctxt->sax->error(ctxt->userData, 3176 "xmlParsePI : no target name\n"); 3177 ctxt->wellFormed = 0; 3178 ctxt->disableSAX = 1; 3179 } 3180 ctxt->instate = state; 3181 } 3182} 3183 3184/** 3185 * xmlParseNotationDecl: 3186 * @ctxt: an XML parser context 3187 * 3188 * parse a notation declaration 3189 * 3190 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3191 * 3192 * Hence there is actually 3 choices: 3193 * 'PUBLIC' S PubidLiteral 3194 * 'PUBLIC' S PubidLiteral S SystemLiteral 3195 * and 'SYSTEM' S SystemLiteral 3196 * 3197 * See the NOTE on xmlParseExternalID(). 
3198 */ 3199 3200void 3201xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3202 xmlChar *name; 3203 xmlChar *Pubid; 3204 xmlChar *Systemid; 3205 3206 if ((RAW == '<') && (NXT(1) == '!') && 3207 (NXT(2) == 'N') && (NXT(3) == 'O') && 3208 (NXT(4) == 'T') && (NXT(5) == 'A') && 3209 (NXT(6) == 'T') && (NXT(7) == 'I') && 3210 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3211 xmlParserInputPtr input = ctxt->input; 3212 SHRINK; 3213 SKIP(10); 3214 if (!IS_BLANK(CUR)) { 3215 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3217 ctxt->sax->error(ctxt->userData, 3218 "Space required after '<!NOTATION'\n"); 3219 ctxt->wellFormed = 0; 3220 ctxt->disableSAX = 1; 3221 return; 3222 } 3223 SKIP_BLANKS; 3224 3225 name = xmlParseName(ctxt); 3226 if (name == NULL) { 3227 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3229 ctxt->sax->error(ctxt->userData, 3230 "NOTATION: Name expected here\n"); 3231 ctxt->wellFormed = 0; 3232 ctxt->disableSAX = 1; 3233 return; 3234 } 3235 if (!IS_BLANK(CUR)) { 3236 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3238 ctxt->sax->error(ctxt->userData, 3239 "Space required after the NOTATION name'\n"); 3240 ctxt->wellFormed = 0; 3241 ctxt->disableSAX = 1; 3242 return; 3243 } 3244 SKIP_BLANKS; 3245 3246 /* 3247 * Parse the IDs. 
3248 */ 3249 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3250 SKIP_BLANKS; 3251 3252 if (RAW == '>') { 3253 if (input != ctxt->input) { 3254 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3256 ctxt->sax->error(ctxt->userData, 3257"Notation declaration doesn't start and stop in the same entity\n"); 3258 ctxt->wellFormed = 0; 3259 ctxt->disableSAX = 1; 3260 } 3261 NEXT; 3262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3263 (ctxt->sax->notationDecl != NULL)) 3264 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3265 } else { 3266 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3268 ctxt->sax->error(ctxt->userData, 3269 "'>' required to close NOTATION declaration\n"); 3270 ctxt->wellFormed = 0; 3271 ctxt->disableSAX = 1; 3272 } 3273 xmlFree(name); 3274 if (Systemid != NULL) xmlFree(Systemid); 3275 if (Pubid != NULL) xmlFree(Pubid); 3276 } 3277} 3278 3279/** 3280 * xmlParseEntityDecl: 3281 * @ctxt: an XML parser context 3282 * 3283 * parse <!ENTITY declarations 3284 * 3285 * [70] EntityDecl ::= GEDecl | PEDecl 3286 * 3287 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3288 * 3289 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3290 * 3291 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3292 * 3293 * [74] PEDef ::= EntityValue | ExternalID 3294 * 3295 * [76] NDataDecl ::= S 'NDATA' S Name 3296 * 3297 * [ VC: Notation Declared ] 3298 * The Name must match the declared name of a notation. 
3299 */ 3300 3301void 3302xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3303 xmlChar *name = NULL; 3304 xmlChar *value = NULL; 3305 xmlChar *URI = NULL, *literal = NULL; 3306 xmlChar *ndata = NULL; 3307 int isParameter = 0; 3308 xmlChar *orig = NULL; 3309 3310 GROW; 3311 if ((RAW == '<') && (NXT(1) == '!') && 3312 (NXT(2) == 'E') && (NXT(3) == 'N') && 3313 (NXT(4) == 'T') && (NXT(5) == 'I') && 3314 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3315 xmlParserInputPtr input = ctxt->input; 3316 ctxt->instate = XML_PARSER_ENTITY_DECL; 3317 SHRINK; 3318 SKIP(8); 3319 if (!IS_BLANK(CUR)) { 3320 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3322 ctxt->sax->error(ctxt->userData, 3323 "Space required after '<!ENTITY'\n"); 3324 ctxt->wellFormed = 0; 3325 ctxt->disableSAX = 1; 3326 } 3327 SKIP_BLANKS; 3328 3329 if (RAW == '%') { 3330 NEXT; 3331 if (!IS_BLANK(CUR)) { 3332 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3334 ctxt->sax->error(ctxt->userData, 3335 "Space required after '%'\n"); 3336 ctxt->wellFormed = 0; 3337 ctxt->disableSAX = 1; 3338 } 3339 SKIP_BLANKS; 3340 isParameter = 1; 3341 } 3342 3343 name = xmlParseName(ctxt); 3344 if (name == NULL) { 3345 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3347 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3348 ctxt->wellFormed = 0; 3349 ctxt->disableSAX = 1; 3350 return; 3351 } 3352 if (!IS_BLANK(CUR)) { 3353 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3355 ctxt->sax->error(ctxt->userData, 3356 "Space required after the entity name\n"); 3357 ctxt->wellFormed = 0; 3358 ctxt->disableSAX = 1; 3359 } 3360 SKIP_BLANKS; 3361 3362 /* 3363 * handle the various case of definitions... 
3364 */ 3365 if (isParameter) { 3366 if ((RAW == '"') || (RAW == '\'')) { 3367 value = xmlParseEntityValue(ctxt, &orig); 3368 if (value) { 3369 if ((ctxt->sax != NULL) && 3370 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3371 ctxt->sax->entityDecl(ctxt->userData, name, 3372 XML_INTERNAL_PARAMETER_ENTITY, 3373 NULL, NULL, value); 3374 } 3375 } else { 3376 URI = xmlParseExternalID(ctxt, &literal, 1); 3377 if ((URI == NULL) && (literal == NULL)) { 3378 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3380 ctxt->sax->error(ctxt->userData, 3381 "Entity value required\n"); 3382 ctxt->wellFormed = 0; 3383 ctxt->disableSAX = 1; 3384 } 3385 if (URI) { 3386 xmlURIPtr uri; 3387 3388 uri = xmlParseURI((const char *) URI); 3389 if (uri == NULL) { 3390 ctxt->errNo = XML_ERR_INVALID_URI; 3391 if ((ctxt->sax != NULL) && 3392 (!ctxt->disableSAX) && 3393 (ctxt->sax->error != NULL)) 3394 ctxt->sax->error(ctxt->userData, 3395 "Invalid URI: %s\n", URI); 3396 ctxt->wellFormed = 0; 3397 } else { 3398 if (uri->fragment != NULL) { 3399 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3400 if ((ctxt->sax != NULL) && 3401 (!ctxt->disableSAX) && 3402 (ctxt->sax->error != NULL)) 3403 ctxt->sax->error(ctxt->userData, 3404 "Fragment not allowed: %s\n", URI); 3405 ctxt->wellFormed = 0; 3406 } else { 3407 if ((ctxt->sax != NULL) && 3408 (!ctxt->disableSAX) && 3409 (ctxt->sax->entityDecl != NULL)) 3410 ctxt->sax->entityDecl(ctxt->userData, name, 3411 XML_EXTERNAL_PARAMETER_ENTITY, 3412 literal, URI, NULL); 3413 } 3414 xmlFreeURI(uri); 3415 } 3416 } 3417 } 3418 } else { 3419 if ((RAW == '"') || (RAW == '\'')) { 3420 value = xmlParseEntityValue(ctxt, &orig); 3421 if ((ctxt->sax != NULL) && 3422 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3423 ctxt->sax->entityDecl(ctxt->userData, name, 3424 XML_INTERNAL_GENERAL_ENTITY, 3425 NULL, NULL, value); 3426 } else { 3427 URI = xmlParseExternalID(ctxt, &literal, 1); 3428 if ((URI == NULL) && (literal 
== NULL)) { 3429 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3431 ctxt->sax->error(ctxt->userData, 3432 "Entity value required\n"); 3433 ctxt->wellFormed = 0; 3434 ctxt->disableSAX = 1; 3435 } 3436 if (URI) { 3437 xmlURIPtr uri; 3438 3439 uri = xmlParseURI((const char *)URI); 3440 if (uri == NULL) { 3441 ctxt->errNo = XML_ERR_INVALID_URI; 3442 if ((ctxt->sax != NULL) && 3443 (!ctxt->disableSAX) && 3444 (ctxt->sax->error != NULL)) 3445 ctxt->sax->error(ctxt->userData, 3446 "Invalid URI: %s\n", URI); 3447 ctxt->wellFormed = 0; 3448 } else { 3449 if (uri->fragment != NULL) { 3450 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3451 if ((ctxt->sax != NULL) && 3452 (!ctxt->disableSAX) && 3453 (ctxt->sax->error != NULL)) 3454 ctxt->sax->error(ctxt->userData, 3455 "Fragment not allowed: %s\n", URI); 3456 ctxt->wellFormed = 0; 3457 } 3458 xmlFreeURI(uri); 3459 } 3460 } 3461 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3462 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3464 ctxt->sax->error(ctxt->userData, 3465 "Space required before 'NDATA'\n"); 3466 ctxt->wellFormed = 0; 3467 ctxt->disableSAX = 1; 3468 } 3469 SKIP_BLANKS; 3470 if ((RAW == 'N') && (NXT(1) == 'D') && 3471 (NXT(2) == 'A') && (NXT(3) == 'T') && 3472 (NXT(4) == 'A')) { 3473 SKIP(5); 3474 if (!IS_BLANK(CUR)) { 3475 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3477 ctxt->sax->error(ctxt->userData, 3478 "Space required after 'NDATA'\n"); 3479 ctxt->wellFormed = 0; 3480 ctxt->disableSAX = 1; 3481 } 3482 SKIP_BLANKS; 3483 ndata = xmlParseName(ctxt); 3484 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3485 (ctxt->sax->unparsedEntityDecl != NULL)) 3486 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3487 literal, URI, ndata); 3488 } else { 3489 if ((ctxt->sax != NULL) && 3490 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3491 
ctxt->sax->entityDecl(ctxt->userData, name, 3492 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3493 literal, URI, NULL); 3494 } 3495 } 3496 } 3497 SKIP_BLANKS; 3498 if (RAW != '>') { 3499 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3501 ctxt->sax->error(ctxt->userData, 3502 "xmlParseEntityDecl: entity %s not terminated\n", name); 3503 ctxt->wellFormed = 0; 3504 ctxt->disableSAX = 1; 3505 } else { 3506 if (input != ctxt->input) { 3507 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3509 ctxt->sax->error(ctxt->userData, 3510"Entity declaration doesn't start and stop in the same entity\n"); 3511 ctxt->wellFormed = 0; 3512 ctxt->disableSAX = 1; 3513 } 3514 NEXT; 3515 } 3516 if (orig != NULL) { 3517 /* 3518 * Ugly mechanism to save the raw entity value. 3519 */ 3520 xmlEntityPtr cur = NULL; 3521 3522 if (isParameter) { 3523 if ((ctxt->sax != NULL) && 3524 (ctxt->sax->getParameterEntity != NULL)) 3525 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3526 } else { 3527 if ((ctxt->sax != NULL) && 3528 (ctxt->sax->getEntity != NULL)) 3529 cur = ctxt->sax->getEntity(ctxt->userData, name); 3530 } 3531 if (cur != NULL) { 3532 if (cur->orig != NULL) 3533 xmlFree(orig); 3534 else 3535 cur->orig = orig; 3536 } else 3537 xmlFree(orig); 3538 } 3539 if (name != NULL) xmlFree(name); 3540 if (value != NULL) xmlFree(value); 3541 if (URI != NULL) xmlFree(URI); 3542 if (literal != NULL) xmlFree(literal); 3543 if (ndata != NULL) xmlFree(ndata); 3544 } 3545} 3546 3547/** 3548 * xmlParseDefaultDecl: 3549 * @ctxt: an XML parser context 3550 * @value: Receive a possible fixed default value for the attribute 3551 * 3552 * Parse an attribute default declaration 3553 * 3554 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 3555 * 3556 * [ VC: Required Attribute ] 3557 * if the default declaration is the keyword #REQUIRED, then the 3558 * attribute must be specified for all elements of the type in the 3559 * attribute-list declaration. 3560 * 3561 * [ VC: Attribute Default Legal ] 3562 * The declared default value must meet the lexical constraints of 3563 * the declared attribute type c.f. xmlValidateAttributeDecl() 3564 * 3565 * [ VC: Fixed Attribute Default ] 3566 * if an attribute has a default value declared with the #FIXED 3567 * keyword, instances of that attribute must match the default value. 3568 * 3569 * [ WFC: No < in Attribute Values ] 3570 * handled in xmlParseAttValue() 3571 * 3572 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3573 * or XML_ATTRIBUTE_FIXED. 3574 */ 3575 3576int 3577xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3578 int val; 3579 xmlChar *ret; 3580 3581 *value = NULL; 3582 if ((RAW == '#') && (NXT(1) == 'R') && 3583 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3584 (NXT(4) == 'U') && (NXT(5) == 'I') && 3585 (NXT(6) == 'R') && (NXT(7) == 'E') && 3586 (NXT(8) == 'D')) { 3587 SKIP(9); 3588 return(XML_ATTRIBUTE_REQUIRED); 3589 } 3590 if ((RAW == '#') && (NXT(1) == 'I') && 3591 (NXT(2) == 'M') && (NXT(3) == 'P') && 3592 (NXT(4) == 'L') && (NXT(5) == 'I') && 3593 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3594 SKIP(8); 3595 return(XML_ATTRIBUTE_IMPLIED); 3596 } 3597 val = XML_ATTRIBUTE_NONE; 3598 if ((RAW == '#') && (NXT(1) == 'F') && 3599 (NXT(2) == 'I') && (NXT(3) == 'X') && 3600 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3601 SKIP(6); 3602 val = XML_ATTRIBUTE_FIXED; 3603 if (!IS_BLANK(CUR)) { 3604 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3606 ctxt->sax->error(ctxt->userData, 3607 "Space required after '#FIXED'\n"); 3608 ctxt->wellFormed = 0; 3609 ctxt->disableSAX = 1; 3610 } 3611 SKIP_BLANKS; 3612 } 3613 ret = xmlParseAttValue(ctxt); 3614 ctxt->instate = 
XML_PARSER_DTD; 3615 if (ret == NULL) { 3616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3617 ctxt->sax->error(ctxt->userData, 3618 "Attribute default value declaration error\n"); 3619 ctxt->wellFormed = 0; 3620 ctxt->disableSAX = 1; 3621 } else 3622 *value = ret; 3623 return(val); 3624} 3625 3626/** 3627 * xmlParseNotationType: 3628 * @ctxt: an XML parser context 3629 * 3630 * parse an Notation attribute type. 3631 * 3632 * Note: the leading 'NOTATION' S part has already being parsed... 3633 * 3634 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3635 * 3636 * [ VC: Notation Attributes ] 3637 * Values of this type must match one of the notation names included 3638 * in the declaration; all notation names in the declaration must be declared. 3639 * 3640 * Returns: the notation attribute tree built while parsing 3641 */ 3642 3643xmlEnumerationPtr 3644xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3645 xmlChar *name; 3646 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3647 3648 if (RAW != '(') { 3649 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3651 ctxt->sax->error(ctxt->userData, 3652 "'(' required to start 'NOTATION'\n"); 3653 ctxt->wellFormed = 0; 3654 ctxt->disableSAX = 1; 3655 return(NULL); 3656 } 3657 SHRINK; 3658 do { 3659 NEXT; 3660 SKIP_BLANKS; 3661 name = xmlParseName(ctxt); 3662 if (name == NULL) { 3663 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3665 ctxt->sax->error(ctxt->userData, 3666 "Name expected in NOTATION declaration\n"); 3667 ctxt->wellFormed = 0; 3668 ctxt->disableSAX = 1; 3669 return(ret); 3670 } 3671 cur = xmlCreateEnumeration(name); 3672 xmlFree(name); 3673 if (cur == NULL) return(ret); 3674 if (last == NULL) ret = last = cur; 3675 else { 3676 last->next = cur; 3677 last = cur; 3678 } 3679 SKIP_BLANKS; 3680 } while (RAW == '|'); 3681 if (RAW != ')') { 3682 ctxt->errNo = 
XML_ERR_NOTATION_NOT_FINISHED; 3683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3684 ctxt->sax->error(ctxt->userData, 3685 "')' required to finish NOTATION declaration\n"); 3686 ctxt->wellFormed = 0; 3687 ctxt->disableSAX = 1; 3688 if ((last != NULL) && (last != ret)) 3689 xmlFreeEnumeration(last); 3690 return(ret); 3691 } 3692 NEXT; 3693 return(ret); 3694} 3695 3696/** 3697 * xmlParseEnumerationType: 3698 * @ctxt: an XML parser context 3699 * 3700 * parse an Enumeration attribute type. 3701 * 3702 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3703 * 3704 * [ VC: Enumeration ] 3705 * Values of this type must match one of the Nmtoken tokens in 3706 * the declaration 3707 * 3708 * Returns: the enumeration attribute tree built while parsing 3709 */ 3710 3711xmlEnumerationPtr 3712xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3713 xmlChar *name; 3714 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3715 3716 if (RAW != '(') { 3717 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3719 ctxt->sax->error(ctxt->userData, 3720 "'(' required to start ATTLIST enumeration\n"); 3721 ctxt->wellFormed = 0; 3722 ctxt->disableSAX = 1; 3723 return(NULL); 3724 } 3725 SHRINK; 3726 do { 3727 NEXT; 3728 SKIP_BLANKS; 3729 name = xmlParseNmtoken(ctxt); 3730 if (name == NULL) { 3731 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3733 ctxt->sax->error(ctxt->userData, 3734 "NmToken expected in ATTLIST enumeration\n"); 3735 ctxt->wellFormed = 0; 3736 ctxt->disableSAX = 1; 3737 return(ret); 3738 } 3739 cur = xmlCreateEnumeration(name); 3740 xmlFree(name); 3741 if (cur == NULL) return(ret); 3742 if (last == NULL) ret = last = cur; 3743 else { 3744 last->next = cur; 3745 last = cur; 3746 } 3747 SKIP_BLANKS; 3748 } while (RAW == '|'); 3749 if (RAW != ')') { 3750 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3751 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 3752 ctxt->sax->error(ctxt->userData, 3753 "')' required to finish ATTLIST enumeration\n"); 3754 ctxt->wellFormed = 0; 3755 ctxt->disableSAX = 1; 3756 return(ret); 3757 } 3758 NEXT; 3759 return(ret); 3760} 3761 3762/** 3763 * xmlParseEnumeratedType: 3764 * @ctxt: an XML parser context 3765 * @tree: the enumeration tree built while parsing 3766 * 3767 * parse an Enumerated attribute type. 3768 * 3769 * [57] EnumeratedType ::= NotationType | Enumeration 3770 * 3771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3772 * 3773 * 3774 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3775 */ 3776 3777int 3778xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3779 if ((RAW == 'N') && (NXT(1) == 'O') && 3780 (NXT(2) == 'T') && (NXT(3) == 'A') && 3781 (NXT(4) == 'T') && (NXT(5) == 'I') && 3782 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3783 SKIP(8); 3784 if (!IS_BLANK(CUR)) { 3785 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3787 ctxt->sax->error(ctxt->userData, 3788 "Space required after 'NOTATION'\n"); 3789 ctxt->wellFormed = 0; 3790 ctxt->disableSAX = 1; 3791 return(0); 3792 } 3793 SKIP_BLANKS; 3794 *tree = xmlParseNotationType(ctxt); 3795 if (*tree == NULL) return(0); 3796 return(XML_ATTRIBUTE_NOTATION); 3797 } 3798 *tree = xmlParseEnumerationType(ctxt); 3799 if (*tree == NULL) return(0); 3800 return(XML_ATTRIBUTE_ENUMERATION); 3801} 3802 3803/** 3804 * xmlParseAttributeType: 3805 * @ctxt: an XML parser context 3806 * @tree: the enumeration tree built while parsing 3807 * 3808 * parse the Attribute list def for an element 3809 * 3810 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3811 * 3812 * [55] StringType ::= 'CDATA' 3813 * 3814 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3815 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3816 * 3817 * Validity constraints for attribute values syntax are checked in 3818 * 
xmlValidateAttributeValue() 3819 * 3820 * [ VC: ID ] 3821 * Values of type ID must match the Name production. A name must not 3822 * appear more than once in an XML document as a value of this type; 3823 * i.e., ID values must uniquely identify the elements which bear them. 3824 * 3825 * [ VC: One ID per Element Type ] 3826 * No element type may have more than one ID attribute specified. 3827 * 3828 * [ VC: ID Attribute Default ] 3829 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 3830 * 3831 * [ VC: IDREF ] 3832 * Values of type IDREF must match the Name production, and values 3833 * of type IDREFS must match Names; each IDREF Name must match the value 3834 * of an ID attribute on some element in the XML document; i.e. IDREF 3835 * values must match the value of some ID attribute. 3836 * 3837 * [ VC: Entity Name ] 3838 * Values of type ENTITY must match the Name production, values 3839 * of type ENTITIES must match Names; each Entity Name must match the 3840 * name of an unparsed entity declared in the DTD. 3841 * 3842 * [ VC: Name Token ] 3843 * Values of type NMTOKEN must match the Nmtoken production; values 3844 * of type NMTOKENS must match Nmtokens. 
3845 * 3846 * Returns the attribute type 3847 */ 3848int 3849xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3850 SHRINK; 3851 if ((RAW == 'C') && (NXT(1) == 'D') && 3852 (NXT(2) == 'A') && (NXT(3) == 'T') && 3853 (NXT(4) == 'A')) { 3854 SKIP(5); 3855 return(XML_ATTRIBUTE_CDATA); 3856 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3857 (NXT(2) == 'R') && (NXT(3) == 'E') && 3858 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3859 SKIP(6); 3860 return(XML_ATTRIBUTE_IDREFS); 3861 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3862 (NXT(2) == 'R') && (NXT(3) == 'E') && 3863 (NXT(4) == 'F')) { 3864 SKIP(5); 3865 return(XML_ATTRIBUTE_IDREF); 3866 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3867 SKIP(2); 3868 return(XML_ATTRIBUTE_ID); 3869 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3870 (NXT(2) == 'T') && (NXT(3) == 'I') && 3871 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3872 SKIP(6); 3873 return(XML_ATTRIBUTE_ENTITY); 3874 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3875 (NXT(2) == 'T') && (NXT(3) == 'I') && 3876 (NXT(4) == 'T') && (NXT(5) == 'I') && 3877 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3878 SKIP(8); 3879 return(XML_ATTRIBUTE_ENTITIES); 3880 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3881 (NXT(2) == 'T') && (NXT(3) == 'O') && 3882 (NXT(4) == 'K') && (NXT(5) == 'E') && 3883 (NXT(6) == 'N') && (NXT(7) == 'S')) { 3884 SKIP(8); 3885 return(XML_ATTRIBUTE_NMTOKENS); 3886 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3887 (NXT(2) == 'T') && (NXT(3) == 'O') && 3888 (NXT(4) == 'K') && (NXT(5) == 'E') && 3889 (NXT(6) == 'N')) { 3890 SKIP(7); 3891 return(XML_ATTRIBUTE_NMTOKEN); 3892 } 3893 return(xmlParseEnumeratedType(ctxt, tree)); 3894} 3895 3896/** 3897 * xmlParseAttributeListDecl: 3898 * @ctxt: an XML parser context 3899 * 3900 * : parse the Attribute list def for an element 3901 * 3902 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 3903 * 3904 * [53] AttDef ::= S Name S AttType S DefaultDecl 3905 * 3906 */ 3907void 3908xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3909 xmlChar *elemName; 3910 xmlChar *attrName; 3911 xmlEnumerationPtr tree; 3912 3913 if ((RAW == '<') && (NXT(1) == '!') && 3914 (NXT(2) == 'A') && (NXT(3) == 'T') && 3915 (NXT(4) == 'T') && (NXT(5) == 'L') && 3916 (NXT(6) == 'I') && (NXT(7) == 'S') && 3917 (NXT(8) == 'T')) { 3918 xmlParserInputPtr input = ctxt->input; 3919 3920 SKIP(9); 3921 if (!IS_BLANK(CUR)) { 3922 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3924 ctxt->sax->error(ctxt->userData, 3925 "Space required after '<!ATTLIST'\n"); 3926 ctxt->wellFormed = 0; 3927 ctxt->disableSAX = 1; 3928 } 3929 SKIP_BLANKS; 3930 elemName = xmlParseName(ctxt); 3931 if (elemName == NULL) { 3932 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3934 ctxt->sax->error(ctxt->userData, 3935 "ATTLIST: no name for Element\n"); 3936 ctxt->wellFormed = 0; 3937 ctxt->disableSAX = 1; 3938 return; 3939 } 3940 SKIP_BLANKS; 3941 GROW; 3942 while (RAW != '>') { 3943 const xmlChar *check = CUR_PTR; 3944 int type; 3945 int def; 3946 xmlChar *defaultValue = NULL; 3947 3948 GROW; 3949 tree = NULL; 3950 attrName = xmlParseName(ctxt); 3951 if (attrName == NULL) { 3952 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3954 ctxt->sax->error(ctxt->userData, 3955 "ATTLIST: no name for Attribute\n"); 3956 ctxt->wellFormed = 0; 3957 ctxt->disableSAX = 1; 3958 break; 3959 } 3960 GROW; 3961 if (!IS_BLANK(CUR)) { 3962 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3964 ctxt->sax->error(ctxt->userData, 3965 "Space required after the attribute name\n"); 3966 ctxt->wellFormed = 0; 3967 ctxt->disableSAX = 1; 3968 if (attrName != NULL) 3969 xmlFree(attrName); 3970 if (defaultValue != NULL) 3971 xmlFree(defaultValue); 
3972 break; 3973 } 3974 SKIP_BLANKS; 3975 3976 type = xmlParseAttributeType(ctxt, &tree); 3977 if (type <= 0) { 3978 if (attrName != NULL) 3979 xmlFree(attrName); 3980 if (defaultValue != NULL) 3981 xmlFree(defaultValue); 3982 break; 3983 } 3984 3985 GROW; 3986 if (!IS_BLANK(CUR)) { 3987 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3989 ctxt->sax->error(ctxt->userData, 3990 "Space required after the attribute type\n"); 3991 ctxt->wellFormed = 0; 3992 ctxt->disableSAX = 1; 3993 if (attrName != NULL) 3994 xmlFree(attrName); 3995 if (defaultValue != NULL) 3996 xmlFree(defaultValue); 3997 if (tree != NULL) 3998 xmlFreeEnumeration(tree); 3999 break; 4000 } 4001 SKIP_BLANKS; 4002 4003 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4004 if (def <= 0) { 4005 if (attrName != NULL) 4006 xmlFree(attrName); 4007 if (defaultValue != NULL) 4008 xmlFree(defaultValue); 4009 if (tree != NULL) 4010 xmlFreeEnumeration(tree); 4011 break; 4012 } 4013 4014 GROW; 4015 if (RAW != '>') { 4016 if (!IS_BLANK(CUR)) { 4017 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4019 ctxt->sax->error(ctxt->userData, 4020 "Space required after the attribute default value\n"); 4021 ctxt->wellFormed = 0; 4022 ctxt->disableSAX = 1; 4023 if (attrName != NULL) 4024 xmlFree(attrName); 4025 if (defaultValue != NULL) 4026 xmlFree(defaultValue); 4027 if (tree != NULL) 4028 xmlFreeEnumeration(tree); 4029 break; 4030 } 4031 SKIP_BLANKS; 4032 } 4033 if (check == CUR_PTR) { 4034 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4036 ctxt->sax->error(ctxt->userData, 4037 "xmlParseAttributeListDecl: detected internal error\n"); 4038 if (attrName != NULL) 4039 xmlFree(attrName); 4040 if (defaultValue != NULL) 4041 xmlFree(defaultValue); 4042 if (tree != NULL) 4043 xmlFreeEnumeration(tree); 4044 break; 4045 } 4046 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4047 
(ctxt->sax->attributeDecl != NULL)) 4048 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4049 type, def, defaultValue, tree); 4050 if (attrName != NULL) 4051 xmlFree(attrName); 4052 if (defaultValue != NULL) 4053 xmlFree(defaultValue); 4054 GROW; 4055 } 4056 if (RAW == '>') { 4057 if (input != ctxt->input) { 4058 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4060 ctxt->sax->error(ctxt->userData, 4061"Attribute list declaration doesn't start and stop in the same entity\n"); 4062 ctxt->wellFormed = 0; 4063 ctxt->disableSAX = 1; 4064 } 4065 NEXT; 4066 } 4067 4068 xmlFree(elemName); 4069 } 4070} 4071 4072/** 4073 * xmlParseElementMixedContentDecl: 4074 * @ctxt: an XML parser context 4075 * 4076 * parse the declaration for a Mixed Element content 4077 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4078 * 4079 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4080 * '(' S? '#PCDATA' S? ')' 4081 * 4082 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4083 * 4084 * [ VC: No Duplicate Types ] 4085 * The same name must not appear more than once in a single 4086 * mixed-content declaration. 
4087 * 4088 * returns: the list of the xmlElementContentPtr describing the element choices 4089 */ 4090xmlElementContentPtr 4091xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 4092 xmlElementContentPtr ret = NULL, cur = NULL, n; 4093 xmlChar *elem = NULL; 4094 4095 GROW; 4096 if ((RAW == '#') && (NXT(1) == 'P') && 4097 (NXT(2) == 'C') && (NXT(3) == 'D') && 4098 (NXT(4) == 'A') && (NXT(5) == 'T') && 4099 (NXT(6) == 'A')) { 4100 SKIP(7); 4101 SKIP_BLANKS; 4102 SHRINK; 4103 if (RAW == ')') { 4104 ctxt->entity = ctxt->input; 4105 NEXT; 4106 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4107 if (RAW == '*') { 4108 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4109 NEXT; 4110 } 4111 return(ret); 4112 } 4113 if ((RAW == '(') || (RAW == '|')) { 4114 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4115 if (ret == NULL) return(NULL); 4116 } 4117 while (RAW == '|') { 4118 NEXT; 4119 if (elem == NULL) { 4120 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4121 if (ret == NULL) return(NULL); 4122 ret->c1 = cur; 4123 if (cur != NULL) 4124 cur->parent = ret; 4125 cur = ret; 4126 } else { 4127 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4128 if (n == NULL) return(NULL); 4129 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4130 if (n->c1 != NULL) 4131 n->c1->parent = n; 4132 cur->c2 = n; 4133 if (n != NULL) 4134 n->parent = cur; 4135 cur = n; 4136 xmlFree(elem); 4137 } 4138 SKIP_BLANKS; 4139 elem = xmlParseName(ctxt); 4140 if (elem == NULL) { 4141 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4143 ctxt->sax->error(ctxt->userData, 4144 "xmlParseElementMixedContentDecl : Name expected\n"); 4145 ctxt->wellFormed = 0; 4146 ctxt->disableSAX = 1; 4147 xmlFreeElementContent(cur); 4148 return(NULL); 4149 } 4150 SKIP_BLANKS; 4151 GROW; 4152 } 4153 if ((RAW == ')') && (NXT(1) == '*')) { 4154 if (elem != NULL) { 4155 cur->c2 = xmlNewElementContent(elem, 4156 
XML_ELEMENT_CONTENT_ELEMENT); 4157 if (cur->c2 != NULL) 4158 cur->c2->parent = cur; 4159 xmlFree(elem); 4160 } 4161 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4162 ctxt->entity = ctxt->input; 4163 SKIP(2); 4164 } else { 4165 if (elem != NULL) xmlFree(elem); 4166 xmlFreeElementContent(ret); 4167 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4169 ctxt->sax->error(ctxt->userData, 4170 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4171 ctxt->wellFormed = 0; 4172 ctxt->disableSAX = 1; 4173 return(NULL); 4174 } 4175 4176 } else { 4177 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4179 ctxt->sax->error(ctxt->userData, 4180 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4181 ctxt->wellFormed = 0; 4182 ctxt->disableSAX = 1; 4183 } 4184 return(ret); 4185} 4186 4187/** 4188 * xmlParseElementChildrenContentD: 4189 * @ctxt: an XML parser context 4190 * 4191 * VMS version of xmlParseElementChildrenContentDecl() 4192 * 4193 * Returns the tree of xmlElementContentPtr describing the element 4194 * hierarchy. 4195 */ 4196/** 4197 * xmlParseElementChildrenContentDecl: 4198 * @ctxt: an XML parser context 4199 * 4200 * parse the declaration for a Mixed Element content 4201 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4202 * 4203 * 4204 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4205 * 4206 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4207 * 4208 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4209 * 4210 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4211 * 4212 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4213 * TODO Parameter-entity replacement text must be properly nested 4214 * with parenthetized groups. 
That is to say, if either of the 4215 * opening or closing parentheses in a choice, seq, or Mixed 4216 * construct is contained in the replacement text for a parameter 4217 * entity, both must be contained in the same replacement text. For 4218 * interoperability, if a parameter-entity reference appears in a 4219 * choice, seq, or Mixed construct, its replacement text should not 4220 * be empty, and neither the first nor last non-blank character of 4221 * the replacement text should be a connector (| or ,). 4222 * 4223 * Returns the tree of xmlElementContentPtr describing the element 4224 * hierarchy. 4225 */ 4226xmlElementContentPtr 4227#ifdef VMS 4228xmlParseElementChildrenContentD 4229#else 4230xmlParseElementChildrenContentDecl 4231#endif 4232(xmlParserCtxtPtr ctxt) { 4233 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4234 xmlChar *elem; 4235 xmlChar type = 0; 4236 4237 SKIP_BLANKS; 4238 GROW; 4239 if (RAW == '(') { 4240 /* Recurse on first child */ 4241 NEXT; 4242 SKIP_BLANKS; 4243 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4244 SKIP_BLANKS; 4245 GROW; 4246 } else { 4247 elem = xmlParseName(ctxt); 4248 if (elem == NULL) { 4249 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4251 ctxt->sax->error(ctxt->userData, 4252 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4253 ctxt->wellFormed = 0; 4254 ctxt->disableSAX = 1; 4255 return(NULL); 4256 } 4257 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4258 GROW; 4259 if (RAW == '?') { 4260 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4261 NEXT; 4262 } else if (RAW == '*') { 4263 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4264 NEXT; 4265 } else if (RAW == '+') { 4266 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4267 NEXT; 4268 } else { 4269 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4270 } 4271 xmlFree(elem); 4272 GROW; 4273 } 4274 SKIP_BLANKS; 4275 SHRINK; 4276 while (RAW != ')') { 4277 /* 4278 * Each loop we parse 
one separator and one element. 4279 */ 4280 if (RAW == ',') { 4281 if (type == 0) type = CUR; 4282 4283 /* 4284 * Detect "Name | Name , Name" error 4285 */ 4286 else if (type != CUR) { 4287 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4289 ctxt->sax->error(ctxt->userData, 4290 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4291 type); 4292 ctxt->wellFormed = 0; 4293 ctxt->disableSAX = 1; 4294 if ((op != NULL) && (op != ret)) 4295 xmlFreeElementContent(op); 4296 if ((last != NULL) && (last != ret) && 4297 (last != ret->c1) && (last != ret->c2)) 4298 xmlFreeElementContent(last); 4299 if (ret != NULL) 4300 xmlFreeElementContent(ret); 4301 return(NULL); 4302 } 4303 NEXT; 4304 4305 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4306 if (op == NULL) { 4307 xmlFreeElementContent(ret); 4308 return(NULL); 4309 } 4310 if (last == NULL) { 4311 op->c1 = ret; 4312 if (ret != NULL) 4313 ret->parent = op; 4314 ret = cur = op; 4315 } else { 4316 cur->c2 = op; 4317 if (op != NULL) 4318 op->parent = cur; 4319 op->c1 = last; 4320 if (last != NULL) 4321 last->parent = op; 4322 cur =op; 4323 last = NULL; 4324 } 4325 } else if (RAW == '|') { 4326 if (type == 0) type = CUR; 4327 4328 /* 4329 * Detect "Name , Name | Name" error 4330 */ 4331 else if (type != CUR) { 4332 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4334 ctxt->sax->error(ctxt->userData, 4335 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4336 type); 4337 ctxt->wellFormed = 0; 4338 ctxt->disableSAX = 1; 4339 if ((op != NULL) && (op != ret) && (op != last)) 4340 xmlFreeElementContent(op); 4341 if ((last != NULL) && (last != ret) && 4342 (last != ret->c1) && (last != ret->c2)) 4343 xmlFreeElementContent(last); 4344 if (ret != NULL) 4345 xmlFreeElementContent(ret); 4346 return(NULL); 4347 } 4348 NEXT; 4349 4350 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4351 if (op == NULL) 
{ 4352 if ((op != NULL) && (op != ret)) 4353 xmlFreeElementContent(op); 4354 if ((last != NULL) && (last != ret) && 4355 (last != ret->c1) && (last != ret->c2)) 4356 xmlFreeElementContent(last); 4357 if (ret != NULL) 4358 xmlFreeElementContent(ret); 4359 return(NULL); 4360 } 4361 if (last == NULL) { 4362 op->c1 = ret; 4363 if (ret != NULL) 4364 ret->parent = op; 4365 ret = cur = op; 4366 } else { 4367 cur->c2 = op; 4368 if (op != NULL) 4369 op->parent = cur; 4370 op->c1 = last; 4371 if (last != NULL) 4372 last->parent = op; 4373 cur =op; 4374 last = NULL; 4375 } 4376 } else { 4377 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4379 ctxt->sax->error(ctxt->userData, 4380 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4381 ctxt->wellFormed = 0; 4382 ctxt->disableSAX = 1; 4383 if ((op != NULL) && (op != ret)) 4384 xmlFreeElementContent(op); 4385 if ((last != NULL) && (last != ret) && 4386 (last != ret->c1) && (last != ret->c2)) 4387 xmlFreeElementContent(last); 4388 if (ret != NULL) 4389 xmlFreeElementContent(ret); 4390 return(NULL); 4391 } 4392 GROW; 4393 SKIP_BLANKS; 4394 GROW; 4395 if (RAW == '(') { 4396 /* Recurse on second child */ 4397 NEXT; 4398 SKIP_BLANKS; 4399 last = xmlParseElementChildrenContentDecl(ctxt); 4400 SKIP_BLANKS; 4401 } else { 4402 elem = xmlParseName(ctxt); 4403 if (elem == NULL) { 4404 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4406 ctxt->sax->error(ctxt->userData, 4407 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4408 ctxt->wellFormed = 0; 4409 ctxt->disableSAX = 1; 4410 if ((op != NULL) && (op != ret)) 4411 xmlFreeElementContent(op); 4412 if ((last != NULL) && (last != ret) && 4413 (last != ret->c1) && (last != ret->c2)) 4414 xmlFreeElementContent(last); 4415 if (ret != NULL) 4416 xmlFreeElementContent(ret); 4417 return(NULL); 4418 } 4419 last = xmlNewElementContent(elem, 
XML_ELEMENT_CONTENT_ELEMENT); 4420 xmlFree(elem); 4421 if (RAW == '?') { 4422 last->ocur = XML_ELEMENT_CONTENT_OPT; 4423 NEXT; 4424 } else if (RAW == '*') { 4425 last->ocur = XML_ELEMENT_CONTENT_MULT; 4426 NEXT; 4427 } else if (RAW == '+') { 4428 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4429 NEXT; 4430 } else { 4431 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4432 } 4433 } 4434 SKIP_BLANKS; 4435 GROW; 4436 } 4437 if ((cur != NULL) && (last != NULL)) { 4438 cur->c2 = last; 4439 if (last != NULL) 4440 last->parent = cur; 4441 } 4442 ctxt->entity = ctxt->input; 4443 NEXT; 4444 if (RAW == '?') { 4445 if (ret != NULL) 4446 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4447 NEXT; 4448 } else if (RAW == '*') { 4449 if (ret != NULL) 4450 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4451 NEXT; 4452 } else if (RAW == '+') { 4453 if (ret != NULL) 4454 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4455 NEXT; 4456 } 4457 return(ret); 4458} 4459 4460/** 4461 * xmlParseElementContentDecl: 4462 * @ctxt: an XML parser context 4463 * @name: the name of the element being defined. 
4464 * @result: the Element Content pointer will be stored here if any 4465 * 4466 * parse the declaration for an Element content either Mixed or Children, 4467 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4468 * 4469 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4470 * 4471 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4472 */ 4473 4474int 4475xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4476 xmlElementContentPtr *result) { 4477 4478 xmlElementContentPtr tree = NULL; 4479 xmlParserInputPtr input = ctxt->input; 4480 int res; 4481 4482 *result = NULL; 4483 4484 if (RAW != '(') { 4485 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4487 ctxt->sax->error(ctxt->userData, 4488 "xmlParseElementContentDecl : %s '(' expected\n", name); 4489 ctxt->wellFormed = 0; 4490 ctxt->disableSAX = 1; 4491 return(-1); 4492 } 4493 NEXT; 4494 GROW; 4495 SKIP_BLANKS; 4496 if ((RAW == '#') && (NXT(1) == 'P') && 4497 (NXT(2) == 'C') && (NXT(3) == 'D') && 4498 (NXT(4) == 'A') && (NXT(5) == 'T') && 4499 (NXT(6) == 'A')) { 4500 tree = xmlParseElementMixedContentDecl(ctxt); 4501 res = XML_ELEMENT_TYPE_MIXED; 4502 } else { 4503 tree = xmlParseElementChildrenContentDecl(ctxt); 4504 res = XML_ELEMENT_TYPE_ELEMENT; 4505 } 4506 if ((ctxt->entity != NULL) && (input != ctxt->entity)) { 4507 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4509 ctxt->sax->error(ctxt->userData, 4510"Element content declaration doesn't start and stop in the same entity\n"); 4511 ctxt->wellFormed = 0; 4512 ctxt->disableSAX = 1; 4513 } 4514 SKIP_BLANKS; 4515 *result = tree; 4516 return(res); 4517} 4518 4519/** 4520 * xmlParseElementDecl: 4521 * @ctxt: an XML parser context 4522 * 4523 * parse an Element declaration. 4524 * 4525 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4526 * 4527 * [ VC: Unique Element Type Declaration ] 4528 * No element type may be declared more than once 4529 * 4530 * Returns the type of the element, or -1 in case of error 4531 */ 4532int 4533xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4534 xmlChar *name; 4535 int ret = -1; 4536 xmlElementContentPtr content = NULL; 4537 4538 GROW; 4539 if ((RAW == '<') && (NXT(1) == '!') && 4540 (NXT(2) == 'E') && (NXT(3) == 'L') && 4541 (NXT(4) == 'E') && (NXT(5) == 'M') && 4542 (NXT(6) == 'E') && (NXT(7) == 'N') && 4543 (NXT(8) == 'T')) { 4544 xmlParserInputPtr input = ctxt->input; 4545 4546 SKIP(9); 4547 if (!IS_BLANK(CUR)) { 4548 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4550 ctxt->sax->error(ctxt->userData, 4551 "Space required after 'ELEMENT'\n"); 4552 ctxt->wellFormed = 0; 4553 ctxt->disableSAX = 1; 4554 } 4555 SKIP_BLANKS; 4556 name = xmlParseName(ctxt); 4557 if (name == NULL) { 4558 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4560 ctxt->sax->error(ctxt->userData, 4561 "xmlParseElementDecl: no name for Element\n"); 4562 ctxt->wellFormed = 0; 4563 ctxt->disableSAX = 1; 4564 return(-1); 4565 } 4566 while ((RAW == 0) && (ctxt->inputNr > 1)) 4567 xmlPopInput(ctxt); 4568 if (!IS_BLANK(CUR)) { 4569 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4571 ctxt->sax->error(ctxt->userData, 4572 "Space required after the element name\n"); 4573 ctxt->wellFormed = 0; 4574 ctxt->disableSAX = 1; 4575 } 4576 SKIP_BLANKS; 4577 if ((RAW == 'E') && (NXT(1) == 'M') && 4578 (NXT(2) == 'P') && (NXT(3) == 'T') && 4579 (NXT(4) == 'Y')) { 4580 SKIP(5); 4581 /* 4582 * Element must always be empty. 4583 */ 4584 ret = XML_ELEMENT_TYPE_EMPTY; 4585 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4586 (NXT(2) == 'Y')) { 4587 SKIP(3); 4588 /* 4589 * Element is a generic container. 
4590 */ 4591 ret = XML_ELEMENT_TYPE_ANY; 4592 } else if (RAW == '(') { 4593 ret = xmlParseElementContentDecl(ctxt, name, &content); 4594 } else { 4595 /* 4596 * [ WFC: PEs in Internal Subset ] error handling. 4597 */ 4598 if ((RAW == '%') && (ctxt->external == 0) && 4599 (ctxt->inputNr == 1)) { 4600 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4602 ctxt->sax->error(ctxt->userData, 4603 "PEReference: forbidden within markup decl in internal subset\n"); 4604 } else { 4605 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4607 ctxt->sax->error(ctxt->userData, 4608 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4609 } 4610 ctxt->wellFormed = 0; 4611 ctxt->disableSAX = 1; 4612 if (name != NULL) xmlFree(name); 4613 return(-1); 4614 } 4615 4616 SKIP_BLANKS; 4617 /* 4618 * Pop-up of finished entities. 4619 */ 4620 while ((RAW == 0) && (ctxt->inputNr > 1)) 4621 xmlPopInput(ctxt); 4622 SKIP_BLANKS; 4623 4624 if (RAW != '>') { 4625 ctxt->errNo = XML_ERR_GT_REQUIRED; 4626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4627 ctxt->sax->error(ctxt->userData, 4628 "xmlParseElementDecl: expected '>' at the end\n"); 4629 ctxt->wellFormed = 0; 4630 ctxt->disableSAX = 1; 4631 } else { 4632 if (input != ctxt->input) { 4633 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4635 ctxt->sax->error(ctxt->userData, 4636"Element declaration doesn't start and stop in the same entity\n"); 4637 ctxt->wellFormed = 0; 4638 ctxt->disableSAX = 1; 4639 } 4640 4641 NEXT; 4642 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4643 (ctxt->sax->elementDecl != NULL)) 4644 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4645 content); 4646 } 4647 if (content != NULL) { 4648 xmlFreeElementContent(content); 4649 } 4650 if (name != NULL) { 4651 xmlFree(name); 4652 } 4653 } 4654 return(ret); 4655} 4656 4657/** 4658 * 
xmlParseConditionalSections 4659 * @ctxt: an XML parser context 4660 * 4661 * [61] conditionalSect ::= includeSect | ignoreSect 4662 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4663 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4664 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4665 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4666 */ 4667 4668static void 4669xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4670 SKIP(3); 4671 SKIP_BLANKS; 4672 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4673 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4674 (NXT(6) == 'E')) { 4675 SKIP(7); 4676 SKIP_BLANKS; 4677 if (RAW != '[') { 4678 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4680 ctxt->sax->error(ctxt->userData, 4681 "XML conditional section '[' expected\n"); 4682 ctxt->wellFormed = 0; 4683 ctxt->disableSAX = 1; 4684 } else { 4685 NEXT; 4686 } 4687 if (xmlParserDebugEntities) { 4688 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4689 xmlGenericError(xmlGenericErrorContext, 4690 "%s(%d): ", ctxt->input->filename, 4691 ctxt->input->line); 4692 xmlGenericError(xmlGenericErrorContext, 4693 "Entering INCLUDE Conditional Section\n"); 4694 } 4695 4696 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4697 (NXT(2) != '>'))) { 4698 const xmlChar *check = CUR_PTR; 4699 int cons = ctxt->input->consumed; 4700 int tok = ctxt->token; 4701 4702 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4703 xmlParseConditionalSections(ctxt); 4704 } else if (IS_BLANK(CUR)) { 4705 NEXT; 4706 } else if (RAW == '%') { 4707 xmlParsePEReference(ctxt); 4708 } else 4709 xmlParseMarkupDecl(ctxt); 4710 4711 /* 4712 * Pop-up of finished entities. 
4713 */ 4714 while ((RAW == 0) && (ctxt->inputNr > 1)) 4715 xmlPopInput(ctxt); 4716 4717 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4718 (tok == ctxt->token)) { 4719 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4721 ctxt->sax->error(ctxt->userData, 4722 "Content error in the external subset\n"); 4723 ctxt->wellFormed = 0; 4724 ctxt->disableSAX = 1; 4725 break; 4726 } 4727 } 4728 if (xmlParserDebugEntities) { 4729 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4730 xmlGenericError(xmlGenericErrorContext, 4731 "%s(%d): ", ctxt->input->filename, 4732 ctxt->input->line); 4733 xmlGenericError(xmlGenericErrorContext, 4734 "Leaving INCLUDE Conditional Section\n"); 4735 } 4736 4737 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4738 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4739 int state; 4740 int instate; 4741 int depth = 0; 4742 4743 SKIP(6); 4744 SKIP_BLANKS; 4745 if (RAW != '[') { 4746 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4748 ctxt->sax->error(ctxt->userData, 4749 "XML conditional section '[' expected\n"); 4750 ctxt->wellFormed = 0; 4751 ctxt->disableSAX = 1; 4752 } else { 4753 NEXT; 4754 } 4755 if (xmlParserDebugEntities) { 4756 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4757 xmlGenericError(xmlGenericErrorContext, 4758 "%s(%d): ", ctxt->input->filename, 4759 ctxt->input->line); 4760 xmlGenericError(xmlGenericErrorContext, 4761 "Entering IGNORE Conditional Section\n"); 4762 } 4763 4764 /* 4765 * Parse up to the end of the conditionnal section 4766 * But disable SAX event generating DTD building in the meantime 4767 */ 4768 state = ctxt->disableSAX; 4769 instate = ctxt->instate; 4770 ctxt->disableSAX = 1; 4771 ctxt->instate = XML_PARSER_IGNORE; 4772 4773 while (depth >= 0) { 4774 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4775 depth++; 4776 SKIP(3); 4777 continue; 
4778 } 4779 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4780 if (--depth >= 0) SKIP(3); 4781 continue; 4782 } 4783 NEXT; 4784 continue; 4785 } 4786 4787 ctxt->disableSAX = state; 4788 ctxt->instate = instate; 4789 4790 if (xmlParserDebugEntities) { 4791 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4792 xmlGenericError(xmlGenericErrorContext, 4793 "%s(%d): ", ctxt->input->filename, 4794 ctxt->input->line); 4795 xmlGenericError(xmlGenericErrorContext, 4796 "Leaving IGNORE Conditional Section\n"); 4797 } 4798 4799 } else { 4800 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4802 ctxt->sax->error(ctxt->userData, 4803 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4804 ctxt->wellFormed = 0; 4805 ctxt->disableSAX = 1; 4806 } 4807 4808 if (RAW == 0) 4809 SHRINK; 4810 4811 if (RAW == 0) { 4812 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4814 ctxt->sax->error(ctxt->userData, 4815 "XML conditional section not closed\n"); 4816 ctxt->wellFormed = 0; 4817 ctxt->disableSAX = 1; 4818 } else { 4819 SKIP(3); 4820 } 4821} 4822 4823/** 4824 * xmlParseMarkupDecl: 4825 * @ctxt: an XML parser context 4826 * 4827 * parse Markup declarations 4828 * 4829 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4830 * NotationDecl | PI | Comment 4831 * 4832 * [ VC: Proper Declaration/PE Nesting ] 4833 * Parameter-entity replacement text must be properly nested with 4834 * markup declarations. That is to say, if either the first character 4835 * or the last character of a markup declaration (markupdecl above) is 4836 * contained in the replacement text for a parameter-entity reference, 4837 * both must be contained in the same replacement text. 
4838 * 4839 * [ WFC: PEs in Internal Subset ] 4840 * In the internal DTD subset, parameter-entity references can occur 4841 * only where markup declarations can occur, not within markup declarations. 4842 * (This does not apply to references that occur in external parameter 4843 * entities or to the external subset.) 4844 */ 4845void 4846xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4847 GROW; 4848 xmlParseElementDecl(ctxt); 4849 xmlParseAttributeListDecl(ctxt); 4850 xmlParseEntityDecl(ctxt); 4851 xmlParseNotationDecl(ctxt); 4852 xmlParsePI(ctxt); 4853 xmlParseComment(ctxt); 4854 /* 4855 * This is only for internal subset. On external entities, 4856 * the replacement is done before parsing stage 4857 */ 4858 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4859 xmlParsePEReference(ctxt); 4860 4861 /* 4862 * Conditional sections are allowed from entities included 4863 * by PE References in the internal subset. 4864 */ 4865 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 4866 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4867 xmlParseConditionalSections(ctxt); 4868 } 4869 } 4870 4871 ctxt->instate = XML_PARSER_DTD; 4872} 4873 4874/** 4875 * xmlParseTextDecl: 4876 * @ctxt: an XML parser context 4877 * 4878 * parse an XML declaration header for external entities 4879 * 4880 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4881 * 4882 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4883 */ 4884 4885void 4886xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4887 xmlChar *version; 4888 4889 /* 4890 * We know that '<?xml' is here. 
4891 */ 4892 if ((RAW == '<') && (NXT(1) == '?') && 4893 (NXT(2) == 'x') && (NXT(3) == 'm') && 4894 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4895 SKIP(5); 4896 } else { 4897 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4899 ctxt->sax->error(ctxt->userData, 4900 "Text declaration '<?xml' required\n"); 4901 ctxt->wellFormed = 0; 4902 ctxt->disableSAX = 1; 4903 4904 return; 4905 } 4906 4907 if (!IS_BLANK(CUR)) { 4908 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4910 ctxt->sax->error(ctxt->userData, 4911 "Space needed after '<?xml'\n"); 4912 ctxt->wellFormed = 0; 4913 ctxt->disableSAX = 1; 4914 } 4915 SKIP_BLANKS; 4916 4917 /* 4918 * We may have the VersionInfo here. 4919 */ 4920 version = xmlParseVersionInfo(ctxt); 4921 if (version == NULL) 4922 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4923 ctxt->input->version = version; 4924 4925 /* 4926 * We must have the encoding declaration 4927 */ 4928 if (!IS_BLANK(CUR)) { 4929 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4931 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4932 ctxt->wellFormed = 0; 4933 ctxt->disableSAX = 1; 4934 } 4935 xmlParseEncodingDecl(ctxt); 4936 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4937 /* 4938 * The XML REC instructs us to stop parsing right here 4939 */ 4940 return; 4941 } 4942 4943 SKIP_BLANKS; 4944 if ((RAW == '?') && (NXT(1) == '>')) { 4945 SKIP(2); 4946 } else if (RAW == '>') { 4947 /* Deprecated old WD ... 
*/ 4948 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4950 ctxt->sax->error(ctxt->userData, 4951 "XML declaration must end-up with '?>'\n"); 4952 ctxt->wellFormed = 0; 4953 ctxt->disableSAX = 1; 4954 NEXT; 4955 } else { 4956 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4958 ctxt->sax->error(ctxt->userData, 4959 "parsing XML declaration: '?>' expected\n"); 4960 ctxt->wellFormed = 0; 4961 ctxt->disableSAX = 1; 4962 MOVETO_ENDTAG(CUR_PTR); 4963 NEXT; 4964 } 4965} 4966 4967/** 4968 * xmlParseExternalSubset: 4969 * @ctxt: an XML parser context 4970 * @ExternalID: the external identifier 4971 * @SystemID: the system identifier (or URL) 4972 * 4973 * parse Markup declarations from an external subset 4974 * 4975 * [30] extSubset ::= textDecl? extSubsetDecl 4976 * 4977 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4978 */ 4979void 4980xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4981 const xmlChar *SystemID) { 4982 GROW; 4983 if ((RAW == '<') && (NXT(1) == '?') && 4984 (NXT(2) == 'x') && (NXT(3) == 'm') && 4985 (NXT(4) == 'l')) { 4986 xmlParseTextDecl(ctxt); 4987 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4988 /* 4989 * The XML REC instructs us to stop parsing right here 4990 */ 4991 ctxt->instate = XML_PARSER_EOF; 4992 return; 4993 } 4994 } 4995 if (ctxt->myDoc == NULL) { 4996 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4997 } 4998 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4999 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5000 5001 ctxt->instate = XML_PARSER_DTD; 5002 ctxt->external = 1; 5003 while (((RAW == '<') && (NXT(1) == '?')) || 5004 ((RAW == '<') && (NXT(1) == '!')) || 5005 (RAW == '%') || IS_BLANK(CUR)) { 5006 const xmlChar *check = CUR_PTR; 5007 int cons = ctxt->input->consumed; 5008 int tok = ctxt->token; 5009 5010 GROW; 5011 if ((RAW == '<') && 
(NXT(1) == '!') && (NXT(2) == '[')) { 5012 xmlParseConditionalSections(ctxt); 5013 } else if (IS_BLANK(CUR)) { 5014 NEXT; 5015 } else if (RAW == '%') { 5016 xmlParsePEReference(ctxt); 5017 } else 5018 xmlParseMarkupDecl(ctxt); 5019 5020 /* 5021 * Pop-up of finished entities. 5022 */ 5023 while ((RAW == 0) && (ctxt->inputNr > 1)) 5024 xmlPopInput(ctxt); 5025 5026 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 5027 (tok == ctxt->token)) { 5028 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5030 ctxt->sax->error(ctxt->userData, 5031 "Content error in the external subset\n"); 5032 ctxt->wellFormed = 0; 5033 ctxt->disableSAX = 1; 5034 break; 5035 } 5036 } 5037 5038 if (RAW != 0) { 5039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5041 ctxt->sax->error(ctxt->userData, 5042 "Extra content at the end of the document\n"); 5043 ctxt->wellFormed = 0; 5044 ctxt->disableSAX = 1; 5045 } 5046 5047} 5048 5049/** 5050 * xmlParseReference: 5051 * @ctxt: an XML parser context 5052 * 5053 * parse and handle entity references in content, depending on the SAX 5054 * interface, this may end-up in a call to character() if this is a 5055 * CharRef, a predefined entity, if there is no reference() callback. 5056 * or if the parser was asked to switch to that mode. 5057 * 5058 * [67] Reference ::= EntityRef | CharRef 5059 */ 5060void 5061xmlParseReference(xmlParserCtxtPtr ctxt) { 5062 xmlEntityPtr ent; 5063 xmlChar *val; 5064 if (RAW != '&') return; 5065 5066 if (NXT(1) == '#') { 5067 int i = 0; 5068 xmlChar out[10]; 5069 int hex = NXT(2); 5070 int value = xmlParseCharRef(ctxt); 5071 5072 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5073 /* 5074 * So we are using non-UTF-8 buffers 5075 * Check that the char fit on 8bits, if not 5076 * generate a CharRef. 
5077 */ 5078 if (value <= 0xFF) { 5079 out[0] = value; 5080 out[1] = 0; 5081 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5082 (!ctxt->disableSAX)) 5083 ctxt->sax->characters(ctxt->userData, out, 1); 5084 } else { 5085 if ((hex == 'x') || (hex == 'X')) 5086 sprintf((char *)out, "#x%X", value); 5087 else 5088 sprintf((char *)out, "#%d", value); 5089 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5090 (!ctxt->disableSAX)) 5091 ctxt->sax->reference(ctxt->userData, out); 5092 } 5093 } else { 5094 /* 5095 * Just encode the value in UTF-8 5096 */ 5097 COPY_BUF(0 ,out, i, value); 5098 out[i] = 0; 5099 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5100 (!ctxt->disableSAX)) 5101 ctxt->sax->characters(ctxt->userData, out, i); 5102 } 5103 } else { 5104 ent = xmlParseEntityRef(ctxt); 5105 if (ent == NULL) return; 5106 if ((ent->name != NULL) && 5107 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5108 xmlNodePtr list = NULL; 5109 int ret; 5110 5111 5112 /* 5113 * The first reference to the entity trigger a parsing phase 5114 * where the ent->children is filled with the result from 5115 * the parsing. 5116 */ 5117 if (ent->children == NULL) { 5118 xmlChar *value; 5119 value = ent->content; 5120 5121 /* 5122 * Check that this entity is well formed 5123 */ 5124 if ((value != NULL) && 5125 (value[1] == 0) && (value[0] == '<') && 5126 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5127 /* 5128 * DONE: get definite answer on this !!! 5129 * Lots of entity decls are used to declare a single 5130 * char 5131 * <!ENTITY lt "<"> 5132 * Which seems to be valid since 5133 * 2.4: The ampersand character (&) and the left angle 5134 * bracket (<) may appear in their literal form only 5135 * when used ... They are also legal within the literal 5136 * entity value of an internal entity declaration;i 5137 * see "4.3.2 Well-Formed Parsed Entities". 5138 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
5139 * Looking at the OASIS test suite and James Clark 5140 * tests, this is broken. However the XML REC uses 5141 * it. Is the XML REC not well-formed ???? 5142 * This is a hack to avoid this problem 5143 * 5144 * ANSWER: since lt gt amp .. are already defined, 5145 * this is a redefinition and hence the fact that the 5146 * contentis not well balanced is not a Wf error, this 5147 * is lousy but acceptable. 5148 */ 5149 list = xmlNewDocText(ctxt->myDoc, value); 5150 if (list != NULL) { 5151 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5152 (ent->children == NULL)) { 5153 ent->children = list; 5154 ent->last = list; 5155 list->parent = (xmlNodePtr) ent; 5156 } else { 5157 xmlFreeNodeList(list); 5158 } 5159 } else if (list != NULL) { 5160 xmlFreeNodeList(list); 5161 } 5162 } else { 5163 /* 5164 * 4.3.2: An internal general parsed entity is well-formed 5165 * if its replacement text matches the production labeled 5166 * content. 5167 */ 5168 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5169 ctxt->depth++; 5170 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 5171 ctxt->sax, NULL, ctxt->depth, 5172 value, &list); 5173 ctxt->depth--; 5174 } else if (ent->etype == 5175 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5176 ctxt->depth++; 5177 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5178 ctxt->sax, NULL, ctxt->depth, 5179 ent->URI, ent->ExternalID, &list); 5180 ctxt->depth--; 5181 } else { 5182 ret = -1; 5183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5184 ctxt->sax->error(ctxt->userData, 5185 "Internal: invalid entity type\n"); 5186 } 5187 if (ret == XML_ERR_ENTITY_LOOP) { 5188 ctxt->errNo = XML_ERR_ENTITY_LOOP; 5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5190 ctxt->sax->error(ctxt->userData, 5191 "Detected entity reference loop\n"); 5192 ctxt->wellFormed = 0; 5193 ctxt->disableSAX = 1; 5194 } else if ((ret == 0) && (list != NULL)) { 5195 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5196 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5197 (ent->children == NULL)) { 5198 ent->children = list; 5199 if (ctxt->replaceEntities) { 5200 /* 5201 * Prune it directly in the generated document 5202 * except for single text nodes. 5203 */ 5204 if ((list->type == XML_TEXT_NODE) && 5205 (list->next == NULL)) { 5206 list->parent = (xmlNodePtr) ent; 5207 list = NULL; 5208 } else { 5209 while (list != NULL) { 5210 list->parent = (xmlNodePtr) ctxt->node; 5211 if (list->next == NULL) 5212 ent->last = list; 5213 list = list->next; 5214 } 5215 list = ent->children; 5216 } 5217 } else { 5218 while (list != NULL) { 5219 list->parent = (xmlNodePtr) ent; 5220 if (list->next == NULL) 5221 ent->last = list; 5222 list = list->next; 5223 } 5224 } 5225 } else { 5226 xmlFreeNodeList(list); 5227 list = NULL; 5228 } 5229 } else if (ret > 0) { 5230 ctxt->errNo = ret; 5231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5232 ctxt->sax->error(ctxt->userData, 5233 "Entity value required\n"); 5234 ctxt->wellFormed = 0; 5235 ctxt->disableSAX = 1; 5236 } else if (list != NULL) { 5237 xmlFreeNodeList(list); 5238 list = NULL; 5239 } 5240 } 5241 } 5242 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5243 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5244 /* 5245 * Create a node. 
5246 */ 5247 ctxt->sax->reference(ctxt->userData, ent->name); 5248 return; 5249 } else if (ctxt->replaceEntities) { 5250 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5251 /* 5252 * Seems we are generating the DOM content, do 5253 * a simple tree copy for all references except the first 5254 * In the first occurence list contains the replacement 5255 */ 5256 if (list == NULL) { 5257 xmlNodePtr new, cur; 5258 cur = ent->children; 5259 while (cur != NULL) { 5260 new = xmlCopyNode(cur, 1); 5261 xmlAddChild(ctxt->node, new); 5262 if (cur == ent->last) 5263 break; 5264 cur = cur->next; 5265 } 5266 } else { 5267 /* 5268 * the name change is to avoid coalescing of the 5269 * node with a prossible previous text one which 5270 * would make ent->children a dandling pointer 5271 */ 5272 if (ent->children->type == XML_TEXT_NODE) 5273 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5274 if ((ent->last != ent->children) && 5275 (ent->last->type == XML_TEXT_NODE)) 5276 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5277 xmlAddChildList(ctxt->node, ent->children); 5278 } 5279 5280 /* 5281 * This is to avoid a nasty side effect, see 5282 * characters() in SAX.c 5283 */ 5284 ctxt->nodemem = 0; 5285 ctxt->nodelen = 0; 5286 return; 5287 } else { 5288 /* 5289 * Probably running in SAX mode 5290 */ 5291 xmlParserInputPtr input; 5292 5293 input = xmlNewEntityInputStream(ctxt, ent); 5294 xmlPushInput(ctxt, input); 5295 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5296 (RAW == '<') && (NXT(1) == '?') && 5297 (NXT(2) == 'x') && (NXT(3) == 'm') && 5298 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5299 xmlParseTextDecl(ctxt); 5300 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5301 /* 5302 * The XML REC instructs us to stop parsing right here 5303 */ 5304 ctxt->instate = XML_PARSER_EOF; 5305 return; 5306 } 5307 if (input->standalone == 1) { 5308 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5310 
ctxt->sax->error(ctxt->userData, 5311 "external parsed entities cannot be standalone\n"); 5312 ctxt->wellFormed = 0; 5313 ctxt->disableSAX = 1; 5314 } 5315 } 5316 return; 5317 } 5318 } 5319 } else { 5320 val = ent->content; 5321 if (val == NULL) return; 5322 /* 5323 * inline the entity. 5324 */ 5325 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5326 (!ctxt->disableSAX)) 5327 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5328 } 5329 } 5330} 5331 5332/** 5333 * xmlParseEntityRef: 5334 * @ctxt: an XML parser context 5335 * 5336 * parse ENTITY references declarations 5337 * 5338 * [68] EntityRef ::= '&' Name ';' 5339 * 5340 * [ WFC: Entity Declared ] 5341 * In a document without any DTD, a document with only an internal DTD 5342 * subset which contains no parameter entity references, or a document 5343 * with "standalone='yes'", the Name given in the entity reference 5344 * must match that in an entity declaration, except that well-formed 5345 * documents need not declare any of the following entities: amp, lt, 5346 * gt, apos, quot. The declaration of a parameter entity must precede 5347 * any reference to it. Similarly, the declaration of a general entity 5348 * must precede any reference to it which appears in a default value in an 5349 * attribute-list declaration. Note that if entities are declared in the 5350 * external subset or in external parameter entities, a non-validating 5351 * processor is not obligated to read and process their declarations; 5352 * for such documents, the rule that an entity must be declared is a 5353 * well-formedness constraint only if standalone='yes'. 5354 * 5355 * [ WFC: Parsed Entity ] 5356 * An entity reference must not contain the name of an unparsed entity 5357 * 5358 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5359 */ 5360xmlEntityPtr 5361xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5362 xmlChar *name; 5363 xmlEntityPtr ent = NULL; 5364 5365 GROW; 5366 5367 if (RAW == '&') { 5368 NEXT; 5369 name = xmlParseName(ctxt); 5370 if (name == NULL) { 5371 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5373 ctxt->sax->error(ctxt->userData, 5374 "xmlParseEntityRef: no name\n"); 5375 ctxt->wellFormed = 0; 5376 ctxt->disableSAX = 1; 5377 } else { 5378 if (RAW == ';') { 5379 NEXT; 5380 /* 5381 * Ask first SAX for entity resolution, otherwise try the 5382 * predefined set. 5383 */ 5384 if (ctxt->sax != NULL) { 5385 if (ctxt->sax->getEntity != NULL) 5386 ent = ctxt->sax->getEntity(ctxt->userData, name); 5387 if (ent == NULL) 5388 ent = xmlGetPredefinedEntity(name); 5389 } 5390 /* 5391 * [ WFC: Entity Declared ] 5392 * In a document without any DTD, a document with only an 5393 * internal DTD subset which contains no parameter entity 5394 * references, or a document with "standalone='yes'", the 5395 * Name given in the entity reference must match that in an 5396 * entity declaration, except that well-formed documents 5397 * need not declare any of the following entities: amp, lt, 5398 * gt, apos, quot. 5399 * The declaration of a parameter entity must precede any 5400 * reference to it. 5401 * Similarly, the declaration of a general entity must 5402 * precede any reference to it which appears in a default 5403 * value in an attribute-list declaration. Note that if 5404 * entities are declared in the external subset or in 5405 * external parameter entities, a non-validating processor 5406 * is not obligated to read and process their declarations; 5407 * for such documents, the rule that an entity must be 5408 * declared is a well-formedness constraint only if 5409 * standalone='yes'. 
5410 */ 5411 if (ent == NULL) { 5412 if ((ctxt->standalone == 1) || 5413 ((ctxt->hasExternalSubset == 0) && 5414 (ctxt->hasPErefs == 0))) { 5415 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5417 ctxt->sax->error(ctxt->userData, 5418 "Entity '%s' not defined\n", name); 5419 ctxt->wellFormed = 0; 5420 ctxt->disableSAX = 1; 5421 } else { 5422 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5424 ctxt->sax->error(ctxt->userData, 5425 "Entity '%s' not defined\n", name); 5426 } 5427 } 5428 5429 /* 5430 * [ WFC: Parsed Entity ] 5431 * An entity reference must not contain the name of an 5432 * unparsed entity 5433 */ 5434 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5435 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5437 ctxt->sax->error(ctxt->userData, 5438 "Entity reference to unparsed entity %s\n", name); 5439 ctxt->wellFormed = 0; 5440 ctxt->disableSAX = 1; 5441 } 5442 5443 /* 5444 * [ WFC: No External Entity References ] 5445 * Attribute values cannot contain direct or indirect 5446 * entity references to external entities. 5447 */ 5448 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5449 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5450 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5452 ctxt->sax->error(ctxt->userData, 5453 "Attribute references external entity '%s'\n", name); 5454 ctxt->wellFormed = 0; 5455 ctxt->disableSAX = 1; 5456 } 5457 /* 5458 * [ WFC: No < in Attribute Values ] 5459 * The replacement text of any entity referred to directly or 5460 * indirectly in an attribute value (other than "<") must 5461 * not contain a <. 
5462 */ 5463 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5464 (ent != NULL) && 5465 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5466 (ent->content != NULL) && 5467 (xmlStrchr(ent->content, '<'))) { 5468 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5470 ctxt->sax->error(ctxt->userData, 5471 "'<' in entity '%s' is not allowed in attributes values\n", name); 5472 ctxt->wellFormed = 0; 5473 ctxt->disableSAX = 1; 5474 } 5475 5476 /* 5477 * Internal check, no parameter entities here ... 5478 */ 5479 else { 5480 switch (ent->etype) { 5481 case XML_INTERNAL_PARAMETER_ENTITY: 5482 case XML_EXTERNAL_PARAMETER_ENTITY: 5483 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5485 ctxt->sax->error(ctxt->userData, 5486 "Attempt to reference the parameter entity '%s'\n", name); 5487 ctxt->wellFormed = 0; 5488 ctxt->disableSAX = 1; 5489 break; 5490 default: 5491 break; 5492 } 5493 } 5494 5495 /* 5496 * [ WFC: No Recursion ] 5497 * A parsed entity must not contain a recursive reference 5498 * to itself, either directly or indirectly. 5499 * Done somewhere else 5500 */ 5501 5502 } else { 5503 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5505 ctxt->sax->error(ctxt->userData, 5506 "xmlParseEntityRef: expecting ';'\n"); 5507 ctxt->wellFormed = 0; 5508 ctxt->disableSAX = 1; 5509 } 5510 xmlFree(name); 5511 } 5512 } 5513 return(ent); 5514} 5515 5516/** 5517 * xmlParseStringEntityRef: 5518 * @ctxt: an XML parser context 5519 * @str: a pointer to an index in the string 5520 * 5521 * parse ENTITY references declarations, but this version parses it from 5522 * a string value. 
5523 * 5524 * [68] EntityRef ::= '&' Name ';' 5525 * 5526 * [ WFC: Entity Declared ] 5527 * In a document without any DTD, a document with only an internal DTD 5528 * subset which contains no parameter entity references, or a document 5529 * with "standalone='yes'", the Name given in the entity reference 5530 * must match that in an entity declaration, except that well-formed 5531 * documents need not declare any of the following entities: amp, lt, 5532 * gt, apos, quot. The declaration of a parameter entity must precede 5533 * any reference to it. Similarly, the declaration of a general entity 5534 * must precede any reference to it which appears in a default value in an 5535 * attribute-list declaration. Note that if entities are declared in the 5536 * external subset or in external parameter entities, a non-validating 5537 * processor is not obligated to read and process their declarations; 5538 * for such documents, the rule that an entity must be declared is a 5539 * well-formedness constraint only if standalone='yes'. 5540 * 5541 * [ WFC: Parsed Entity ] 5542 * An entity reference must not contain the name of an unparsed entity 5543 * 5544 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5545 * is updated to the current location in the string. 
5546 */ 5547xmlEntityPtr 5548xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5549 xmlChar *name; 5550 const xmlChar *ptr; 5551 xmlChar cur; 5552 xmlEntityPtr ent = NULL; 5553 5554 if ((str == NULL) || (*str == NULL)) 5555 return(NULL); 5556 ptr = *str; 5557 cur = *ptr; 5558 if (cur == '&') { 5559 ptr++; 5560 cur = *ptr; 5561 name = xmlParseStringName(ctxt, &ptr); 5562 if (name == NULL) { 5563 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5565 ctxt->sax->error(ctxt->userData, 5566 "xmlParseStringEntityRef: no name\n"); 5567 ctxt->wellFormed = 0; 5568 ctxt->disableSAX = 1; 5569 } else { 5570 if (*ptr == ';') { 5571 ptr++; 5572 /* 5573 * Ask first SAX for entity resolution, otherwise try the 5574 * predefined set. 5575 */ 5576 if (ctxt->sax != NULL) { 5577 if (ctxt->sax->getEntity != NULL) 5578 ent = ctxt->sax->getEntity(ctxt->userData, name); 5579 if (ent == NULL) 5580 ent = xmlGetPredefinedEntity(name); 5581 } 5582 /* 5583 * [ WFC: Entity Declared ] 5584 * In a document without any DTD, a document with only an 5585 * internal DTD subset which contains no parameter entity 5586 * references, or a document with "standalone='yes'", the 5587 * Name given in the entity reference must match that in an 5588 * entity declaration, except that well-formed documents 5589 * need not declare any of the following entities: amp, lt, 5590 * gt, apos, quot. 5591 * The declaration of a parameter entity must precede any 5592 * reference to it. 5593 * Similarly, the declaration of a general entity must 5594 * precede any reference to it which appears in a default 5595 * value in an attribute-list declaration. 
Note that if 5596 * entities are declared in the external subset or in 5597 * external parameter entities, a non-validating processor 5598 * is not obligated to read and process their declarations; 5599 * for such documents, the rule that an entity must be 5600 * declared is a well-formedness constraint only if 5601 * standalone='yes'. 5602 */ 5603 if (ent == NULL) { 5604 if ((ctxt->standalone == 1) || 5605 ((ctxt->hasExternalSubset == 0) && 5606 (ctxt->hasPErefs == 0))) { 5607 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5609 ctxt->sax->error(ctxt->userData, 5610 "Entity '%s' not defined\n", name); 5611 ctxt->wellFormed = 0; 5612 ctxt->disableSAX = 1; 5613 } else { 5614 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5615 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5616 ctxt->sax->warning(ctxt->userData, 5617 "Entity '%s' not defined\n", name); 5618 } 5619 } 5620 5621 /* 5622 * [ WFC: Parsed Entity ] 5623 * An entity reference must not contain the name of an 5624 * unparsed entity 5625 */ 5626 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5627 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5629 ctxt->sax->error(ctxt->userData, 5630 "Entity reference to unparsed entity %s\n", name); 5631 ctxt->wellFormed = 0; 5632 ctxt->disableSAX = 1; 5633 } 5634 5635 /* 5636 * [ WFC: No External Entity References ] 5637 * Attribute values cannot contain direct or indirect 5638 * entity references to external entities. 
5639 */ 5640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5642 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5644 ctxt->sax->error(ctxt->userData, 5645 "Attribute references external entity '%s'\n", name); 5646 ctxt->wellFormed = 0; 5647 ctxt->disableSAX = 1; 5648 } 5649 /* 5650 * [ WFC: No < in Attribute Values ] 5651 * The replacement text of any entity referred to directly or 5652 * indirectly in an attribute value (other than "<") must 5653 * not contain a <. 5654 */ 5655 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5656 (ent != NULL) && 5657 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5658 (ent->content != NULL) && 5659 (xmlStrchr(ent->content, '<'))) { 5660 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5662 ctxt->sax->error(ctxt->userData, 5663 "'<' in entity '%s' is not allowed in attributes values\n", name); 5664 ctxt->wellFormed = 0; 5665 ctxt->disableSAX = 1; 5666 } 5667 5668 /* 5669 * Internal check, no parameter entities here ... 5670 */ 5671 else { 5672 switch (ent->etype) { 5673 case XML_INTERNAL_PARAMETER_ENTITY: 5674 case XML_EXTERNAL_PARAMETER_ENTITY: 5675 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5677 ctxt->sax->error(ctxt->userData, 5678 "Attempt to reference the parameter entity '%s'\n", name); 5679 ctxt->wellFormed = 0; 5680 ctxt->disableSAX = 1; 5681 break; 5682 default: 5683 break; 5684 } 5685 } 5686 5687 /* 5688 * [ WFC: No Recursion ] 5689 * A parsed entity must not contain a recursive reference 5690 * to itself, either directly or indirectly. 
5691 * Done somewhwere else 5692 */ 5693 5694 } else { 5695 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5697 ctxt->sax->error(ctxt->userData, 5698 "xmlParseStringEntityRef: expecting ';'\n"); 5699 ctxt->wellFormed = 0; 5700 ctxt->disableSAX = 1; 5701 } 5702 xmlFree(name); 5703 } 5704 } 5705 *str = ptr; 5706 return(ent); 5707} 5708 5709/** 5710 * xmlParsePEReference: 5711 * @ctxt: an XML parser context 5712 * 5713 * parse PEReference declarations 5714 * The entity content is handled directly by pushing it's content as 5715 * a new input stream. 5716 * 5717 * [69] PEReference ::= '%' Name ';' 5718 * 5719 * [ WFC: No Recursion ] 5720 * A parsed entity must not contain a recursive 5721 * reference to itself, either directly or indirectly. 5722 * 5723 * [ WFC: Entity Declared ] 5724 * In a document without any DTD, a document with only an internal DTD 5725 * subset which contains no parameter entity references, or a document 5726 * with "standalone='yes'", ... ... The declaration of a parameter 5727 * entity must precede any reference to it... 5728 * 5729 * [ VC: Entity Declared ] 5730 * In a document with an external subset or external parameter entities 5731 * with "standalone='no'", ... ... The declaration of a parameter entity 5732 * must precede any reference to it... 5733 * 5734 * [ WFC: In DTD ] 5735 * Parameter-entity references may only appear in the DTD. 5736 * NOTE: misleading but this is handled. 
5737 */ 5738void 5739xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5740 xmlChar *name; 5741 xmlEntityPtr entity = NULL; 5742 xmlParserInputPtr input; 5743 5744 if (RAW == '%') { 5745 NEXT; 5746 name = xmlParseName(ctxt); 5747 if (name == NULL) { 5748 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5750 ctxt->sax->error(ctxt->userData, 5751 "xmlParsePEReference: no name\n"); 5752 ctxt->wellFormed = 0; 5753 ctxt->disableSAX = 1; 5754 } else { 5755 if (RAW == ';') { 5756 NEXT; 5757 if ((ctxt->sax != NULL) && 5758 (ctxt->sax->getParameterEntity != NULL)) 5759 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5760 name); 5761 if (entity == NULL) { 5762 /* 5763 * [ WFC: Entity Declared ] 5764 * In a document without any DTD, a document with only an 5765 * internal DTD subset which contains no parameter entity 5766 * references, or a document with "standalone='yes'", ... 5767 * ... The declaration of a parameter entity must precede 5768 * any reference to it... 5769 */ 5770 if ((ctxt->standalone == 1) || 5771 ((ctxt->hasExternalSubset == 0) && 5772 (ctxt->hasPErefs == 0))) { 5773 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5774 if ((!ctxt->disableSAX) && 5775 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5776 ctxt->sax->error(ctxt->userData, 5777 "PEReference: %%%s; not found\n", name); 5778 ctxt->wellFormed = 0; 5779 ctxt->disableSAX = 1; 5780 } else { 5781 /* 5782 * [ VC: Entity Declared ] 5783 * In a document with an external subset or external 5784 * parameter entities with "standalone='no'", ... 5785 * ... The declaration of a parameter entity must precede 5786 * any reference to it... 
5787 */ 5788 if ((!ctxt->disableSAX) && 5789 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5790 ctxt->sax->warning(ctxt->userData, 5791 "PEReference: %%%s; not found\n", name); 5792 ctxt->valid = 0; 5793 } 5794 } else { 5795 /* 5796 * Internal checking in case the entity quest barfed 5797 */ 5798 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5799 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5800 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5801 ctxt->sax->warning(ctxt->userData, 5802 "Internal: %%%s; is not a parameter entity\n", name); 5803 } else { 5804 /* 5805 * TODO !!! 5806 * handle the extra spaces added before and after 5807 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5808 */ 5809 input = xmlNewEntityInputStream(ctxt, entity); 5810 xmlPushInput(ctxt, input); 5811 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5812 (RAW == '<') && (NXT(1) == '?') && 5813 (NXT(2) == 'x') && (NXT(3) == 'm') && 5814 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5815 xmlParseTextDecl(ctxt); 5816 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5817 /* 5818 * The XML REC instructs us to stop parsing 5819 * right here 5820 */ 5821 ctxt->instate = XML_PARSER_EOF; 5822 xmlFree(name); 5823 return; 5824 } 5825 } 5826 if (ctxt->token == 0) 5827 ctxt->token = ' '; 5828 } 5829 } 5830 ctxt->hasPErefs = 1; 5831 } else { 5832 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5834 ctxt->sax->error(ctxt->userData, 5835 "xmlParsePEReference: expecting ';'\n"); 5836 ctxt->wellFormed = 0; 5837 ctxt->disableSAX = 1; 5838 } 5839 xmlFree(name); 5840 } 5841 } 5842} 5843 5844/** 5845 * xmlParseStringPEReference: 5846 * @ctxt: an XML parser context 5847 * @str: a pointer to an index in the string 5848 * 5849 * parse PEReference declarations 5850 * 5851 * [69] PEReference ::= '%' Name ';' 5852 * 5853 * [ WFC: No Recursion ] 5854 * A parsed entity must not contain a recursive 5855 * reference to itself, 
either directly or indirectly. 5856 * 5857 * [ WFC: Entity Declared ] 5858 * In a document without any DTD, a document with only an internal DTD 5859 * subset which contains no parameter entity references, or a document 5860 * with "standalone='yes'", ... ... The declaration of a parameter 5861 * entity must precede any reference to it... 5862 * 5863 * [ VC: Entity Declared ] 5864 * In a document with an external subset or external parameter entities 5865 * with "standalone='no'", ... ... The declaration of a parameter entity 5866 * must precede any reference to it... 5867 * 5868 * [ WFC: In DTD ] 5869 * Parameter-entity references may only appear in the DTD. 5870 * NOTE: misleading but this is handled. 5871 * 5872 * Returns the string of the entity content. 5873 * str is updated to the current value of the index 5874 */ 5875xmlEntityPtr 5876xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5877 const xmlChar *ptr; 5878 xmlChar cur; 5879 xmlChar *name; 5880 xmlEntityPtr entity = NULL; 5881 5882 if ((str == NULL) || (*str == NULL)) return(NULL); 5883 ptr = *str; 5884 cur = *ptr; 5885 if (cur == '%') { 5886 ptr++; 5887 cur = *ptr; 5888 name = xmlParseStringName(ctxt, &ptr); 5889 if (name == NULL) { 5890 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5892 ctxt->sax->error(ctxt->userData, 5893 "xmlParseStringPEReference: no name\n"); 5894 ctxt->wellFormed = 0; 5895 ctxt->disableSAX = 1; 5896 } else { 5897 cur = *ptr; 5898 if (cur == ';') { 5899 ptr++; 5900 cur = *ptr; 5901 if ((ctxt->sax != NULL) && 5902 (ctxt->sax->getParameterEntity != NULL)) 5903 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5904 name); 5905 if (entity == NULL) { 5906 /* 5907 * [ WFC: Entity Declared ] 5908 * In a document without any DTD, a document with only an 5909 * internal DTD subset which contains no parameter entity 5910 * references, or a document with "standalone='yes'", ... 5911 * ... 
The declaration of a parameter entity must precede 5912 * any reference to it... 5913 */ 5914 if ((ctxt->standalone == 1) || 5915 ((ctxt->hasExternalSubset == 0) && 5916 (ctxt->hasPErefs == 0))) { 5917 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5919 ctxt->sax->error(ctxt->userData, 5920 "PEReference: %%%s; not found\n", name); 5921 ctxt->wellFormed = 0; 5922 ctxt->disableSAX = 1; 5923 } else { 5924 /* 5925 * [ VC: Entity Declared ] 5926 * In a document with an external subset or external 5927 * parameter entities with "standalone='no'", ... 5928 * ... The declaration of a parameter entity must 5929 * precede any reference to it... 5930 */ 5931 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5932 ctxt->sax->warning(ctxt->userData, 5933 "PEReference: %%%s; not found\n", name); 5934 ctxt->valid = 0; 5935 } 5936 } else { 5937 /* 5938 * Internal checking in case the entity quest barfed 5939 */ 5940 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5941 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5942 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5943 ctxt->sax->warning(ctxt->userData, 5944 "Internal: %%%s; is not a parameter entity\n", name); 5945 } 5946 } 5947 ctxt->hasPErefs = 1; 5948 } else { 5949 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5951 ctxt->sax->error(ctxt->userData, 5952 "xmlParseStringPEReference: expecting ';'\n"); 5953 ctxt->wellFormed = 0; 5954 ctxt->disableSAX = 1; 5955 } 5956 xmlFree(name); 5957 } 5958 } 5959 *str = ptr; 5960 return(entity); 5961} 5962 5963/** 5964 * xmlParseDocTypeDecl: 5965 * @ctxt: an XML parser context 5966 * 5967 * parse a DOCTYPE declaration 5968 * 5969 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5970 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 5971 * 5972 * [ VC: Root Element Type ] 5973 * The Name in the document type declaration must match the element 5974 * type of the root element. 5975 */ 5976 5977void 5978xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5979 xmlChar *name = NULL; 5980 xmlChar *ExternalID = NULL; 5981 xmlChar *URI = NULL; 5982 5983 /* 5984 * We know that '<!DOCTYPE' has been detected. 5985 */ 5986 SKIP(9); 5987 5988 SKIP_BLANKS; 5989 5990 /* 5991 * Parse the DOCTYPE name. 5992 */ 5993 name = xmlParseName(ctxt); 5994 if (name == NULL) { 5995 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5997 ctxt->sax->error(ctxt->userData, 5998 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5999 ctxt->wellFormed = 0; 6000 ctxt->disableSAX = 1; 6001 } 6002 ctxt->intSubName = name; 6003 6004 SKIP_BLANKS; 6005 6006 /* 6007 * Check for SystemID and ExternalID 6008 */ 6009 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6010 6011 if ((URI != NULL) || (ExternalID != NULL)) { 6012 ctxt->hasExternalSubset = 1; 6013 } 6014 ctxt->extSubURI = URI; 6015 ctxt->extSubSystem = ExternalID; 6016 6017 SKIP_BLANKS; 6018 6019 /* 6020 * Create and update the internal subset. 6021 */ 6022 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6023 (!ctxt->disableSAX)) 6024 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6025 6026 /* 6027 * Is there any internal subset declarations ? 6028 * they are handled separately in xmlParseInternalSubset() 6029 */ 6030 if (RAW == '[') 6031 return; 6032 6033 /* 6034 * We should be at the end of the DOCTYPE declaration. 
6035 */ 6036 if (RAW != '>') { 6037 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6039 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6040 ctxt->wellFormed = 0; 6041 ctxt->disableSAX = 1; 6042 } 6043 NEXT; 6044} 6045 6046/** 6047 * xmlParseInternalsubset: 6048 * @ctxt: an XML parser context 6049 * 6050 * parse the internal subset declaration 6051 * 6052 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6053 */ 6054 6055static void 6056xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6057 /* 6058 * Is there any DTD definition ? 6059 */ 6060 if (RAW == '[') { 6061 ctxt->instate = XML_PARSER_DTD; 6062 NEXT; 6063 /* 6064 * Parse the succession of Markup declarations and 6065 * PEReferences. 6066 * Subsequence (markupdecl | PEReference | S)* 6067 */ 6068 while (RAW != ']') { 6069 const xmlChar *check = CUR_PTR; 6070 int cons = ctxt->input->consumed; 6071 6072 SKIP_BLANKS; 6073 xmlParseMarkupDecl(ctxt); 6074 xmlParsePEReference(ctxt); 6075 6076 /* 6077 * Pop-up of finished entities. 6078 */ 6079 while ((RAW == 0) && (ctxt->inputNr > 1)) 6080 xmlPopInput(ctxt); 6081 6082 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6083 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6085 ctxt->sax->error(ctxt->userData, 6086 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6087 ctxt->wellFormed = 0; 6088 ctxt->disableSAX = 1; 6089 break; 6090 } 6091 } 6092 if (RAW == ']') { 6093 NEXT; 6094 SKIP_BLANKS; 6095 } 6096 } 6097 6098 /* 6099 * We should be at the end of the DOCTYPE declaration. 
6100 */ 6101 if (RAW != '>') { 6102 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6104 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6105 ctxt->wellFormed = 0; 6106 ctxt->disableSAX = 1; 6107 } 6108 NEXT; 6109} 6110 6111/** 6112 * xmlParseAttribute: 6113 * @ctxt: an XML parser context 6114 * @value: a xmlChar ** used to store the value of the attribute 6115 * 6116 * parse an attribute 6117 * 6118 * [41] Attribute ::= Name Eq AttValue 6119 * 6120 * [ WFC: No External Entity References ] 6121 * Attribute values cannot contain direct or indirect entity references 6122 * to external entities. 6123 * 6124 * [ WFC: No < in Attribute Values ] 6125 * The replacement text of any entity referred to directly or indirectly in 6126 * an attribute value (other than "<") must not contain a <. 6127 * 6128 * [ VC: Attribute Value Type ] 6129 * The attribute must have been declared; the value must be of the type 6130 * declared for it. 6131 * 6132 * [25] Eq ::= S? '=' S? 6133 * 6134 * With namespace: 6135 * 6136 * [NS 11] Attribute ::= QName Eq AttValue 6137 * 6138 * Also the case QName == xmlns:??? is handled independently as a namespace 6139 * definition. 6140 * 6141 * Returns the attribute name, and the value in *value. 
6142 */ 6143 6144xmlChar * 6145xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6146 xmlChar *name, *val; 6147 6148 *value = NULL; 6149 name = xmlParseName(ctxt); 6150 if (name == NULL) { 6151 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6153 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 6154 ctxt->wellFormed = 0; 6155 ctxt->disableSAX = 1; 6156 return(NULL); 6157 } 6158 6159 /* 6160 * read the value 6161 */ 6162 SKIP_BLANKS; 6163 if (RAW == '=') { 6164 NEXT; 6165 SKIP_BLANKS; 6166 val = xmlParseAttValue(ctxt); 6167 ctxt->instate = XML_PARSER_CONTENT; 6168 } else { 6169 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6171 ctxt->sax->error(ctxt->userData, 6172 "Specification mandate value for attribute %s\n", name); 6173 ctxt->wellFormed = 0; 6174 ctxt->disableSAX = 1; 6175 xmlFree(name); 6176 return(NULL); 6177 } 6178 6179 /* 6180 * Check that xml:lang conforms to the specification 6181 * No more registered as an error, just generate a warning now 6182 * since this was deprecated in XML second edition 6183 */ 6184 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6185 if (!xmlCheckLanguageID(val)) { 6186 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6187 ctxt->sax->warning(ctxt->userData, 6188 "Malformed value for xml:lang : %s\n", val); 6189 } 6190 } 6191 6192 /* 6193 * Check that xml:space conforms to the specification 6194 */ 6195 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6196 if (xmlStrEqual(val, BAD_CAST "default")) 6197 *(ctxt->space) = 0; 6198 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6199 *(ctxt->space) = 1; 6200 else { 6201 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6203 ctxt->sax->error(ctxt->userData, 6204"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 6205 val); 6206 
ctxt->wellFormed = 0; 6207 ctxt->disableSAX = 1; 6208 } 6209 } 6210 6211 *value = val; 6212 return(name); 6213} 6214 6215/** 6216 * xmlParseStartTag: 6217 * @ctxt: an XML parser context 6218 * 6219 * parse a start of tag either for rule element or 6220 * EmptyElement. In both case we don't parse the tag closing chars. 6221 * 6222 * [40] STag ::= '<' Name (S Attribute)* S? '>' 6223 * 6224 * [ WFC: Unique Att Spec ] 6225 * No attribute name may appear more than once in the same start-tag or 6226 * empty-element tag. 6227 * 6228 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6229 * 6230 * [ WFC: Unique Att Spec ] 6231 * No attribute name may appear more than once in the same start-tag or 6232 * empty-element tag. 6233 * 6234 * With namespace: 6235 * 6236 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6237 * 6238 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6239 * 6240 * Returns the element name parsed 6241 */ 6242 6243xmlChar * 6244xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6245 xmlChar *name; 6246 xmlChar *attname; 6247 xmlChar *attvalue; 6248 const xmlChar **atts = NULL; 6249 int nbatts = 0; 6250 int maxatts = 0; 6251 int i; 6252 6253 if (RAW != '<') return(NULL); 6254 NEXT1; 6255 6256 name = xmlParseName(ctxt); 6257 if (name == NULL) { 6258 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6260 ctxt->sax->error(ctxt->userData, 6261 "xmlParseStartTag: invalid element name\n"); 6262 ctxt->wellFormed = 0; 6263 ctxt->disableSAX = 1; 6264 return(NULL); 6265 } 6266 6267 /* 6268 * Now parse the attributes, it ends up with the ending 6269 * 6270 * (S Attribute)* S? 
6271 */ 6272 SKIP_BLANKS; 6273 GROW; 6274 6275 while ((RAW != '>') && 6276 ((RAW != '/') || (NXT(1) != '>')) && 6277 (IS_CHAR(RAW))) { 6278 const xmlChar *q = CUR_PTR; 6279 int cons = ctxt->input->consumed; 6280 6281 attname = xmlParseAttribute(ctxt, &attvalue); 6282 if ((attname != NULL) && (attvalue != NULL)) { 6283 /* 6284 * [ WFC: Unique Att Spec ] 6285 * No attribute name may appear more than once in the same 6286 * start-tag or empty-element tag. 6287 */ 6288 for (i = 0; i < nbatts;i += 2) { 6289 if (xmlStrEqual(atts[i], attname)) { 6290 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6292 ctxt->sax->error(ctxt->userData, 6293 "Attribute %s redefined\n", 6294 attname); 6295 ctxt->wellFormed = 0; 6296 ctxt->disableSAX = 1; 6297 xmlFree(attname); 6298 xmlFree(attvalue); 6299 goto failed; 6300 } 6301 } 6302 6303 /* 6304 * Add the pair to atts 6305 */ 6306 if (atts == NULL) { 6307 maxatts = 10; 6308 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6309 if (atts == NULL) { 6310 xmlGenericError(xmlGenericErrorContext, 6311 "malloc of %ld byte failed\n", 6312 maxatts * (long)sizeof(xmlChar *)); 6313 return(NULL); 6314 } 6315 } else if (nbatts + 4 > maxatts) { 6316 maxatts *= 2; 6317 atts = (const xmlChar **) xmlRealloc((void *) atts, 6318 maxatts * sizeof(xmlChar *)); 6319 if (atts == NULL) { 6320 xmlGenericError(xmlGenericErrorContext, 6321 "realloc of %ld byte failed\n", 6322 maxatts * (long)sizeof(xmlChar *)); 6323 return(NULL); 6324 } 6325 } 6326 atts[nbatts++] = attname; 6327 atts[nbatts++] = attvalue; 6328 atts[nbatts] = NULL; 6329 atts[nbatts + 1] = NULL; 6330 } else { 6331 if (attname != NULL) 6332 xmlFree(attname); 6333 if (attvalue != NULL) 6334 xmlFree(attvalue); 6335 } 6336 6337failed: 6338 6339 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6340 break; 6341 if (!IS_BLANK(RAW)) { 6342 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6343 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6344 ctxt->sax->error(ctxt->userData, 6345 "attributes construct error\n"); 6346 ctxt->wellFormed = 0; 6347 ctxt->disableSAX = 1; 6348 } 6349 SKIP_BLANKS; 6350 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6351 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6353 ctxt->sax->error(ctxt->userData, 6354 "xmlParseStartTag: problem parsing attributes\n"); 6355 ctxt->wellFormed = 0; 6356 ctxt->disableSAX = 1; 6357 break; 6358 } 6359 GROW; 6360 } 6361 6362 /* 6363 * SAX: Start of Element ! 6364 */ 6365 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6366 (!ctxt->disableSAX)) 6367 ctxt->sax->startElement(ctxt->userData, name, atts); 6368 6369 if (atts != NULL) { 6370 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6371 xmlFree((void *) atts); 6372 } 6373 return(name); 6374} 6375 6376/** 6377 * xmlParseEndTag: 6378 * @ctxt: an XML parser context 6379 * 6380 * parse an end of tag 6381 * 6382 * [42] ETag ::= '</' Name S? '>' 6383 * 6384 * With namespace 6385 * 6386 * [NS 9] ETag ::= '</' QName S? '>' 6387 */ 6388 6389void 6390xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6391 xmlChar *name; 6392 xmlChar *oldname; 6393 6394 GROW; 6395 if ((RAW != '<') || (NXT(1) != '/')) { 6396 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6398 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6399 ctxt->wellFormed = 0; 6400 ctxt->disableSAX = 1; 6401 return; 6402 } 6403 SKIP(2); 6404 6405 name = xmlParseName(ctxt); 6406 6407 /* 6408 * We should definitely be at the ending "S? 
'>'" part 6409 */ 6410 GROW; 6411 SKIP_BLANKS; 6412 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6413 ctxt->errNo = XML_ERR_GT_REQUIRED; 6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6415 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6416 ctxt->wellFormed = 0; 6417 ctxt->disableSAX = 1; 6418 } else 6419 NEXT1; 6420 6421 /* 6422 * [ WFC: Element Type Match ] 6423 * The Name in an element's end-tag must match the element type in the 6424 * start-tag. 6425 * 6426 */ 6427 if ((name == NULL) || (ctxt->name == NULL) || 6428 (!xmlStrEqual(name, ctxt->name))) { 6429 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6431 if ((name != NULL) && (ctxt->name != NULL)) { 6432 ctxt->sax->error(ctxt->userData, 6433 "Opening and ending tag mismatch: %s and %s\n", 6434 ctxt->name, name); 6435 } else if (ctxt->name != NULL) { 6436 ctxt->sax->error(ctxt->userData, 6437 "Ending tag eror for: %s\n", ctxt->name); 6438 } else { 6439 ctxt->sax->error(ctxt->userData, 6440 "Ending tag error: internal error ???\n"); 6441 } 6442 6443 } 6444 ctxt->wellFormed = 0; 6445 ctxt->disableSAX = 1; 6446 } 6447 6448 /* 6449 * SAX: End of Tag 6450 */ 6451 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6452 (!ctxt->disableSAX)) 6453 ctxt->sax->endElement(ctxt->userData, name); 6454 6455 if (name != NULL) 6456 xmlFree(name); 6457 oldname = namePop(ctxt); 6458 spacePop(ctxt); 6459 if (oldname != NULL) { 6460#ifdef DEBUG_STACK 6461 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6462#endif 6463 xmlFree(oldname); 6464 } 6465 return; 6466} 6467 6468/** 6469 * xmlParseCDSect: 6470 * @ctxt: an XML parser context 6471 * 6472 * Parse escaped pure raw content. 
6473 * 6474 * [18] CDSect ::= CDStart CData CDEnd 6475 * 6476 * [19] CDStart ::= '<![CDATA[' 6477 * 6478 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6479 * 6480 * [21] CDEnd ::= ']]>' 6481 */ 6482void 6483xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6484 xmlChar *buf = NULL; 6485 int len = 0; 6486 int size = XML_PARSER_BUFFER_SIZE; 6487 int r, rl; 6488 int s, sl; 6489 int cur, l; 6490 int count = 0; 6491 6492 if ((NXT(0) == '<') && (NXT(1) == '!') && 6493 (NXT(2) == '[') && (NXT(3) == 'C') && 6494 (NXT(4) == 'D') && (NXT(5) == 'A') && 6495 (NXT(6) == 'T') && (NXT(7) == 'A') && 6496 (NXT(8) == '[')) { 6497 SKIP(9); 6498 } else 6499 return; 6500 6501 ctxt->instate = XML_PARSER_CDATA_SECTION; 6502 r = CUR_CHAR(rl); 6503 if (!IS_CHAR(r)) { 6504 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6506 ctxt->sax->error(ctxt->userData, 6507 "CData section not finished\n"); 6508 ctxt->wellFormed = 0; 6509 ctxt->disableSAX = 1; 6510 ctxt->instate = XML_PARSER_CONTENT; 6511 return; 6512 } 6513 NEXTL(rl); 6514 s = CUR_CHAR(sl); 6515 if (!IS_CHAR(s)) { 6516 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6518 ctxt->sax->error(ctxt->userData, 6519 "CData section not finished\n"); 6520 ctxt->wellFormed = 0; 6521 ctxt->disableSAX = 1; 6522 ctxt->instate = XML_PARSER_CONTENT; 6523 return; 6524 } 6525 NEXTL(sl); 6526 cur = CUR_CHAR(l); 6527 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6528 if (buf == NULL) { 6529 xmlGenericError(xmlGenericErrorContext, 6530 "malloc of %d byte failed\n", size); 6531 return; 6532 } 6533 while (IS_CHAR(cur) && 6534 ((r != ']') || (s != ']') || (cur != '>'))) { 6535 if (len + 5 >= size) { 6536 size *= 2; 6537 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6538 if (buf == NULL) { 6539 xmlGenericError(xmlGenericErrorContext, 6540 "realloc of %d byte failed\n", size); 6541 return; 6542 } 6543 } 6544 COPY_BUF(rl,buf,len,r); 6545 r = s; 
6546 rl = sl; 6547 s = cur; 6548 sl = l; 6549 count++; 6550 if (count > 50) { 6551 GROW; 6552 count = 0; 6553 } 6554 NEXTL(l); 6555 cur = CUR_CHAR(l); 6556 } 6557 buf[len] = 0; 6558 ctxt->instate = XML_PARSER_CONTENT; 6559 if (cur != '>') { 6560 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6562 ctxt->sax->error(ctxt->userData, 6563 "CData section not finished\n%.50s\n", buf); 6564 ctxt->wellFormed = 0; 6565 ctxt->disableSAX = 1; 6566 xmlFree(buf); 6567 return; 6568 } 6569 NEXTL(l); 6570 6571 /* 6572 * Ok the buffer is to be consumed as cdata. 6573 */ 6574 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6575 if (ctxt->sax->cdataBlock != NULL) 6576 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6577 else if (ctxt->sax->characters != NULL) 6578 ctxt->sax->characters(ctxt->userData, buf, len); 6579 } 6580 xmlFree(buf); 6581} 6582 6583/** 6584 * xmlParseContent: 6585 * @ctxt: an XML parser context 6586 * 6587 * Parse a content: 6588 * 6589 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6590 */ 6591 6592void 6593xmlParseContent(xmlParserCtxtPtr ctxt) { 6594 GROW; 6595 while (((RAW != 0) || (ctxt->token != 0)) && 6596 ((RAW != '<') || (NXT(1) != '/'))) { 6597 const xmlChar *test = CUR_PTR; 6598 int cons = ctxt->input->consumed; 6599 int tok = ctxt->token; 6600 const xmlChar *cur = ctxt->input->cur; 6601 6602 /* 6603 * Handle possible processed charrefs. 6604 */ 6605 if (ctxt->token != 0) { 6606 xmlParseCharData(ctxt, 0); 6607 } 6608 /* 6609 * First case : a Processing Instruction. 
6610 */ 6611 else if ((*cur == '<') && (cur[1] == '?')) { 6612 xmlParsePI(ctxt); 6613 } 6614 6615 /* 6616 * Second case : a CDSection 6617 */ 6618 else if ((*cur == '<') && (NXT(1) == '!') && 6619 (NXT(2) == '[') && (NXT(3) == 'C') && 6620 (NXT(4) == 'D') && (NXT(5) == 'A') && 6621 (NXT(6) == 'T') && (NXT(7) == 'A') && 6622 (NXT(8) == '[')) { 6623 xmlParseCDSect(ctxt); 6624 } 6625 6626 /* 6627 * Third case : a comment 6628 */ 6629 else if ((*cur == '<') && (NXT(1) == '!') && 6630 (NXT(2) == '-') && (NXT(3) == '-')) { 6631 xmlParseComment(ctxt); 6632 ctxt->instate = XML_PARSER_CONTENT; 6633 } 6634 6635 /* 6636 * Fourth case : a sub-element. 6637 */ 6638 else if (*cur == '<') { 6639 xmlParseElement(ctxt); 6640 } 6641 6642 /* 6643 * Fifth case : a reference. If if has not been resolved, 6644 * parsing returns it's Name, create the node 6645 */ 6646 6647 else if (*cur == '&') { 6648 xmlParseReference(ctxt); 6649 } 6650 6651 /* 6652 * Last case, text. Note that References are handled directly. 6653 */ 6654 else { 6655 xmlParseCharData(ctxt, 0); 6656 } 6657 6658 GROW; 6659 /* 6660 * Pop-up of finished entities. 6661 */ 6662 while ((RAW == 0) && (ctxt->inputNr > 1)) 6663 xmlPopInput(ctxt); 6664 SHRINK; 6665 6666 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6667 (tok == ctxt->token)) { 6668 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6670 ctxt->sax->error(ctxt->userData, 6671 "detected an error in element content\n"); 6672 ctxt->wellFormed = 0; 6673 ctxt->disableSAX = 1; 6674 ctxt->instate = XML_PARSER_EOF; 6675 break; 6676 } 6677 } 6678} 6679 6680/** 6681 * xmlParseElement: 6682 * @ctxt: an XML parser context 6683 * 6684 * parse an XML element, this is highly recursive 6685 * 6686 * [39] element ::= EmptyElemTag | STag content ETag 6687 * 6688 * [ WFC: Element Type Match ] 6689 * The Name in an element's end-tag must match the element type in the 6690 * start-tag. 
6691 * 6692 * [ VC: Element Valid ] 6693 * An element is valid if there is a declaration matching elementdecl 6694 * where the Name matches the element type and one of the following holds: 6695 * - The declaration matches EMPTY and the element has no content. 6696 * - The declaration matches children and the sequence of child elements 6697 * belongs to the language generated by the regular expression in the 6698 * content model, with optional white space (characters matching the 6699 * nonterminal S) between each pair of child elements. 6700 * - The declaration matches Mixed and the content consists of character 6701 * data and child elements whose types match names in the content model. 6702 * - The declaration matches ANY, and the types of any child elements have 6703 * been declared. 6704 */ 6705 6706void 6707xmlParseElement(xmlParserCtxtPtr ctxt) { 6708 const xmlChar *openTag = CUR_PTR; 6709 xmlChar *name; 6710 xmlChar *oldname; 6711 xmlParserNodeInfo node_info; 6712 xmlNodePtr ret; 6713 6714 /* Capture start position */ 6715 if (ctxt->record_info) { 6716 node_info.begin_pos = ctxt->input->consumed + 6717 (CUR_PTR - ctxt->input->base); 6718 node_info.begin_line = ctxt->input->line; 6719 } 6720 6721 if (ctxt->spaceNr == 0) 6722 spacePush(ctxt, -1); 6723 else 6724 spacePush(ctxt, *ctxt->space); 6725 6726 name = xmlParseStartTag(ctxt); 6727 if (name == NULL) { 6728 spacePop(ctxt); 6729 return; 6730 } 6731 namePush(ctxt, name); 6732 ret = ctxt->node; 6733 6734 /* 6735 * [ VC: Root Element Type ] 6736 * The Name in the document type declaration must match the element 6737 * type of the root element. 6738 */ 6739 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6740 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6741 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6742 6743 /* 6744 * Check for an Empty Element. 
6745 */ 6746 if ((RAW == '/') && (NXT(1) == '>')) { 6747 SKIP(2); 6748 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6749 (!ctxt->disableSAX)) 6750 ctxt->sax->endElement(ctxt->userData, name); 6751 oldname = namePop(ctxt); 6752 spacePop(ctxt); 6753 if (oldname != NULL) { 6754#ifdef DEBUG_STACK 6755 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6756#endif 6757 xmlFree(oldname); 6758 } 6759 if ( ret != NULL && ctxt->record_info ) { 6760 node_info.end_pos = ctxt->input->consumed + 6761 (CUR_PTR - ctxt->input->base); 6762 node_info.end_line = ctxt->input->line; 6763 node_info.node = ret; 6764 xmlParserAddNodeInfo(ctxt, &node_info); 6765 } 6766 return; 6767 } 6768 if (RAW == '>') { 6769 NEXT1; 6770 } else { 6771 ctxt->errNo = XML_ERR_GT_REQUIRED; 6772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6773 ctxt->sax->error(ctxt->userData, 6774 "Couldn't find end of Start Tag\n%.30s\n", 6775 openTag); 6776 ctxt->wellFormed = 0; 6777 ctxt->disableSAX = 1; 6778 6779 /* 6780 * end of parsing of this node. 
6781 */ 6782 nodePop(ctxt); 6783 oldname = namePop(ctxt); 6784 spacePop(ctxt); 6785 if (oldname != NULL) { 6786#ifdef DEBUG_STACK 6787 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6788#endif 6789 xmlFree(oldname); 6790 } 6791 6792 /* 6793 * Capture end position and add node 6794 */ 6795 if ( ret != NULL && ctxt->record_info ) { 6796 node_info.end_pos = ctxt->input->consumed + 6797 (CUR_PTR - ctxt->input->base); 6798 node_info.end_line = ctxt->input->line; 6799 node_info.node = ret; 6800 xmlParserAddNodeInfo(ctxt, &node_info); 6801 } 6802 return; 6803 } 6804 6805 /* 6806 * Parse the content of the element: 6807 */ 6808 xmlParseContent(ctxt); 6809 if (!IS_CHAR(RAW)) { 6810 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6812 ctxt->sax->error(ctxt->userData, 6813 "Premature end of data in tag %.30s\n", openTag); 6814 ctxt->wellFormed = 0; 6815 ctxt->disableSAX = 1; 6816 6817 /* 6818 * end of parsing of this node. 6819 */ 6820 nodePop(ctxt); 6821 oldname = namePop(ctxt); 6822 spacePop(ctxt); 6823 if (oldname != NULL) { 6824#ifdef DEBUG_STACK 6825 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6826#endif 6827 xmlFree(oldname); 6828 } 6829 return; 6830 } 6831 6832 /* 6833 * parse the end of tag: '</' should be here. 6834 */ 6835 xmlParseEndTag(ctxt); 6836 6837 /* 6838 * Capture end position and add node 6839 */ 6840 if ( ret != NULL && ctxt->record_info ) { 6841 node_info.end_pos = ctxt->input->consumed + 6842 (CUR_PTR - ctxt->input->base); 6843 node_info.end_line = ctxt->input->line; 6844 node_info.node = ret; 6845 xmlParserAddNodeInfo(ctxt, &node_info); 6846 } 6847} 6848 6849/** 6850 * xmlParseVersionNum: 6851 * @ctxt: an XML parser context 6852 * 6853 * parse the XML version value. 
6854 * 6855 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6856 * 6857 * Returns the string giving the XML version number, or NULL 6858 */ 6859xmlChar * 6860xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6861 xmlChar *buf = NULL; 6862 int len = 0; 6863 int size = 10; 6864 xmlChar cur; 6865 6866 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6867 if (buf == NULL) { 6868 xmlGenericError(xmlGenericErrorContext, 6869 "malloc of %d byte failed\n", size); 6870 return(NULL); 6871 } 6872 cur = CUR; 6873 while (((cur >= 'a') && (cur <= 'z')) || 6874 ((cur >= 'A') && (cur <= 'Z')) || 6875 ((cur >= '0') && (cur <= '9')) || 6876 (cur == '_') || (cur == '.') || 6877 (cur == ':') || (cur == '-')) { 6878 if (len + 1 >= size) { 6879 size *= 2; 6880 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6881 if (buf == NULL) { 6882 xmlGenericError(xmlGenericErrorContext, 6883 "realloc of %d byte failed\n", size); 6884 return(NULL); 6885 } 6886 } 6887 buf[len++] = cur; 6888 NEXT; 6889 cur=CUR; 6890 } 6891 buf[len] = 0; 6892 return(buf); 6893} 6894 6895/** 6896 * xmlParseVersionInfo: 6897 * @ctxt: an XML parser context 6898 * 6899 * parse the XML version. 6900 * 6901 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6902 * 6903 * [25] Eq ::= S? '=' S? 6904 * 6905 * Returns the version string, e.g. 
"1.0" 6906 */ 6907 6908xmlChar * 6909xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6910 xmlChar *version = NULL; 6911 const xmlChar *q; 6912 6913 if ((RAW == 'v') && (NXT(1) == 'e') && 6914 (NXT(2) == 'r') && (NXT(3) == 's') && 6915 (NXT(4) == 'i') && (NXT(5) == 'o') && 6916 (NXT(6) == 'n')) { 6917 SKIP(7); 6918 SKIP_BLANKS; 6919 if (RAW != '=') { 6920 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6922 ctxt->sax->error(ctxt->userData, 6923 "xmlParseVersionInfo : expected '='\n"); 6924 ctxt->wellFormed = 0; 6925 ctxt->disableSAX = 1; 6926 return(NULL); 6927 } 6928 NEXT; 6929 SKIP_BLANKS; 6930 if (RAW == '"') { 6931 NEXT; 6932 q = CUR_PTR; 6933 version = xmlParseVersionNum(ctxt); 6934 if (RAW != '"') { 6935 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6937 ctxt->sax->error(ctxt->userData, 6938 "String not closed\n%.50s\n", q); 6939 ctxt->wellFormed = 0; 6940 ctxt->disableSAX = 1; 6941 } else 6942 NEXT; 6943 } else if (RAW == '\''){ 6944 NEXT; 6945 q = CUR_PTR; 6946 version = xmlParseVersionNum(ctxt); 6947 if (RAW != '\'') { 6948 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6950 ctxt->sax->error(ctxt->userData, 6951 "String not closed\n%.50s\n", q); 6952 ctxt->wellFormed = 0; 6953 ctxt->disableSAX = 1; 6954 } else 6955 NEXT; 6956 } else { 6957 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6959 ctxt->sax->error(ctxt->userData, 6960 "xmlParseVersionInfo : expected ' or \"\n"); 6961 ctxt->wellFormed = 0; 6962 ctxt->disableSAX = 1; 6963 } 6964 } 6965 return(version); 6966} 6967 6968/** 6969 * xmlParseEncName: 6970 * @ctxt: an XML parser context 6971 * 6972 * parse the XML encoding name 6973 * 6974 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6975 * 6976 * Returns the encoding name value or NULL 6977 */ 6978xmlChar * 
6979xmlParseEncName(xmlParserCtxtPtr ctxt) { 6980 xmlChar *buf = NULL; 6981 int len = 0; 6982 int size = 10; 6983 xmlChar cur; 6984 6985 cur = CUR; 6986 if (((cur >= 'a') && (cur <= 'z')) || 6987 ((cur >= 'A') && (cur <= 'Z'))) { 6988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6989 if (buf == NULL) { 6990 xmlGenericError(xmlGenericErrorContext, 6991 "malloc of %d byte failed\n", size); 6992 return(NULL); 6993 } 6994 6995 buf[len++] = cur; 6996 NEXT; 6997 cur = CUR; 6998 while (((cur >= 'a') && (cur <= 'z')) || 6999 ((cur >= 'A') && (cur <= 'Z')) || 7000 ((cur >= '0') && (cur <= '9')) || 7001 (cur == '.') || (cur == '_') || 7002 (cur == '-')) { 7003 if (len + 1 >= size) { 7004 size *= 2; 7005 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7006 if (buf == NULL) { 7007 xmlGenericError(xmlGenericErrorContext, 7008 "realloc of %d byte failed\n", size); 7009 return(NULL); 7010 } 7011 } 7012 buf[len++] = cur; 7013 NEXT; 7014 cur = CUR; 7015 if (cur == 0) { 7016 SHRINK; 7017 GROW; 7018 cur = CUR; 7019 } 7020 } 7021 buf[len] = 0; 7022 } else { 7023 ctxt->errNo = XML_ERR_ENCODING_NAME; 7024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7025 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 7026 ctxt->wellFormed = 0; 7027 ctxt->disableSAX = 1; 7028 } 7029 return(buf); 7030} 7031 7032/** 7033 * xmlParseEncodingDecl: 7034 * @ctxt: an XML parser context 7035 * 7036 * parse the XML encoding declaration 7037 * 7038 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 7039 * 7040 * this setups the conversion filters. 
7041 * 7042 * Returns the encoding value or NULL 7043 */ 7044 7045xmlChar * 7046xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 7047 xmlChar *encoding = NULL; 7048 const xmlChar *q; 7049 7050 SKIP_BLANKS; 7051 if ((RAW == 'e') && (NXT(1) == 'n') && 7052 (NXT(2) == 'c') && (NXT(3) == 'o') && 7053 (NXT(4) == 'd') && (NXT(5) == 'i') && 7054 (NXT(6) == 'n') && (NXT(7) == 'g')) { 7055 SKIP(8); 7056 SKIP_BLANKS; 7057 if (RAW != '=') { 7058 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7060 ctxt->sax->error(ctxt->userData, 7061 "xmlParseEncodingDecl : expected '='\n"); 7062 ctxt->wellFormed = 0; 7063 ctxt->disableSAX = 1; 7064 return(NULL); 7065 } 7066 NEXT; 7067 SKIP_BLANKS; 7068 if (RAW == '"') { 7069 NEXT; 7070 q = CUR_PTR; 7071 encoding = xmlParseEncName(ctxt); 7072 if (RAW != '"') { 7073 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7075 ctxt->sax->error(ctxt->userData, 7076 "String not closed\n%.50s\n", q); 7077 ctxt->wellFormed = 0; 7078 ctxt->disableSAX = 1; 7079 } else 7080 NEXT; 7081 } else if (RAW == '\''){ 7082 NEXT; 7083 q = CUR_PTR; 7084 encoding = xmlParseEncName(ctxt); 7085 if (RAW != '\'') { 7086 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7088 ctxt->sax->error(ctxt->userData, 7089 "String not closed\n%.50s\n", q); 7090 ctxt->wellFormed = 0; 7091 ctxt->disableSAX = 1; 7092 } else 7093 NEXT; 7094 } else if (RAW == '"'){ 7095 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7097 ctxt->sax->error(ctxt->userData, 7098 "xmlParseEncodingDecl : expected ' or \"\n"); 7099 ctxt->wellFormed = 0; 7100 ctxt->disableSAX = 1; 7101 } 7102 if (encoding != NULL) { 7103 xmlCharEncoding enc; 7104 xmlCharEncodingHandlerPtr handler; 7105 7106 if (ctxt->input->encoding != NULL) 7107 xmlFree((xmlChar *) ctxt->input->encoding); 7108 ctxt->input->encoding = encoding; 
7109 7110 enc = xmlParseCharEncoding((const char *) encoding); 7111 /* 7112 * registered set of known encodings 7113 */ 7114 if (enc != XML_CHAR_ENCODING_ERROR) { 7115 xmlSwitchEncoding(ctxt, enc); 7116 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7117 xmlFree(encoding); 7118 return(NULL); 7119 } 7120 } else { 7121 /* 7122 * fallback for unknown encodings 7123 */ 7124 handler = xmlFindCharEncodingHandler((const char *) encoding); 7125 if (handler != NULL) { 7126 xmlSwitchToEncoding(ctxt, handler); 7127 } else { 7128 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 7129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7130 ctxt->sax->error(ctxt->userData, 7131 "Unsupported encoding %s\n", encoding); 7132 return(NULL); 7133 } 7134 } 7135 } 7136 } 7137 return(encoding); 7138} 7139 7140/** 7141 * xmlParseSDDecl: 7142 * @ctxt: an XML parser context 7143 * 7144 * parse the XML standalone declaration 7145 * 7146 * [32] SDDecl ::= S 'standalone' Eq 7147 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 7148 * 7149 * [ VC: Standalone Document Declaration ] 7150 * TODO The standalone document declaration must have the value "no" 7151 * if any external markup declarations contain declarations of: 7152 * - attributes with default values, if elements to which these 7153 * attributes apply appear in the document without specifications 7154 * of values for these attributes, or 7155 * - entities (other than amp, lt, gt, apos, quot), if references 7156 * to those entities appear in the document, or 7157 * - attributes with values subject to normalization, where the 7158 * attribute appears in the document with a value which will change 7159 * as a result of normalization, or 7160 * - element types with element content, if white space occurs directly 7161 * within any instance of those types. 
7162 * 7163 * Returns 1 if standalone, 0 otherwise 7164 */ 7165 7166int 7167xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 7168 int standalone = -1; 7169 7170 SKIP_BLANKS; 7171 if ((RAW == 's') && (NXT(1) == 't') && 7172 (NXT(2) == 'a') && (NXT(3) == 'n') && 7173 (NXT(4) == 'd') && (NXT(5) == 'a') && 7174 (NXT(6) == 'l') && (NXT(7) == 'o') && 7175 (NXT(8) == 'n') && (NXT(9) == 'e')) { 7176 SKIP(10); 7177 SKIP_BLANKS; 7178 if (RAW != '=') { 7179 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7181 ctxt->sax->error(ctxt->userData, 7182 "XML standalone declaration : expected '='\n"); 7183 ctxt->wellFormed = 0; 7184 ctxt->disableSAX = 1; 7185 return(standalone); 7186 } 7187 NEXT; 7188 SKIP_BLANKS; 7189 if (RAW == '\''){ 7190 NEXT; 7191 if ((RAW == 'n') && (NXT(1) == 'o')) { 7192 standalone = 0; 7193 SKIP(2); 7194 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7195 (NXT(2) == 's')) { 7196 standalone = 1; 7197 SKIP(3); 7198 } else { 7199 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7201 ctxt->sax->error(ctxt->userData, 7202 "standalone accepts only 'yes' or 'no'\n"); 7203 ctxt->wellFormed = 0; 7204 ctxt->disableSAX = 1; 7205 } 7206 if (RAW != '\'') { 7207 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7209 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7210 ctxt->wellFormed = 0; 7211 ctxt->disableSAX = 1; 7212 } else 7213 NEXT; 7214 } else if (RAW == '"'){ 7215 NEXT; 7216 if ((RAW == 'n') && (NXT(1) == 'o')) { 7217 standalone = 0; 7218 SKIP(2); 7219 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7220 (NXT(2) == 's')) { 7221 standalone = 1; 7222 SKIP(3); 7223 } else { 7224 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7226 ctxt->sax->error(ctxt->userData, 7227 "standalone accepts only 'yes' or 'no'\n"); 7228 ctxt->wellFormed = 0; 7229 ctxt->disableSAX = 1; 7230 
} 7231 if (RAW != '"') { 7232 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7234 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7235 ctxt->wellFormed = 0; 7236 ctxt->disableSAX = 1; 7237 } else 7238 NEXT; 7239 } else { 7240 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7242 ctxt->sax->error(ctxt->userData, 7243 "Standalone value not found\n"); 7244 ctxt->wellFormed = 0; 7245 ctxt->disableSAX = 1; 7246 } 7247 } 7248 return(standalone); 7249} 7250 7251/** 7252 * xmlParseXMLDecl: 7253 * @ctxt: an XML parser context 7254 * 7255 * parse an XML declaration header 7256 * 7257 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 7258 */ 7259 7260void 7261xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7262 xmlChar *version; 7263 7264 /* 7265 * We know that '<?xml' is here. 7266 */ 7267 SKIP(5); 7268 7269 if (!IS_BLANK(RAW)) { 7270 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7272 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7273 ctxt->wellFormed = 0; 7274 ctxt->disableSAX = 1; 7275 } 7276 SKIP_BLANKS; 7277 7278 /* 7279 * We should have the VersionInfo here. 
7280 */ 7281 version = xmlParseVersionInfo(ctxt); 7282 if (version == NULL) 7283 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7284 else if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 7285 /* 7286 * TODO: Blueberry should be detected here 7287 */ 7288 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7289 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", 7290 version); 7291 } 7292 ctxt->version = xmlStrdup(version); 7293 xmlFree(version); 7294 7295 /* 7296 * We may have the encoding declaration 7297 */ 7298 if (!IS_BLANK(RAW)) { 7299 if ((RAW == '?') && (NXT(1) == '>')) { 7300 SKIP(2); 7301 return; 7302 } 7303 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7305 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7306 ctxt->wellFormed = 0; 7307 ctxt->disableSAX = 1; 7308 } 7309 xmlParseEncodingDecl(ctxt); 7310 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7311 /* 7312 * The XML REC instructs us to stop parsing right here 7313 */ 7314 return; 7315 } 7316 7317 /* 7318 * We may have the standalone status. 7319 */ 7320 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7321 if ((RAW == '?') && (NXT(1) == '>')) { 7322 SKIP(2); 7323 return; 7324 } 7325 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7327 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7328 ctxt->wellFormed = 0; 7329 ctxt->disableSAX = 1; 7330 } 7331 SKIP_BLANKS; 7332 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7333 7334 SKIP_BLANKS; 7335 if ((RAW == '?') && (NXT(1) == '>')) { 7336 SKIP(2); 7337 } else if (RAW == '>') { 7338 /* Deprecated old WD ... 
*/ 7339 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7341 ctxt->sax->error(ctxt->userData, 7342 "XML declaration must end-up with '?>'\n"); 7343 ctxt->wellFormed = 0; 7344 ctxt->disableSAX = 1; 7345 NEXT; 7346 } else { 7347 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7349 ctxt->sax->error(ctxt->userData, 7350 "parsing XML declaration: '?>' expected\n"); 7351 ctxt->wellFormed = 0; 7352 ctxt->disableSAX = 1; 7353 MOVETO_ENDTAG(CUR_PTR); 7354 NEXT; 7355 } 7356} 7357 7358/** 7359 * xmlParseMisc: 7360 * @ctxt: an XML parser context 7361 * 7362 * parse an XML Misc* optionnal field. 7363 * 7364 * [27] Misc ::= Comment | PI | S 7365 */ 7366 7367void 7368xmlParseMisc(xmlParserCtxtPtr ctxt) { 7369 while (((RAW == '<') && (NXT(1) == '?')) || 7370 ((RAW == '<') && (NXT(1) == '!') && 7371 (NXT(2) == '-') && (NXT(3) == '-')) || 7372 IS_BLANK(CUR)) { 7373 if ((RAW == '<') && (NXT(1) == '?')) { 7374 xmlParsePI(ctxt); 7375 } else if (IS_BLANK(CUR)) { 7376 NEXT; 7377 } else 7378 xmlParseComment(ctxt); 7379 } 7380} 7381 7382/** 7383 * xmlParseDocument: 7384 * @ctxt: an XML parser context 7385 * 7386 * parse an XML document (and build a tree if using the standard SAX 7387 * interface). 7388 * 7389 * [1] document ::= prolog element Misc* 7390 * 7391 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7392 * 7393 * Returns 0, -1 in case of error. the parser context is augmented 7394 * as a result of the parsing. 7395 */ 7396 7397int 7398xmlParseDocument(xmlParserCtxtPtr ctxt) { 7399 xmlChar start[4]; 7400 xmlCharEncoding enc; 7401 7402 xmlInitParser(); 7403 7404 GROW; 7405 7406 /* 7407 * SAX: beginning of the document processing. 
7408 */ 7409 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7410 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7411 7412 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 7413 /* 7414 * Get the 4 first bytes and decode the charset 7415 * if enc != XML_CHAR_ENCODING_NONE 7416 * plug some encoding conversion routines. 7417 */ 7418 start[0] = RAW; 7419 start[1] = NXT(1); 7420 start[2] = NXT(2); 7421 start[3] = NXT(3); 7422 enc = xmlDetectCharEncoding(start, 4); 7423 if (enc != XML_CHAR_ENCODING_NONE) { 7424 xmlSwitchEncoding(ctxt, enc); 7425 } 7426 } 7427 7428 7429 if (CUR == 0) { 7430 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7432 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7433 ctxt->wellFormed = 0; 7434 ctxt->disableSAX = 1; 7435 } 7436 7437 /* 7438 * Check for the XMLDecl in the Prolog. 7439 */ 7440 GROW; 7441 if ((RAW == '<') && (NXT(1) == '?') && 7442 (NXT(2) == 'x') && (NXT(3) == 'm') && 7443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7444 7445 /* 7446 * Note that we will switch encoding on the fly. 7447 */ 7448 xmlParseXMLDecl(ctxt); 7449 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7450 /* 7451 * The XML REC instructs us to stop parsing right here 7452 */ 7453 return(-1); 7454 } 7455 ctxt->standalone = ctxt->input->standalone; 7456 SKIP_BLANKS; 7457 } else { 7458 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7459 } 7460 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7461 ctxt->sax->startDocument(ctxt->userData); 7462 7463 /* 7464 * The Misc part of the Prolog 7465 */ 7466 GROW; 7467 xmlParseMisc(ctxt); 7468 7469 /* 7470 * Then possibly doc type declaration(s) and more Misc 7471 * (doctypedecl Misc*)? 
7472 */ 7473 GROW; 7474 if ((RAW == '<') && (NXT(1) == '!') && 7475 (NXT(2) == 'D') && (NXT(3) == 'O') && 7476 (NXT(4) == 'C') && (NXT(5) == 'T') && 7477 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7478 (NXT(8) == 'E')) { 7479 7480 ctxt->inSubset = 1; 7481 xmlParseDocTypeDecl(ctxt); 7482 if (RAW == '[') { 7483 ctxt->instate = XML_PARSER_DTD; 7484 xmlParseInternalSubset(ctxt); 7485 } 7486 7487 /* 7488 * Create and update the external subset. 7489 */ 7490 ctxt->inSubset = 2; 7491 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7492 (!ctxt->disableSAX)) 7493 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7494 ctxt->extSubSystem, ctxt->extSubURI); 7495 ctxt->inSubset = 0; 7496 7497 7498 ctxt->instate = XML_PARSER_PROLOG; 7499 xmlParseMisc(ctxt); 7500 } 7501 7502 /* 7503 * Time to start parsing the tree itself 7504 */ 7505 GROW; 7506 if (RAW != '<') { 7507 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7509 ctxt->sax->error(ctxt->userData, 7510 "Start tag expected, '<' not found\n"); 7511 ctxt->wellFormed = 0; 7512 ctxt->disableSAX = 1; 7513 ctxt->instate = XML_PARSER_EOF; 7514 } else { 7515 ctxt->instate = XML_PARSER_CONTENT; 7516 xmlParseElement(ctxt); 7517 ctxt->instate = XML_PARSER_EPILOG; 7518 7519 7520 /* 7521 * The Misc part at the end 7522 */ 7523 xmlParseMisc(ctxt); 7524 7525 if (RAW != 0) { 7526 ctxt->errNo = XML_ERR_DOCUMENT_END; 7527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7528 ctxt->sax->error(ctxt->userData, 7529 "Extra content at the end of the document\n"); 7530 ctxt->wellFormed = 0; 7531 ctxt->disableSAX = 1; 7532 } 7533 ctxt->instate = XML_PARSER_EOF; 7534 } 7535 7536 /* 7537 * SAX: end of the document processing. 7538 */ 7539 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7540 (!ctxt->disableSAX)) 7541 ctxt->sax->endDocument(ctxt->userData); 7542 7543 if (! 
ctxt->wellFormed) return(-1); 7544 return(0); 7545} 7546 7547/** 7548 * xmlParseExtParsedEnt: 7549 * @ctxt: an XML parser context 7550 * 7551 * parse a genreral parsed entity 7552 * An external general parsed entity is well-formed if it matches the 7553 * production labeled extParsedEnt. 7554 * 7555 * [78] extParsedEnt ::= TextDecl? content 7556 * 7557 * Returns 0, -1 in case of error. the parser context is augmented 7558 * as a result of the parsing. 7559 */ 7560 7561int 7562xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7563 xmlChar start[4]; 7564 xmlCharEncoding enc; 7565 7566 xmlDefaultSAXHandlerInit(); 7567 7568 GROW; 7569 7570 /* 7571 * SAX: beginning of the document processing. 7572 */ 7573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7574 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7575 7576 /* 7577 * Get the 4 first bytes and decode the charset 7578 * if enc != XML_CHAR_ENCODING_NONE 7579 * plug some encoding conversion routines. 7580 */ 7581 start[0] = RAW; 7582 start[1] = NXT(1); 7583 start[2] = NXT(2); 7584 start[3] = NXT(3); 7585 enc = xmlDetectCharEncoding(start, 4); 7586 if (enc != XML_CHAR_ENCODING_NONE) { 7587 xmlSwitchEncoding(ctxt, enc); 7588 } 7589 7590 7591 if (CUR == 0) { 7592 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7594 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7595 ctxt->wellFormed = 0; 7596 ctxt->disableSAX = 1; 7597 } 7598 7599 /* 7600 * Check for the XMLDecl in the Prolog. 7601 */ 7602 GROW; 7603 if ((RAW == '<') && (NXT(1) == '?') && 7604 (NXT(2) == 'x') && (NXT(3) == 'm') && 7605 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7606 7607 /* 7608 * Note that we will switch encoding on the fly. 
7609 */ 7610 xmlParseXMLDecl(ctxt); 7611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7612 /* 7613 * The XML REC instructs us to stop parsing right here 7614 */ 7615 return(-1); 7616 } 7617 SKIP_BLANKS; 7618 } else { 7619 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7620 } 7621 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7622 ctxt->sax->startDocument(ctxt->userData); 7623 7624 /* 7625 * Doing validity checking on chunk doesn't make sense 7626 */ 7627 ctxt->instate = XML_PARSER_CONTENT; 7628 ctxt->validate = 0; 7629 ctxt->loadsubset = 0; 7630 ctxt->depth = 0; 7631 7632 xmlParseContent(ctxt); 7633 7634 if ((RAW == '<') && (NXT(1) == '/')) { 7635 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7637 ctxt->sax->error(ctxt->userData, 7638 "chunk is not well balanced\n"); 7639 ctxt->wellFormed = 0; 7640 ctxt->disableSAX = 1; 7641 } else if (RAW != 0) { 7642 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7644 ctxt->sax->error(ctxt->userData, 7645 "extra content at the end of well balanced chunk\n"); 7646 ctxt->wellFormed = 0; 7647 ctxt->disableSAX = 1; 7648 } 7649 7650 /* 7651 * SAX: end of the document processing. 7652 */ 7653 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7654 (!ctxt->disableSAX)) 7655 ctxt->sax->endDocument(ctxt->userData); 7656 7657 if (! 
ctxt->wellFormed) return(-1); 7658 return(0); 7659} 7660 7661/************************************************************************ 7662 * * 7663 * Progressive parsing interfaces * 7664 * * 7665 ************************************************************************/ 7666 7667/** 7668 * xmlParseLookupSequence: 7669 * @ctxt: an XML parser context 7670 * @first: the first char to lookup 7671 * @next: the next char to lookup or zero 7672 * @third: the next char to lookup or zero 7673 * 7674 * Try to find if a sequence (first, next, third) or just (first next) or 7675 * (first) is available in the input stream. 7676 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7677 * to avoid rescanning sequences of bytes, it DOES change the state of the 7678 * parser, do not use liberally. 7679 * 7680 * Returns the index to the current parsing point if the full sequence 7681 * is available, -1 otherwise. 7682 */ 7683static int 7684xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7685 xmlChar next, xmlChar third) { 7686 int base, len; 7687 xmlParserInputPtr in; 7688 const xmlChar *buf; 7689 7690 in = ctxt->input; 7691 if (in == NULL) return(-1); 7692 base = in->cur - in->base; 7693 if (base < 0) return(-1); 7694 if (ctxt->checkIndex > base) 7695 base = ctxt->checkIndex; 7696 if (in->buf == NULL) { 7697 buf = in->base; 7698 len = in->length; 7699 } else { 7700 buf = in->buf->buffer->content; 7701 len = in->buf->buffer->use; 7702 } 7703 /* take into account the sequence length */ 7704 if (third) len -= 2; 7705 else if (next) len --; 7706 for (;base < len;base++) { 7707 if (buf[base] == first) { 7708 if (third != 0) { 7709 if ((buf[base + 1] != next) || 7710 (buf[base + 2] != third)) continue; 7711 } else if (next != 0) { 7712 if (buf[base + 1] != next) continue; 7713 } 7714 ctxt->checkIndex = 0; 7715#ifdef DEBUG_PUSH 7716 if (next == 0) 7717 xmlGenericError(xmlGenericErrorContext, 7718 "PP: lookup '%c' found at %d\n", 7719 first, base); 
7720 else if (third == 0) 7721 xmlGenericError(xmlGenericErrorContext, 7722 "PP: lookup '%c%c' found at %d\n", 7723 first, next, base); 7724 else 7725 xmlGenericError(xmlGenericErrorContext, 7726 "PP: lookup '%c%c%c' found at %d\n", 7727 first, next, third, base); 7728#endif 7729 return(base - (in->cur - in->base)); 7730 } 7731 } 7732 ctxt->checkIndex = base; 7733#ifdef DEBUG_PUSH 7734 if (next == 0) 7735 xmlGenericError(xmlGenericErrorContext, 7736 "PP: lookup '%c' failed\n", first); 7737 else if (third == 0) 7738 xmlGenericError(xmlGenericErrorContext, 7739 "PP: lookup '%c%c' failed\n", first, next); 7740 else 7741 xmlGenericError(xmlGenericErrorContext, 7742 "PP: lookup '%c%c%c' failed\n", first, next, third); 7743#endif 7744 return(-1); 7745} 7746 7747/** 7748 * xmlParseTryOrFinish: 7749 * @ctxt: an XML parser context 7750 * @terminate: last chunk indicator 7751 * 7752 * Try to progress on parsing 7753 * 7754 * Returns zero if no parsing was possible 7755 */ 7756static int 7757xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7758 int ret = 0; 7759 int avail; 7760 xmlChar cur, next; 7761 7762#ifdef DEBUG_PUSH 7763 switch (ctxt->instate) { 7764 case XML_PARSER_EOF: 7765 xmlGenericError(xmlGenericErrorContext, 7766 "PP: try EOF\n"); break; 7767 case XML_PARSER_START: 7768 xmlGenericError(xmlGenericErrorContext, 7769 "PP: try START\n"); break; 7770 case XML_PARSER_MISC: 7771 xmlGenericError(xmlGenericErrorContext, 7772 "PP: try MISC\n");break; 7773 case XML_PARSER_COMMENT: 7774 xmlGenericError(xmlGenericErrorContext, 7775 "PP: try COMMENT\n");break; 7776 case XML_PARSER_PROLOG: 7777 xmlGenericError(xmlGenericErrorContext, 7778 "PP: try PROLOG\n");break; 7779 case XML_PARSER_START_TAG: 7780 xmlGenericError(xmlGenericErrorContext, 7781 "PP: try START_TAG\n");break; 7782 case XML_PARSER_CONTENT: 7783 xmlGenericError(xmlGenericErrorContext, 7784 "PP: try CONTENT\n");break; 7785 case XML_PARSER_CDATA_SECTION: 7786 xmlGenericError(xmlGenericErrorContext, 7787 
"PP: try CDATA_SECTION\n");break; 7788 case XML_PARSER_END_TAG: 7789 xmlGenericError(xmlGenericErrorContext, 7790 "PP: try END_TAG\n");break; 7791 case XML_PARSER_ENTITY_DECL: 7792 xmlGenericError(xmlGenericErrorContext, 7793 "PP: try ENTITY_DECL\n");break; 7794 case XML_PARSER_ENTITY_VALUE: 7795 xmlGenericError(xmlGenericErrorContext, 7796 "PP: try ENTITY_VALUE\n");break; 7797 case XML_PARSER_ATTRIBUTE_VALUE: 7798 xmlGenericError(xmlGenericErrorContext, 7799 "PP: try ATTRIBUTE_VALUE\n");break; 7800 case XML_PARSER_DTD: 7801 xmlGenericError(xmlGenericErrorContext, 7802 "PP: try DTD\n");break; 7803 case XML_PARSER_EPILOG: 7804 xmlGenericError(xmlGenericErrorContext, 7805 "PP: try EPILOG\n");break; 7806 case XML_PARSER_PI: 7807 xmlGenericError(xmlGenericErrorContext, 7808 "PP: try PI\n");break; 7809 case XML_PARSER_IGNORE: 7810 xmlGenericError(xmlGenericErrorContext, 7811 "PP: try IGNORE\n");break; 7812 } 7813#endif 7814 7815 while (1) { 7816 /* 7817 * Pop-up of finished entities. 7818 */ 7819 while ((RAW == 0) && (ctxt->inputNr > 1)) 7820 xmlPopInput(ctxt); 7821 7822 if (ctxt->input ==NULL) break; 7823 if (ctxt->input->buf == NULL) 7824 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7825 else 7826 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7827 if (avail < 1) 7828 goto done; 7829 switch (ctxt->instate) { 7830 case XML_PARSER_EOF: 7831 /* 7832 * Document parsing is done ! 7833 */ 7834 goto done; 7835 case XML_PARSER_START: 7836 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 7837 xmlChar start[4]; 7838 xmlCharEncoding enc; 7839 7840 /* 7841 * Very first chars read from the document flow. 7842 */ 7843 if (avail < 4) 7844 goto done; 7845 7846 /* 7847 * Get the 4 first bytes and decode the charset 7848 * if enc != XML_CHAR_ENCODING_NONE 7849 * plug some encoding conversion routines. 
7850 */ 7851 start[0] = RAW; 7852 start[1] = NXT(1); 7853 start[2] = NXT(2); 7854 start[3] = NXT(3); 7855 enc = xmlDetectCharEncoding(start, 4); 7856 if (enc != XML_CHAR_ENCODING_NONE) { 7857 xmlSwitchEncoding(ctxt, enc); 7858 } 7859 break; 7860 } 7861 7862 cur = ctxt->input->cur[0]; 7863 next = ctxt->input->cur[1]; 7864 if (cur == 0) { 7865 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7866 ctxt->sax->setDocumentLocator(ctxt->userData, 7867 &xmlDefaultSAXLocator); 7868 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7870 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7871 ctxt->wellFormed = 0; 7872 ctxt->disableSAX = 1; 7873 ctxt->instate = XML_PARSER_EOF; 7874#ifdef DEBUG_PUSH 7875 xmlGenericError(xmlGenericErrorContext, 7876 "PP: entering EOF\n"); 7877#endif 7878 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7879 ctxt->sax->endDocument(ctxt->userData); 7880 goto done; 7881 } 7882 if ((cur == '<') && (next == '?')) { 7883 /* PI or XML decl */ 7884 if (avail < 5) return(ret); 7885 if ((!terminate) && 7886 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7887 return(ret); 7888 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7889 ctxt->sax->setDocumentLocator(ctxt->userData, 7890 &xmlDefaultSAXLocator); 7891 if ((ctxt->input->cur[2] == 'x') && 7892 (ctxt->input->cur[3] == 'm') && 7893 (ctxt->input->cur[4] == 'l') && 7894 (IS_BLANK(ctxt->input->cur[5]))) { 7895 ret += 5; 7896#ifdef DEBUG_PUSH 7897 xmlGenericError(xmlGenericErrorContext, 7898 "PP: Parsing XML Decl\n"); 7899#endif 7900 xmlParseXMLDecl(ctxt); 7901 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7902 /* 7903 * The XML REC instructs us to stop parsing right 7904 * here 7905 */ 7906 ctxt->instate = XML_PARSER_EOF; 7907 return(0); 7908 } 7909 ctxt->standalone = ctxt->input->standalone; 7910 if ((ctxt->encoding == NULL) && 7911 (ctxt->input->encoding != NULL)) 7912 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 7913 if 
((ctxt->sax) && (ctxt->sax->startDocument) && 7914 (!ctxt->disableSAX)) 7915 ctxt->sax->startDocument(ctxt->userData); 7916 ctxt->instate = XML_PARSER_MISC; 7917#ifdef DEBUG_PUSH 7918 xmlGenericError(xmlGenericErrorContext, 7919 "PP: entering MISC\n"); 7920#endif 7921 } else { 7922 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7923 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7924 (!ctxt->disableSAX)) 7925 ctxt->sax->startDocument(ctxt->userData); 7926 ctxt->instate = XML_PARSER_MISC; 7927#ifdef DEBUG_PUSH 7928 xmlGenericError(xmlGenericErrorContext, 7929 "PP: entering MISC\n"); 7930#endif 7931 } 7932 } else { 7933 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7934 ctxt->sax->setDocumentLocator(ctxt->userData, 7935 &xmlDefaultSAXLocator); 7936 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7937 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7938 (!ctxt->disableSAX)) 7939 ctxt->sax->startDocument(ctxt->userData); 7940 ctxt->instate = XML_PARSER_MISC; 7941#ifdef DEBUG_PUSH 7942 xmlGenericError(xmlGenericErrorContext, 7943 "PP: entering MISC\n"); 7944#endif 7945 } 7946 break; 7947 case XML_PARSER_MISC: 7948 SKIP_BLANKS; 7949 if (ctxt->input->buf == NULL) 7950 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7951 else 7952 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7953 if (avail < 2) 7954 goto done; 7955 cur = ctxt->input->cur[0]; 7956 next = ctxt->input->cur[1]; 7957 if ((cur == '<') && (next == '?')) { 7958 if ((!terminate) && 7959 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7960 goto done; 7961#ifdef DEBUG_PUSH 7962 xmlGenericError(xmlGenericErrorContext, 7963 "PP: Parsing PI\n"); 7964#endif 7965 xmlParsePI(ctxt); 7966 } else if ((cur == '<') && (next == '!') && 7967 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7968 if ((!terminate) && 7969 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7970 goto done; 7971#ifdef DEBUG_PUSH 7972 
xmlGenericError(xmlGenericErrorContext, 7973 "PP: Parsing Comment\n"); 7974#endif 7975 xmlParseComment(ctxt); 7976 ctxt->instate = XML_PARSER_MISC; 7977 } else if ((cur == '<') && (next == '!') && 7978 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 7979 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 7980 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 7981 (ctxt->input->cur[8] == 'E')) { 7982 if ((!terminate) && 7983 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7984 goto done; 7985#ifdef DEBUG_PUSH 7986 xmlGenericError(xmlGenericErrorContext, 7987 "PP: Parsing internal subset\n"); 7988#endif 7989 ctxt->inSubset = 1; 7990 xmlParseDocTypeDecl(ctxt); 7991 if (RAW == '[') { 7992 ctxt->instate = XML_PARSER_DTD; 7993#ifdef DEBUG_PUSH 7994 xmlGenericError(xmlGenericErrorContext, 7995 "PP: entering DTD\n"); 7996#endif 7997 } else { 7998 /* 7999 * Create and update the external subset. 8000 */ 8001 ctxt->inSubset = 2; 8002 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8003 (ctxt->sax->externalSubset != NULL)) 8004 ctxt->sax->externalSubset(ctxt->userData, 8005 ctxt->intSubName, ctxt->extSubSystem, 8006 ctxt->extSubURI); 8007 ctxt->inSubset = 0; 8008 ctxt->instate = XML_PARSER_PROLOG; 8009#ifdef DEBUG_PUSH 8010 xmlGenericError(xmlGenericErrorContext, 8011 "PP: entering PROLOG\n"); 8012#endif 8013 } 8014 } else if ((cur == '<') && (next == '!') && 8015 (avail < 9)) { 8016 goto done; 8017 } else { 8018 ctxt->instate = XML_PARSER_START_TAG; 8019#ifdef DEBUG_PUSH 8020 xmlGenericError(xmlGenericErrorContext, 8021 "PP: entering START_TAG\n"); 8022#endif 8023 } 8024 break; 8025 case XML_PARSER_IGNORE: 8026 xmlGenericError(xmlGenericErrorContext, 8027 "PP: internal error, state == IGNORE"); 8028 ctxt->instate = XML_PARSER_DTD; 8029#ifdef DEBUG_PUSH 8030 xmlGenericError(xmlGenericErrorContext, 8031 "PP: entering DTD\n"); 8032#endif 8033 break; 8034 case XML_PARSER_PROLOG: 8035 SKIP_BLANKS; 8036 if (ctxt->input->buf == NULL) 8037 
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8038 else 8039 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8040 if (avail < 2) 8041 goto done; 8042 cur = ctxt->input->cur[0]; 8043 next = ctxt->input->cur[1]; 8044 if ((cur == '<') && (next == '?')) { 8045 if ((!terminate) && 8046 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8047 goto done; 8048#ifdef DEBUG_PUSH 8049 xmlGenericError(xmlGenericErrorContext, 8050 "PP: Parsing PI\n"); 8051#endif 8052 xmlParsePI(ctxt); 8053 } else if ((cur == '<') && (next == '!') && 8054 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8055 if ((!terminate) && 8056 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8057 goto done; 8058#ifdef DEBUG_PUSH 8059 xmlGenericError(xmlGenericErrorContext, 8060 "PP: Parsing Comment\n"); 8061#endif 8062 xmlParseComment(ctxt); 8063 ctxt->instate = XML_PARSER_PROLOG; 8064 } else if ((cur == '<') && (next == '!') && 8065 (avail < 4)) { 8066 goto done; 8067 } else { 8068 ctxt->instate = XML_PARSER_START_TAG; 8069#ifdef DEBUG_PUSH 8070 xmlGenericError(xmlGenericErrorContext, 8071 "PP: entering START_TAG\n"); 8072#endif 8073 } 8074 break; 8075 case XML_PARSER_EPILOG: 8076 SKIP_BLANKS; 8077 if (ctxt->input->buf == NULL) 8078 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8079 else 8080 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8081 if (avail < 2) 8082 goto done; 8083 cur = ctxt->input->cur[0]; 8084 next = ctxt->input->cur[1]; 8085 if ((cur == '<') && (next == '?')) { 8086 if ((!terminate) && 8087 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8088 goto done; 8089#ifdef DEBUG_PUSH 8090 xmlGenericError(xmlGenericErrorContext, 8091 "PP: Parsing PI\n"); 8092#endif 8093 xmlParsePI(ctxt); 8094 ctxt->instate = XML_PARSER_EPILOG; 8095 } else if ((cur == '<') && (next == '!') && 8096 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8097 if ((!terminate) && 8098 
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8099 goto done; 8100#ifdef DEBUG_PUSH 8101 xmlGenericError(xmlGenericErrorContext, 8102 "PP: Parsing Comment\n"); 8103#endif 8104 xmlParseComment(ctxt); 8105 ctxt->instate = XML_PARSER_EPILOG; 8106 } else if ((cur == '<') && (next == '!') && 8107 (avail < 4)) { 8108 goto done; 8109 } else { 8110 ctxt->errNo = XML_ERR_DOCUMENT_END; 8111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8112 ctxt->sax->error(ctxt->userData, 8113 "Extra content at the end of the document\n"); 8114 ctxt->wellFormed = 0; 8115 ctxt->disableSAX = 1; 8116 ctxt->instate = XML_PARSER_EOF; 8117#ifdef DEBUG_PUSH 8118 xmlGenericError(xmlGenericErrorContext, 8119 "PP: entering EOF\n"); 8120#endif 8121 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8122 (!ctxt->disableSAX)) 8123 ctxt->sax->endDocument(ctxt->userData); 8124 goto done; 8125 } 8126 break; 8127 case XML_PARSER_START_TAG: { 8128 xmlChar *name, *oldname; 8129 8130 if ((avail < 2) && (ctxt->inputNr == 1)) 8131 goto done; 8132 cur = ctxt->input->cur[0]; 8133 if (cur != '<') { 8134 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8136 ctxt->sax->error(ctxt->userData, 8137 "Start tag expect, '<' not found\n"); 8138 ctxt->wellFormed = 0; 8139 ctxt->disableSAX = 1; 8140 ctxt->instate = XML_PARSER_EOF; 8141#ifdef DEBUG_PUSH 8142 xmlGenericError(xmlGenericErrorContext, 8143 "PP: entering EOF\n"); 8144#endif 8145 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8146 (!ctxt->disableSAX)) 8147 ctxt->sax->endDocument(ctxt->userData); 8148 goto done; 8149 } 8150 if ((!terminate) && 8151 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8152 goto done; 8153 if (ctxt->spaceNr == 0) 8154 spacePush(ctxt, -1); 8155 else 8156 spacePush(ctxt, *ctxt->space); 8157 name = xmlParseStartTag(ctxt); 8158 if (name == NULL) { 8159 spacePop(ctxt); 8160 ctxt->instate = XML_PARSER_EOF; 8161#ifdef DEBUG_PUSH 8162 xmlGenericError(xmlGenericErrorContext, 
8163 "PP: entering EOF\n"); 8164#endif 8165 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8166 (!ctxt->disableSAX)) 8167 ctxt->sax->endDocument(ctxt->userData); 8168 goto done; 8169 } 8170 namePush(ctxt, xmlStrdup(name)); 8171 8172 /* 8173 * [ VC: Root Element Type ] 8174 * The Name in the document type declaration must match 8175 * the element type of the root element. 8176 */ 8177 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8178 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8179 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8180 8181 /* 8182 * Check for an Empty Element. 8183 */ 8184 if ((RAW == '/') && (NXT(1) == '>')) { 8185 SKIP(2); 8186 if ((ctxt->sax != NULL) && 8187 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 8188 ctxt->sax->endElement(ctxt->userData, name); 8189 xmlFree(name); 8190 oldname = namePop(ctxt); 8191 spacePop(ctxt); 8192 if (oldname != NULL) { 8193#ifdef DEBUG_STACK 8194 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8195#endif 8196 xmlFree(oldname); 8197 } 8198 if (ctxt->name == NULL) { 8199 ctxt->instate = XML_PARSER_EPILOG; 8200#ifdef DEBUG_PUSH 8201 xmlGenericError(xmlGenericErrorContext, 8202 "PP: entering EPILOG\n"); 8203#endif 8204 } else { 8205 ctxt->instate = XML_PARSER_CONTENT; 8206#ifdef DEBUG_PUSH 8207 xmlGenericError(xmlGenericErrorContext, 8208 "PP: entering CONTENT\n"); 8209#endif 8210 } 8211 break; 8212 } 8213 if (RAW == '>') { 8214 NEXT; 8215 } else { 8216 ctxt->errNo = XML_ERR_GT_REQUIRED; 8217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8218 ctxt->sax->error(ctxt->userData, 8219 "Couldn't find end of Start Tag %s\n", 8220 name); 8221 ctxt->wellFormed = 0; 8222 ctxt->disableSAX = 1; 8223 8224 /* 8225 * end of parsing of this node. 
8226 */ 8227 nodePop(ctxt); 8228 oldname = namePop(ctxt); 8229 spacePop(ctxt); 8230 if (oldname != NULL) { 8231#ifdef DEBUG_STACK 8232 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8233#endif 8234 xmlFree(oldname); 8235 } 8236 } 8237 xmlFree(name); 8238 ctxt->instate = XML_PARSER_CONTENT; 8239#ifdef DEBUG_PUSH 8240 xmlGenericError(xmlGenericErrorContext, 8241 "PP: entering CONTENT\n"); 8242#endif 8243 break; 8244 } 8245 case XML_PARSER_CONTENT: { 8246 const xmlChar *test; 8247 int cons; 8248 int tok; 8249 8250 /* 8251 * Handle preparsed entities and charRef 8252 */ 8253 if (ctxt->token != 0) { 8254 xmlChar current[2] = { 0 , 0 } ; 8255 8256 current[0] = (xmlChar) ctxt->token; 8257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8258 (ctxt->sax->characters != NULL)) 8259 ctxt->sax->characters(ctxt->userData, current, 1); 8260 ctxt->token = 0; 8261 } 8262 if ((avail < 2) && (ctxt->inputNr == 1)) 8263 goto done; 8264 cur = ctxt->input->cur[0]; 8265 next = ctxt->input->cur[1]; 8266 8267 test = CUR_PTR; 8268 cons = ctxt->input->consumed; 8269 tok = ctxt->token; 8270 if ((cur == '<') && (next == '?')) { 8271 if ((!terminate) && 8272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8273 goto done; 8274#ifdef DEBUG_PUSH 8275 xmlGenericError(xmlGenericErrorContext, 8276 "PP: Parsing PI\n"); 8277#endif 8278 xmlParsePI(ctxt); 8279 } else if ((cur == '<') && (next == '!') && 8280 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8281 if ((!terminate) && 8282 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8283 goto done; 8284#ifdef DEBUG_PUSH 8285 xmlGenericError(xmlGenericErrorContext, 8286 "PP: Parsing Comment\n"); 8287#endif 8288 xmlParseComment(ctxt); 8289 ctxt->instate = XML_PARSER_CONTENT; 8290 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8291 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8292 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8293 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8294 
(ctxt->input->cur[8] == '[')) { 8295 SKIP(9); 8296 ctxt->instate = XML_PARSER_CDATA_SECTION; 8297#ifdef DEBUG_PUSH 8298 xmlGenericError(xmlGenericErrorContext, 8299 "PP: entering CDATA_SECTION\n"); 8300#endif 8301 break; 8302 } else if ((cur == '<') && (next == '!') && 8303 (avail < 9)) { 8304 goto done; 8305 } else if ((cur == '<') && (next == '/')) { 8306 ctxt->instate = XML_PARSER_END_TAG; 8307#ifdef DEBUG_PUSH 8308 xmlGenericError(xmlGenericErrorContext, 8309 "PP: entering END_TAG\n"); 8310#endif 8311 break; 8312 } else if (cur == '<') { 8313 ctxt->instate = XML_PARSER_START_TAG; 8314#ifdef DEBUG_PUSH 8315 xmlGenericError(xmlGenericErrorContext, 8316 "PP: entering START_TAG\n"); 8317#endif 8318 break; 8319 } else if (cur == '&') { 8320 if ((!terminate) && 8321 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8322 goto done; 8323#ifdef DEBUG_PUSH 8324 xmlGenericError(xmlGenericErrorContext, 8325 "PP: Parsing Reference\n"); 8326#endif 8327 xmlParseReference(ctxt); 8328 } else { 8329 /* TODO Avoid the extra copy, handle directly !!! */ 8330 /* 8331 * Goal of the following test is: 8332 * - minimize calls to the SAX 'character' callback 8333 * when they are mergeable 8334 * - handle an problem for isBlank when we only parse 8335 * a sequence of blank chars and the next one is 8336 * not available to check against '<' presence. 8337 * - tries to homogenize the differences in SAX 8338 * callbacks beween the push and pull versions 8339 * of the parser. 8340 */ 8341 if ((ctxt->inputNr == 1) && 8342 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8343 if ((!terminate) && 8344 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8345 goto done; 8346 } 8347 ctxt->checkIndex = 0; 8348#ifdef DEBUG_PUSH 8349 xmlGenericError(xmlGenericErrorContext, 8350 "PP: Parsing char data\n"); 8351#endif 8352 xmlParseCharData(ctxt, 0); 8353 } 8354 /* 8355 * Pop-up of finished entities. 
8356 */ 8357 while ((RAW == 0) && (ctxt->inputNr > 1)) 8358 xmlPopInput(ctxt); 8359 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 8360 (tok == ctxt->token)) { 8361 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8363 ctxt->sax->error(ctxt->userData, 8364 "detected an error in element content\n"); 8365 ctxt->wellFormed = 0; 8366 ctxt->disableSAX = 1; 8367 ctxt->instate = XML_PARSER_EOF; 8368 break; 8369 } 8370 break; 8371 } 8372 case XML_PARSER_CDATA_SECTION: { 8373 /* 8374 * The Push mode need to have the SAX callback for 8375 * cdataBlock merge back contiguous callbacks. 8376 */ 8377 int base; 8378 8379 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8380 if (base < 0) { 8381 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8383 if (ctxt->sax->cdataBlock != NULL) 8384 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8385 XML_PARSER_BIG_BUFFER_SIZE); 8386 } 8387 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8388 ctxt->checkIndex = 0; 8389 } 8390 goto done; 8391 } else { 8392 if ((ctxt->sax != NULL) && (base > 0) && 8393 (!ctxt->disableSAX)) { 8394 if (ctxt->sax->cdataBlock != NULL) 8395 ctxt->sax->cdataBlock(ctxt->userData, 8396 ctxt->input->cur, base); 8397 } 8398 SKIP(base + 3); 8399 ctxt->checkIndex = 0; 8400 ctxt->instate = XML_PARSER_CONTENT; 8401#ifdef DEBUG_PUSH 8402 xmlGenericError(xmlGenericErrorContext, 8403 "PP: entering CONTENT\n"); 8404#endif 8405 } 8406 break; 8407 } 8408 case XML_PARSER_END_TAG: 8409 if (avail < 2) 8410 goto done; 8411 if ((!terminate) && 8412 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8413 goto done; 8414 xmlParseEndTag(ctxt); 8415 if (ctxt->name == NULL) { 8416 ctxt->instate = XML_PARSER_EPILOG; 8417#ifdef DEBUG_PUSH 8418 xmlGenericError(xmlGenericErrorContext, 8419 "PP: entering EPILOG\n"); 8420#endif 8421 } else { 8422 ctxt->instate = XML_PARSER_CONTENT; 8423#ifdef DEBUG_PUSH 8424 
xmlGenericError(xmlGenericErrorContext, 8425 "PP: entering CONTENT\n"); 8426#endif 8427 } 8428 break; 8429 case XML_PARSER_DTD: { 8430 /* 8431 * Sorry but progressive parsing of the internal subset 8432 * is not expected to be supported. We first check that 8433 * the full content of the internal subset is available and 8434 * the parsing is launched only at that point. 8435 * Internal subset ends up with "']' S? '>'" in an unescaped 8436 * section and not in a ']]>' sequence which are conditional 8437 * sections (whoever argued to keep that crap in XML deserve 8438 * a place in hell !). 8439 */ 8440 int base, i; 8441 xmlChar *buf; 8442 xmlChar quote = 0; 8443 8444 base = ctxt->input->cur - ctxt->input->base; 8445 if (base < 0) return(0); 8446 if (ctxt->checkIndex > base) 8447 base = ctxt->checkIndex; 8448 buf = ctxt->input->buf->buffer->content; 8449 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8450 base++) { 8451 if (quote != 0) { 8452 if (buf[base] == quote) 8453 quote = 0; 8454 continue; 8455 } 8456 if (buf[base] == '"') { 8457 quote = '"'; 8458 continue; 8459 } 8460 if (buf[base] == '\'') { 8461 quote = '\''; 8462 continue; 8463 } 8464 if (buf[base] == ']') { 8465 if ((unsigned int) base +1 >= 8466 ctxt->input->buf->buffer->use) 8467 break; 8468 if (buf[base + 1] == ']') { 8469 /* conditional crap, skip both ']' ! 
*/ 8470 base++; 8471 continue; 8472 } 8473 for (i = 0; 8474 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8475 i++) { 8476 if (buf[base + i] == '>') 8477 goto found_end_int_subset; 8478 } 8479 break; 8480 } 8481 } 8482 /* 8483 * We didn't found the end of the Internal subset 8484 */ 8485 if (quote == 0) 8486 ctxt->checkIndex = base; 8487#ifdef DEBUG_PUSH 8488 if (next == 0) 8489 xmlGenericError(xmlGenericErrorContext, 8490 "PP: lookup of int subset end filed\n"); 8491#endif 8492 goto done; 8493 8494found_end_int_subset: 8495 xmlParseInternalSubset(ctxt); 8496 ctxt->inSubset = 2; 8497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8498 (ctxt->sax->externalSubset != NULL)) 8499 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8500 ctxt->extSubSystem, ctxt->extSubURI); 8501 ctxt->inSubset = 0; 8502 ctxt->instate = XML_PARSER_PROLOG; 8503 ctxt->checkIndex = 0; 8504#ifdef DEBUG_PUSH 8505 xmlGenericError(xmlGenericErrorContext, 8506 "PP: entering PROLOG\n"); 8507#endif 8508 break; 8509 } 8510 case XML_PARSER_COMMENT: 8511 xmlGenericError(xmlGenericErrorContext, 8512 "PP: internal error, state == COMMENT\n"); 8513 ctxt->instate = XML_PARSER_CONTENT; 8514#ifdef DEBUG_PUSH 8515 xmlGenericError(xmlGenericErrorContext, 8516 "PP: entering CONTENT\n"); 8517#endif 8518 break; 8519 case XML_PARSER_PI: 8520 xmlGenericError(xmlGenericErrorContext, 8521 "PP: internal error, state == PI\n"); 8522 ctxt->instate = XML_PARSER_CONTENT; 8523#ifdef DEBUG_PUSH 8524 xmlGenericError(xmlGenericErrorContext, 8525 "PP: entering CONTENT\n"); 8526#endif 8527 break; 8528 case XML_PARSER_ENTITY_DECL: 8529 xmlGenericError(xmlGenericErrorContext, 8530 "PP: internal error, state == ENTITY_DECL\n"); 8531 ctxt->instate = XML_PARSER_DTD; 8532#ifdef DEBUG_PUSH 8533 xmlGenericError(xmlGenericErrorContext, 8534 "PP: entering DTD\n"); 8535#endif 8536 break; 8537 case XML_PARSER_ENTITY_VALUE: 8538 xmlGenericError(xmlGenericErrorContext, 8539 "PP: internal error, state == 
ENTITY_VALUE\n"); 8540 ctxt->instate = XML_PARSER_CONTENT; 8541#ifdef DEBUG_PUSH 8542 xmlGenericError(xmlGenericErrorContext, 8543 "PP: entering DTD\n"); 8544#endif 8545 break; 8546 case XML_PARSER_ATTRIBUTE_VALUE: 8547 xmlGenericError(xmlGenericErrorContext, 8548 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8549 ctxt->instate = XML_PARSER_START_TAG; 8550#ifdef DEBUG_PUSH 8551 xmlGenericError(xmlGenericErrorContext, 8552 "PP: entering START_TAG\n"); 8553#endif 8554 break; 8555 case XML_PARSER_SYSTEM_LITERAL: 8556 xmlGenericError(xmlGenericErrorContext, 8557 "PP: internal error, state == SYSTEM_LITERAL\n"); 8558 ctxt->instate = XML_PARSER_START_TAG; 8559#ifdef DEBUG_PUSH 8560 xmlGenericError(xmlGenericErrorContext, 8561 "PP: entering START_TAG\n"); 8562#endif 8563 break; 8564 } 8565 } 8566done: 8567#ifdef DEBUG_PUSH 8568 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8569#endif 8570 return(ret); 8571} 8572 8573/** 8574 * xmlParseChunk: 8575 * @ctxt: an XML parser context 8576 * @chunk: an char array 8577 * @size: the size in byte of the chunk 8578 * @terminate: last chunk indicator 8579 * 8580 * Parse a Chunk of memory 8581 * 8582 * Returns zero if no error, the xmlParserErrors otherwise. 
8583 */ 8584int 8585xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8586 int terminate) { 8587 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8588 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8589 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8590 int cur = ctxt->input->cur - ctxt->input->base; 8591 8592 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8593 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8594 ctxt->input->cur = ctxt->input->base + cur; 8595 ctxt->input->end = 8596 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8597#ifdef DEBUG_PUSH 8598 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8599#endif 8600 8601 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8602 xmlParseTryOrFinish(ctxt, terminate); 8603 } else if (ctxt->instate != XML_PARSER_EOF) { 8604 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8605 xmlParserInputBufferPtr in = ctxt->input->buf; 8606 if ((in->encoder != NULL) && (in->buffer != NULL) && 8607 (in->raw != NULL)) { 8608 int nbchars; 8609 8610 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8611 if (nbchars < 0) { 8612 xmlGenericError(xmlGenericErrorContext, 8613 "xmlParseChunk: encoder error\n"); 8614 return(XML_ERR_INVALID_ENCODING); 8615 } 8616 } 8617 } 8618 } 8619 xmlParseTryOrFinish(ctxt, terminate); 8620 if (terminate) { 8621 /* 8622 * Check for termination 8623 */ 8624 if ((ctxt->instate != XML_PARSER_EOF) && 8625 (ctxt->instate != XML_PARSER_EPILOG)) { 8626 ctxt->errNo = XML_ERR_DOCUMENT_END; 8627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8628 ctxt->sax->error(ctxt->userData, 8629 "Extra content at the end of the document\n"); 8630 ctxt->wellFormed = 0; 8631 ctxt->disableSAX = 1; 8632 } 8633 if (ctxt->instate != XML_PARSER_EOF) { 8634 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8635 (!ctxt->disableSAX)) 8636 
ctxt->sax->endDocument(ctxt->userData); 8637 } 8638 ctxt->instate = XML_PARSER_EOF; 8639 } 8640 return((xmlParserErrors) ctxt->errNo); 8641} 8642 8643/************************************************************************ 8644 * * 8645 * I/O front end functions to the parser * 8646 * * 8647 ************************************************************************/ 8648 8649/** 8650 * xmlStopParser: 8651 * @ctxt: an XML parser context 8652 * 8653 * Blocks further parser processing 8654 */ 8655void 8656xmlStopParser(xmlParserCtxtPtr ctxt) { 8657 ctxt->instate = XML_PARSER_EOF; 8658 if (ctxt->input != NULL) 8659 ctxt->input->cur = BAD_CAST""; 8660} 8661 8662/** 8663 * xmlCreatePushParserCtxt: 8664 * @sax: a SAX handler 8665 * @user_data: The user data returned on SAX callbacks 8666 * @chunk: a pointer to an array of chars 8667 * @size: number of chars in the array 8668 * @filename: an optional file name or URI 8669 * 8670 * Create a parser context for using the XML parser in push mode 8671 * To allow content encoding detection, @size should be >= 4 8672 * The value of @filename is used for fetching external entities 8673 * and error/warning reports. 
8674 * 8675 * Returns the new parser context or NULL 8676 */ 8677xmlParserCtxtPtr 8678xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8679 const char *chunk, int size, const char *filename) { 8680 xmlParserCtxtPtr ctxt; 8681 xmlParserInputPtr inputStream; 8682 xmlParserInputBufferPtr buf; 8683 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8684 8685 /* 8686 * plug some encoding conversion routines 8687 */ 8688 if ((chunk != NULL) && (size >= 4)) 8689 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8690 8691 buf = xmlAllocParserInputBuffer(enc); 8692 if (buf == NULL) return(NULL); 8693 8694 ctxt = xmlNewParserCtxt(); 8695 if (ctxt == NULL) { 8696 xmlFree(buf); 8697 return(NULL); 8698 } 8699 if (sax != NULL) { 8700 if (ctxt->sax != &xmlDefaultSAXHandler) 8701 xmlFree(ctxt->sax); 8702 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8703 if (ctxt->sax == NULL) { 8704 xmlFree(buf); 8705 xmlFree(ctxt); 8706 return(NULL); 8707 } 8708 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8709 if (user_data != NULL) 8710 ctxt->userData = user_data; 8711 } 8712 if (filename == NULL) { 8713 ctxt->directory = NULL; 8714 } else { 8715 ctxt->directory = xmlParserGetDirectory(filename); 8716 } 8717 8718 inputStream = xmlNewInputStream(ctxt); 8719 if (inputStream == NULL) { 8720 xmlFreeParserCtxt(ctxt); 8721 return(NULL); 8722 } 8723 8724 if (filename == NULL) 8725 inputStream->filename = NULL; 8726 else 8727 inputStream->filename = xmlMemStrdup(filename); 8728 inputStream->buf = buf; 8729 inputStream->base = inputStream->buf->buffer->content; 8730 inputStream->cur = inputStream->buf->buffer->content; 8731 inputStream->end = 8732 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 8733 8734 inputPush(ctxt, inputStream); 8735 8736 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8737 (ctxt->input->buf != NULL)) { 8738 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8739#ifdef DEBUG_PUSH 8740 
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8741#endif 8742 } 8743 8744 if (enc != XML_CHAR_ENCODING_NONE) { 8745 xmlSwitchEncoding(ctxt, enc); 8746 } 8747 8748 return(ctxt); 8749} 8750 8751/** 8752 * xmlCreateIOParserCtxt: 8753 * @sax: a SAX handler 8754 * @user_data: The user data returned on SAX callbacks 8755 * @ioread: an I/O read function 8756 * @ioclose: an I/O close function 8757 * @ioctx: an I/O handler 8758 * @enc: the charset encoding if known 8759 * 8760 * Create a parser context for using the XML parser with an existing 8761 * I/O stream 8762 * 8763 * Returns the new parser context or NULL 8764 */ 8765xmlParserCtxtPtr 8766xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8767 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8768 void *ioctx, xmlCharEncoding enc) { 8769 xmlParserCtxtPtr ctxt; 8770 xmlParserInputPtr inputStream; 8771 xmlParserInputBufferPtr buf; 8772 8773 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8774 if (buf == NULL) return(NULL); 8775 8776 ctxt = xmlNewParserCtxt(); 8777 if (ctxt == NULL) { 8778 xmlFree(buf); 8779 return(NULL); 8780 } 8781 if (sax != NULL) { 8782 if (ctxt->sax != &xmlDefaultSAXHandler) 8783 xmlFree(ctxt->sax); 8784 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8785 if (ctxt->sax == NULL) { 8786 xmlFree(buf); 8787 xmlFree(ctxt); 8788 return(NULL); 8789 } 8790 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8791 if (user_data != NULL) 8792 ctxt->userData = user_data; 8793 } 8794 8795 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8796 if (inputStream == NULL) { 8797 xmlFreeParserCtxt(ctxt); 8798 return(NULL); 8799 } 8800 inputPush(ctxt, inputStream); 8801 8802 return(ctxt); 8803} 8804 8805/************************************************************************ 8806 * * 8807 * Front ends when parsing a Dtd * 8808 * * 8809 ************************************************************************/ 8810 8811/** 8812 * xmlIOParseDTD: 8813 
* @sax: the SAX handler block or NULL 8814 * @input: an Input Buffer 8815 * @enc: the charset encoding if known 8816 * 8817 * Load and parse a DTD 8818 * 8819 * Returns the resulting xmlDtdPtr or NULL in case of error. 8820 * @input will be freed at parsing end. 8821 */ 8822 8823xmlDtdPtr 8824xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 8825 xmlCharEncoding enc) { 8826 xmlDtdPtr ret = NULL; 8827 xmlParserCtxtPtr ctxt; 8828 xmlParserInputPtr pinput = NULL; 8829 xmlChar start[4]; 8830 8831 if (input == NULL) 8832 return(NULL); 8833 8834 ctxt = xmlNewParserCtxt(); 8835 if (ctxt == NULL) { 8836 return(NULL); 8837 } 8838 8839 /* 8840 * Set-up the SAX context 8841 */ 8842 if (sax != NULL) { 8843 if (ctxt->sax != NULL) 8844 xmlFree(ctxt->sax); 8845 ctxt->sax = sax; 8846 ctxt->userData = NULL; 8847 } 8848 8849 /* 8850 * generate a parser input from the I/O handler 8851 */ 8852 8853 pinput = xmlNewIOInputStream(ctxt, input, enc); 8854 if (pinput == NULL) { 8855 if (sax != NULL) ctxt->sax = NULL; 8856 xmlFreeParserCtxt(ctxt); 8857 return(NULL); 8858 } 8859 8860 /* 8861 * plug some encoding conversion routines here. 8862 */ 8863 xmlPushInput(ctxt, pinput); 8864 8865 pinput->filename = NULL; 8866 pinput->line = 1; 8867 pinput->col = 1; 8868 pinput->base = ctxt->input->cur; 8869 pinput->cur = ctxt->input->cur; 8870 pinput->free = NULL; 8871 8872 /* 8873 * let's parse that entity knowing it's an external subset. 8874 */ 8875 ctxt->inSubset = 2; 8876 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8877 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8878 BAD_CAST "none", BAD_CAST "none"); 8879 8880 if (enc == XML_CHAR_ENCODING_NONE) { 8881 /* 8882 * Get the 4 first bytes and decode the charset 8883 * if enc != XML_CHAR_ENCODING_NONE 8884 * plug some encoding conversion routines. 
8885 */ 8886 start[0] = RAW; 8887 start[1] = NXT(1); 8888 start[2] = NXT(2); 8889 start[3] = NXT(3); 8890 enc = xmlDetectCharEncoding(start, 4); 8891 if (enc != XML_CHAR_ENCODING_NONE) { 8892 xmlSwitchEncoding(ctxt, enc); 8893 } 8894 } 8895 8896 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 8897 8898 if (ctxt->myDoc != NULL) { 8899 if (ctxt->wellFormed) { 8900 ret = ctxt->myDoc->extSubset; 8901 ctxt->myDoc->extSubset = NULL; 8902 } else { 8903 ret = NULL; 8904 } 8905 xmlFreeDoc(ctxt->myDoc); 8906 ctxt->myDoc = NULL; 8907 } 8908 if (sax != NULL) ctxt->sax = NULL; 8909 xmlFreeParserCtxt(ctxt); 8910 8911 return(ret); 8912} 8913 8914/** 8915 * xmlSAXParseDTD: 8916 * @sax: the SAX handler block 8917 * @ExternalID: a NAME* containing the External ID of the DTD 8918 * @SystemID: a NAME* containing the URL to the DTD 8919 * 8920 * Load and parse an external subset. 8921 * 8922 * Returns the resulting xmlDtdPtr or NULL in case of error. 8923 */ 8924 8925xmlDtdPtr 8926xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 8927 const xmlChar *SystemID) { 8928 xmlDtdPtr ret = NULL; 8929 xmlParserCtxtPtr ctxt; 8930 xmlParserInputPtr input = NULL; 8931 xmlCharEncoding enc; 8932 8933 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 8934 8935 ctxt = xmlNewParserCtxt(); 8936 if (ctxt == NULL) { 8937 return(NULL); 8938 } 8939 8940 /* 8941 * Set-up the SAX context 8942 */ 8943 if (sax != NULL) { 8944 if (ctxt->sax != NULL) 8945 xmlFree(ctxt->sax); 8946 ctxt->sax = sax; 8947 ctxt->userData = NULL; 8948 } 8949 8950 /* 8951 * Ask the Entity resolver to load the damn thing 8952 */ 8953 8954 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8955 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8956 if (input == NULL) { 8957 if (sax != NULL) ctxt->sax = NULL; 8958 xmlFreeParserCtxt(ctxt); 8959 return(NULL); 8960 } 8961 8962 /* 8963 * plug some encoding conversion routines here. 
8964 */ 8965 xmlPushInput(ctxt, input); 8966 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 8967 xmlSwitchEncoding(ctxt, enc); 8968 8969 if (input->filename == NULL) 8970 input->filename = (char *) xmlStrdup(SystemID); 8971 input->line = 1; 8972 input->col = 1; 8973 input->base = ctxt->input->cur; 8974 input->cur = ctxt->input->cur; 8975 input->free = NULL; 8976 8977 /* 8978 * let's parse that entity knowing it's an external subset. 8979 */ 8980 ctxt->inSubset = 2; 8981 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8982 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8983 ExternalID, SystemID); 8984 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8985 8986 if (ctxt->myDoc != NULL) { 8987 if (ctxt->wellFormed) { 8988 ret = ctxt->myDoc->extSubset; 8989 ctxt->myDoc->extSubset = NULL; 8990 } else { 8991 ret = NULL; 8992 } 8993 xmlFreeDoc(ctxt->myDoc); 8994 ctxt->myDoc = NULL; 8995 } 8996 if (sax != NULL) ctxt->sax = NULL; 8997 xmlFreeParserCtxt(ctxt); 8998 8999 return(ret); 9000} 9001 9002/** 9003 * xmlParseDTD: 9004 * @ExternalID: a NAME* containing the External ID of the DTD 9005 * @SystemID: a NAME* containing the URL to the DTD 9006 * 9007 * Load and parse an external subset. 9008 * 9009 * Returns the resulting xmlDtdPtr or NULL in case of error. 
9010 */ 9011 9012xmlDtdPtr 9013xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 9014 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 9015} 9016 9017/************************************************************************ 9018 * * 9019 * Front ends when parsing an Entity * 9020 * * 9021 ************************************************************************/ 9022 9023/** 9024 * xmlParseCtxtExternalEntity: 9025 * @ctx: the existing parsing context 9026 * @URL: the URL for the entity to load 9027 * @ID: the System ID for the entity to load 9028 * @lst: the return value for the set of parsed nodes 9029 * 9030 * Parse an external general entity within an existing parsing context 9031 * An external general parsed entity is well-formed if it matches the 9032 * production labeled extParsedEnt. 9033 * 9034 * [78] extParsedEnt ::= TextDecl? content 9035 * 9036 * Returns 0 if the entity is well formed, -1 in case of args problem and 9037 * the parser error code otherwise 9038 */ 9039 9040int 9041xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 9042 const xmlChar *ID, xmlNodePtr *lst) { 9043 xmlParserCtxtPtr ctxt; 9044 xmlDocPtr newDoc; 9045 xmlSAXHandlerPtr oldsax = NULL; 9046 int ret = 0; 9047 xmlChar start[4]; 9048 xmlCharEncoding enc; 9049 9050 if (ctx->depth > 40) { 9051 return(XML_ERR_ENTITY_LOOP); 9052 } 9053 9054 if (lst != NULL) 9055 *lst = NULL; 9056 if ((URL == NULL) && (ID == NULL)) 9057 return(-1); 9058 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 9059 return(-1); 9060 9061 9062 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9063 if (ctxt == NULL) return(-1); 9064 ctxt->userData = ctxt; 9065 oldsax = ctxt->sax; 9066 ctxt->sax = ctx->sax; 9067 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9068 if (newDoc == NULL) { 9069 xmlFreeParserCtxt(ctxt); 9070 return(-1); 9071 } 9072 if (ctx->myDoc != NULL) { 9073 newDoc->intSubset = ctx->myDoc->intSubset; 9074 newDoc->extSubset = ctx->myDoc->extSubset; 9075 } 9076 if 
(ctx->myDoc->URL != NULL) { 9077 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 9078 } 9079 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9080 if (newDoc->children == NULL) { 9081 ctxt->sax = oldsax; 9082 xmlFreeParserCtxt(ctxt); 9083 newDoc->intSubset = NULL; 9084 newDoc->extSubset = NULL; 9085 xmlFreeDoc(newDoc); 9086 return(-1); 9087 } 9088 nodePush(ctxt, newDoc->children); 9089 if (ctx->myDoc == NULL) { 9090 ctxt->myDoc = newDoc; 9091 } else { 9092 ctxt->myDoc = ctx->myDoc; 9093 newDoc->children->doc = ctx->myDoc; 9094 } 9095 9096 /* 9097 * Get the 4 first bytes and decode the charset 9098 * if enc != XML_CHAR_ENCODING_NONE 9099 * plug some encoding conversion routines. 9100 */ 9101 GROW 9102 start[0] = RAW; 9103 start[1] = NXT(1); 9104 start[2] = NXT(2); 9105 start[3] = NXT(3); 9106 enc = xmlDetectCharEncoding(start, 4); 9107 if (enc != XML_CHAR_ENCODING_NONE) { 9108 xmlSwitchEncoding(ctxt, enc); 9109 } 9110 9111 /* 9112 * Parse a possible text declaration first 9113 */ 9114 if ((RAW == '<') && (NXT(1) == '?') && 9115 (NXT(2) == 'x') && (NXT(3) == 'm') && 9116 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9117 xmlParseTextDecl(ctxt); 9118 } 9119 9120 /* 9121 * Doing validity checking on chunk doesn't make sense 9122 */ 9123 ctxt->instate = XML_PARSER_CONTENT; 9124 ctxt->validate = ctx->validate; 9125 ctxt->loadsubset = ctx->loadsubset; 9126 ctxt->depth = ctx->depth + 1; 9127 ctxt->replaceEntities = ctx->replaceEntities; 9128 if (ctxt->validate) { 9129 ctxt->vctxt.error = ctx->vctxt.error; 9130 ctxt->vctxt.warning = ctx->vctxt.warning; 9131 } else { 9132 ctxt->vctxt.error = NULL; 9133 ctxt->vctxt.warning = NULL; 9134 } 9135 ctxt->vctxt.nodeTab = NULL; 9136 ctxt->vctxt.nodeNr = 0; 9137 ctxt->vctxt.nodeMax = 0; 9138 ctxt->vctxt.node = NULL; 9139 9140 xmlParseContent(ctxt); 9141 9142 if ((RAW == '<') && (NXT(1) == '/')) { 9143 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9145 
ctxt->sax->error(ctxt->userData, 9146 "chunk is not well balanced\n"); 9147 ctxt->wellFormed = 0; 9148 ctxt->disableSAX = 1; 9149 } else if (RAW != 0) { 9150 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9152 ctxt->sax->error(ctxt->userData, 9153 "extra content at the end of well balanced chunk\n"); 9154 ctxt->wellFormed = 0; 9155 ctxt->disableSAX = 1; 9156 } 9157 if (ctxt->node != newDoc->children) { 9158 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9160 ctxt->sax->error(ctxt->userData, 9161 "chunk is not well balanced\n"); 9162 ctxt->wellFormed = 0; 9163 ctxt->disableSAX = 1; 9164 } 9165 9166 if (!ctxt->wellFormed) { 9167 if (ctxt->errNo == 0) 9168 ret = 1; 9169 else 9170 ret = ctxt->errNo; 9171 } else { 9172 if (lst != NULL) { 9173 xmlNodePtr cur; 9174 9175 /* 9176 * Return the newly created nodeset after unlinking it from 9177 * they pseudo parent. 9178 */ 9179 cur = newDoc->children->children; 9180 *lst = cur; 9181 while (cur != NULL) { 9182 cur->parent = NULL; 9183 cur = cur->next; 9184 } 9185 newDoc->children->children = NULL; 9186 } 9187 ret = 0; 9188 } 9189 ctxt->sax = oldsax; 9190 xmlFreeParserCtxt(ctxt); 9191 newDoc->intSubset = NULL; 9192 newDoc->extSubset = NULL; 9193 xmlFreeDoc(newDoc); 9194 9195 return(ret); 9196} 9197 9198/** 9199 * xmlParseExternalEntityPrivate: 9200 * @doc: the document the chunk pertains to 9201 * @oldctxt: the previous parser context if available 9202 * @sax: the SAX handler bloc (possibly NULL) 9203 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9204 * @depth: Used for loop detection, use 0 9205 * @URL: the URL for the entity to load 9206 * @ID: the System ID for the entity to load 9207 * @list: the return value for the set of parsed nodes 9208 * 9209 * Private version of xmlParseExternalEntity() 9210 * 9211 * Returns 0 if the entity is well formed, -1 in case of args problem and 9212 * the parser 
error code otherwise 9213 */ 9214 9215static int 9216xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 9217 xmlSAXHandlerPtr sax, 9218 void *user_data, int depth, const xmlChar *URL, 9219 const xmlChar *ID, xmlNodePtr *list) { 9220 xmlParserCtxtPtr ctxt; 9221 xmlDocPtr newDoc; 9222 xmlSAXHandlerPtr oldsax = NULL; 9223 int ret = 0; 9224 xmlChar start[4]; 9225 xmlCharEncoding enc; 9226 9227 if (depth > 40) { 9228 return(XML_ERR_ENTITY_LOOP); 9229 } 9230 9231 9232 9233 if (list != NULL) 9234 *list = NULL; 9235 if ((URL == NULL) && (ID == NULL)) 9236 return(-1); 9237 if (doc == NULL) /* @@ relax but check for dereferences */ 9238 return(-1); 9239 9240 9241 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9242 if (ctxt == NULL) return(-1); 9243 ctxt->userData = ctxt; 9244 if (oldctxt != NULL) { 9245 ctxt->_private = oldctxt->_private; 9246 ctxt->loadsubset = oldctxt->loadsubset; 9247 ctxt->validate = oldctxt->validate; 9248 ctxt->external = oldctxt->external; 9249 } else { 9250 /* 9251 * Doing validity checking on chunk without context 9252 * doesn't make sense 9253 */ 9254 ctxt->_private = NULL; 9255 ctxt->validate = 0; 9256 ctxt->external = 2; 9257 ctxt->loadsubset = 0; 9258 } 9259 if (sax != NULL) { 9260 oldsax = ctxt->sax; 9261 ctxt->sax = sax; 9262 if (user_data != NULL) 9263 ctxt->userData = user_data; 9264 } 9265 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9266 if (newDoc == NULL) { 9267 xmlFreeParserCtxt(ctxt); 9268 return(-1); 9269 } 9270 if (doc != NULL) { 9271 newDoc->intSubset = doc->intSubset; 9272 newDoc->extSubset = doc->extSubset; 9273 } 9274 if (doc->URL != NULL) { 9275 newDoc->URL = xmlStrdup(doc->URL); 9276 } 9277 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9278 if (newDoc->children == NULL) { 9279 if (sax != NULL) 9280 ctxt->sax = oldsax; 9281 xmlFreeParserCtxt(ctxt); 9282 newDoc->intSubset = NULL; 9283 newDoc->extSubset = NULL; 9284 xmlFreeDoc(newDoc); 9285 return(-1); 9286 } 9287 nodePush(ctxt, 
newDoc->children); 9288 if (doc == NULL) { 9289 ctxt->myDoc = newDoc; 9290 } else { 9291 ctxt->myDoc = doc; 9292 newDoc->children->doc = doc; 9293 } 9294 9295 /* 9296 * Get the 4 first bytes and decode the charset 9297 * if enc != XML_CHAR_ENCODING_NONE 9298 * plug some encoding conversion routines. 9299 */ 9300 GROW; 9301 start[0] = RAW; 9302 start[1] = NXT(1); 9303 start[2] = NXT(2); 9304 start[3] = NXT(3); 9305 enc = xmlDetectCharEncoding(start, 4); 9306 if (enc != XML_CHAR_ENCODING_NONE) { 9307 xmlSwitchEncoding(ctxt, enc); 9308 } 9309 9310 /* 9311 * Parse a possible text declaration first 9312 */ 9313 if ((RAW == '<') && (NXT(1) == '?') && 9314 (NXT(2) == 'x') && (NXT(3) == 'm') && 9315 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9316 xmlParseTextDecl(ctxt); 9317 } 9318 9319 ctxt->instate = XML_PARSER_CONTENT; 9320 ctxt->depth = depth; 9321 9322 xmlParseContent(ctxt); 9323 9324 if ((RAW == '<') && (NXT(1) == '/')) { 9325 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9327 ctxt->sax->error(ctxt->userData, 9328 "chunk is not well balanced\n"); 9329 ctxt->wellFormed = 0; 9330 ctxt->disableSAX = 1; 9331 } else if (RAW != 0) { 9332 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9334 ctxt->sax->error(ctxt->userData, 9335 "extra content at the end of well balanced chunk\n"); 9336 ctxt->wellFormed = 0; 9337 ctxt->disableSAX = 1; 9338 } 9339 if (ctxt->node != newDoc->children) { 9340 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9342 ctxt->sax->error(ctxt->userData, 9343 "chunk is not well balanced\n"); 9344 ctxt->wellFormed = 0; 9345 ctxt->disableSAX = 1; 9346 } 9347 9348 if (!ctxt->wellFormed) { 9349 if (ctxt->errNo == 0) 9350 ret = 1; 9351 else 9352 ret = ctxt->errNo; 9353 } else { 9354 if (list != NULL) { 9355 xmlNodePtr cur; 9356 9357 /* 9358 * Return the newly created nodeset after unlinking it from 9359 * 
they pseudo parent. 9360 */ 9361 cur = newDoc->children->children; 9362 *list = cur; 9363 while (cur != NULL) { 9364 cur->parent = NULL; 9365 cur = cur->next; 9366 } 9367 newDoc->children->children = NULL; 9368 } 9369 ret = 0; 9370 } 9371 if (sax != NULL) 9372 ctxt->sax = oldsax; 9373 xmlFreeParserCtxt(ctxt); 9374 newDoc->intSubset = NULL; 9375 newDoc->extSubset = NULL; 9376 xmlFreeDoc(newDoc); 9377 9378 return(ret); 9379} 9380 9381/** 9382 * xmlParseExternalEntity: 9383 * @doc: the document the chunk pertains to 9384 * @sax: the SAX handler bloc (possibly NULL) 9385 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9386 * @depth: Used for loop detection, use 0 9387 * @URL: the URL for the entity to load 9388 * @ID: the System ID for the entity to load 9389 * @lst: the return value for the set of parsed nodes 9390 * 9391 * Parse an external general entity 9392 * An external general parsed entity is well-formed if it matches the 9393 * production labeled extParsedEnt. 9394 * 9395 * [78] extParsedEnt ::= TextDecl? 
content 9396 * 9397 * Returns 0 if the entity is well formed, -1 in case of args problem and 9398 * the parser error code otherwise 9399 */ 9400 9401int 9402xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 9403 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 9404 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 9405 ID, lst)); 9406} 9407 9408/** 9409 * xmlParseBalancedChunkMemory: 9410 * @doc: the document the chunk pertains to 9411 * @sax: the SAX handler bloc (possibly NULL) 9412 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9413 * @depth: Used for loop detection, use 0 9414 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9415 * @lst: the return value for the set of parsed nodes 9416 * 9417 * Parse a well-balanced chunk of an XML document 9418 * called by the parser 9419 * The allowed sequence for the Well Balanced Chunk is the one defined by 9420 * the content production in the XML grammar: 9421 * 9422 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9423 * 9424 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9425 * the parser error code otherwise 9426 */ 9427 9428int 9429xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9430 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 9431 xmlParserCtxtPtr ctxt; 9432 xmlDocPtr newDoc; 9433 xmlSAXHandlerPtr oldsax = NULL; 9434 int size; 9435 int ret = 0; 9436 9437 if (depth > 40) { 9438 return(XML_ERR_ENTITY_LOOP); 9439 } 9440 9441 9442 if (lst != NULL) 9443 *lst = NULL; 9444 if (string == NULL) 9445 return(-1); 9446 9447 size = xmlStrlen(string); 9448 9449 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9450 if (ctxt == NULL) return(-1); 9451 ctxt->userData = ctxt; 9452 if (sax != NULL) { 9453 oldsax = ctxt->sax; 9454 ctxt->sax = sax; 9455 if (user_data != NULL) 9456 ctxt->userData = user_data; 9457 } 9458 
newDoc = xmlNewDoc(BAD_CAST "1.0"); 9459 if (newDoc == NULL) { 9460 xmlFreeParserCtxt(ctxt); 9461 return(-1); 9462 } 9463 if (doc != NULL) { 9464 newDoc->intSubset = doc->intSubset; 9465 newDoc->extSubset = doc->extSubset; 9466 } 9467 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9468 if (newDoc->children == NULL) { 9469 if (sax != NULL) 9470 ctxt->sax = oldsax; 9471 xmlFreeParserCtxt(ctxt); 9472 newDoc->intSubset = NULL; 9473 newDoc->extSubset = NULL; 9474 xmlFreeDoc(newDoc); 9475 return(-1); 9476 } 9477 nodePush(ctxt, newDoc->children); 9478 if (doc == NULL) { 9479 ctxt->myDoc = newDoc; 9480 } else { 9481 ctxt->myDoc = doc; 9482 newDoc->children->doc = doc; 9483 } 9484 ctxt->instate = XML_PARSER_CONTENT; 9485 ctxt->depth = depth; 9486 9487 /* 9488 * Doing validity checking on chunk doesn't make sense 9489 */ 9490 ctxt->validate = 0; 9491 ctxt->loadsubset = 0; 9492 9493 xmlParseContent(ctxt); 9494 9495 if ((RAW == '<') && (NXT(1) == '/')) { 9496 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9498 ctxt->sax->error(ctxt->userData, 9499 "chunk is not well balanced\n"); 9500 ctxt->wellFormed = 0; 9501 ctxt->disableSAX = 1; 9502 } else if (RAW != 0) { 9503 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9505 ctxt->sax->error(ctxt->userData, 9506 "extra content at the end of well balanced chunk\n"); 9507 ctxt->wellFormed = 0; 9508 ctxt->disableSAX = 1; 9509 } 9510 if (ctxt->node != newDoc->children) { 9511 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9513 ctxt->sax->error(ctxt->userData, 9514 "chunk is not well balanced\n"); 9515 ctxt->wellFormed = 0; 9516 ctxt->disableSAX = 1; 9517 } 9518 9519 if (!ctxt->wellFormed) { 9520 if (ctxt->errNo == 0) 9521 ret = 1; 9522 else 9523 ret = ctxt->errNo; 9524 } else { 9525 if (lst != NULL) { 9526 xmlNodePtr cur; 9527 9528 /* 9529 * Return the 
newly created nodeset after unlinking it from 9530 * they pseudo parent. 9531 */ 9532 cur = newDoc->children->children; 9533 *lst = cur; 9534 while (cur != NULL) { 9535 cur->parent = NULL; 9536 cur = cur->next; 9537 } 9538 newDoc->children->children = NULL; 9539 } 9540 ret = 0; 9541 } 9542 if (sax != NULL) 9543 ctxt->sax = oldsax; 9544 xmlFreeParserCtxt(ctxt); 9545 newDoc->intSubset = NULL; 9546 newDoc->extSubset = NULL; 9547 xmlFreeDoc(newDoc); 9548 9549 return(ret); 9550} 9551 9552/** 9553 * xmlSAXParseEntity: 9554 * @sax: the SAX handler block 9555 * @filename: the filename 9556 * 9557 * parse an XML external entity out of context and build a tree. 9558 * It use the given SAX function block to handle the parsing callback. 9559 * If sax is NULL, fallback to the default DOM tree building routines. 9560 * 9561 * [78] extParsedEnt ::= TextDecl? content 9562 * 9563 * This correspond to a "Well Balanced" chunk 9564 * 9565 * Returns the resulting document tree 9566 */ 9567 9568xmlDocPtr 9569xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9570 xmlDocPtr ret; 9571 xmlParserCtxtPtr ctxt; 9572 char *directory = NULL; 9573 9574 ctxt = xmlCreateFileParserCtxt(filename); 9575 if (ctxt == NULL) { 9576 return(NULL); 9577 } 9578 if (sax != NULL) { 9579 if (ctxt->sax != NULL) 9580 xmlFree(ctxt->sax); 9581 ctxt->sax = sax; 9582 ctxt->userData = NULL; 9583 } 9584 9585 if ((ctxt->directory == NULL) && (directory == NULL)) 9586 directory = xmlParserGetDirectory(filename); 9587 9588 xmlParseExtParsedEnt(ctxt); 9589 9590 if (ctxt->wellFormed) 9591 ret = ctxt->myDoc; 9592 else { 9593 ret = NULL; 9594 xmlFreeDoc(ctxt->myDoc); 9595 ctxt->myDoc = NULL; 9596 } 9597 if (sax != NULL) 9598 ctxt->sax = NULL; 9599 xmlFreeParserCtxt(ctxt); 9600 9601 return(ret); 9602} 9603 9604/** 9605 * xmlParseEntity: 9606 * @filename: the filename 9607 * 9608 * parse an XML external entity out of context and build a tree. 9609 * 9610 * [78] extParsedEnt ::= TextDecl? 
content 9611 * 9612 * This correspond to a "Well Balanced" chunk 9613 * 9614 * Returns the resulting document tree 9615 */ 9616 9617xmlDocPtr 9618xmlParseEntity(const char *filename) { 9619 return(xmlSAXParseEntity(NULL, filename)); 9620} 9621 9622/** 9623 * xmlCreateEntityParserCtxt: 9624 * @URL: the entity URL 9625 * @ID: the entity PUBLIC ID 9626 * @base: a posible base for the target URI 9627 * 9628 * Create a parser context for an external entity 9629 * Automatic support for ZLIB/Compress compressed document is provided 9630 * by default if found at compile-time. 9631 * 9632 * Returns the new parser context or NULL 9633 */ 9634xmlParserCtxtPtr 9635xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9636 const xmlChar *base) { 9637 xmlParserCtxtPtr ctxt; 9638 xmlParserInputPtr inputStream; 9639 char *directory = NULL; 9640 xmlChar *uri; 9641 9642 ctxt = xmlNewParserCtxt(); 9643 if (ctxt == NULL) { 9644 return(NULL); 9645 } 9646 9647 uri = xmlBuildURI(URL, base); 9648 9649 if (uri == NULL) { 9650 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9651 if (inputStream == NULL) { 9652 xmlFreeParserCtxt(ctxt); 9653 return(NULL); 9654 } 9655 9656 inputPush(ctxt, inputStream); 9657 9658 if ((ctxt->directory == NULL) && (directory == NULL)) 9659 directory = xmlParserGetDirectory((char *)URL); 9660 if ((ctxt->directory == NULL) && (directory != NULL)) 9661 ctxt->directory = directory; 9662 } else { 9663 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9664 if (inputStream == NULL) { 9665 xmlFree(uri); 9666 xmlFreeParserCtxt(ctxt); 9667 return(NULL); 9668 } 9669 9670 inputPush(ctxt, inputStream); 9671 9672 if ((ctxt->directory == NULL) && (directory == NULL)) 9673 directory = xmlParserGetDirectory((char *)uri); 9674 if ((ctxt->directory == NULL) && (directory != NULL)) 9675 ctxt->directory = directory; 9676 xmlFree(uri); 9677 } 9678 9679 return(ctxt); 9680} 9681 
9682/************************************************************************ 9683 * * 9684 * Front ends when parsing from a file * 9685 * * 9686 ************************************************************************/ 9687 9688/** 9689 * xmlCreateFileParserCtxt: 9690 * @filename: the filename 9691 * 9692 * Create a parser context for a file content. 9693 * Automatic support for ZLIB/Compress compressed document is provided 9694 * by default if found at compile-time. 9695 * 9696 * Returns the new parser context or NULL 9697 */ 9698xmlParserCtxtPtr 9699xmlCreateFileParserCtxt(const char *filename) 9700{ 9701 xmlParserCtxtPtr ctxt; 9702 xmlParserInputPtr inputStream; 9703 char *directory = NULL; 9704 9705 ctxt = xmlNewParserCtxt(); 9706 if (ctxt == NULL) { 9707 if (xmlDefaultSAXHandler.error != NULL) { 9708 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9709 } 9710 return(NULL); 9711 } 9712 9713 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 9714 if (inputStream == NULL) { 9715 xmlFreeParserCtxt(ctxt); 9716 return(NULL); 9717 } 9718 9719 inputPush(ctxt, inputStream); 9720 if ((ctxt->directory == NULL) && (directory == NULL)) 9721 directory = xmlParserGetDirectory(filename); 9722 if ((ctxt->directory == NULL) && (directory != NULL)) 9723 ctxt->directory = directory; 9724 9725 return(ctxt); 9726} 9727 9728/** 9729 * xmlSAXParseFile: 9730 * @sax: the SAX handler block 9731 * @filename: the filename 9732 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9733 * documents 9734 * 9735 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9736 * compressed document is provided by default if found at compile-time. 9737 * It use the given SAX function block to handle the parsing callback. 9738 * If sax is NULL, fallback to the default DOM tree building routines. 
9739 * 9740 * Returns the resulting document tree 9741 */ 9742 9743xmlDocPtr 9744xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9745 int recovery) { 9746 xmlDocPtr ret; 9747 xmlParserCtxtPtr ctxt; 9748 char *directory = NULL; 9749 9750 ctxt = xmlCreateFileParserCtxt(filename); 9751 if (ctxt == NULL) { 9752 return(NULL); 9753 } 9754 if (sax != NULL) { 9755 if (ctxt->sax != NULL) 9756 xmlFree(ctxt->sax); 9757 ctxt->sax = sax; 9758 } 9759 9760 if ((ctxt->directory == NULL) && (directory == NULL)) 9761 directory = xmlParserGetDirectory(filename); 9762 if ((ctxt->directory == NULL) && (directory != NULL)) 9763 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9764 9765 xmlParseDocument(ctxt); 9766 9767 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9768 else { 9769 ret = NULL; 9770 xmlFreeDoc(ctxt->myDoc); 9771 ctxt->myDoc = NULL; 9772 } 9773 if (sax != NULL) 9774 ctxt->sax = NULL; 9775 xmlFreeParserCtxt(ctxt); 9776 9777 return(ret); 9778} 9779 9780/** 9781 * xmlRecoverDoc: 9782 * @cur: a pointer to an array of xmlChar 9783 * 9784 * parse an XML in-memory document and build a tree. 9785 * In the case the document is not Well Formed, a tree is built anyway 9786 * 9787 * Returns the resulting document tree 9788 */ 9789 9790xmlDocPtr 9791xmlRecoverDoc(xmlChar *cur) { 9792 return(xmlSAXParseDoc(NULL, cur, 1)); 9793} 9794 9795/** 9796 * xmlParseFile: 9797 * @filename: the filename 9798 * 9799 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9800 * compressed document is provided by default if found at compile-time. 9801 * 9802 * Returns the resulting document tree if the file was wellformed, 9803 * NULL otherwise. 9804 */ 9805 9806xmlDocPtr 9807xmlParseFile(const char *filename) { 9808 return(xmlSAXParseFile(NULL, filename, 0)); 9809} 9810 9811/** 9812 * xmlRecoverFile: 9813 * @filename: the filename 9814 * 9815 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 9816 * compressed document is provided by default if found at compile-time. 9817 * In the case the document is not Well Formed, a tree is built anyway 9818 * 9819 * Returns the resulting document tree 9820 */ 9821 9822xmlDocPtr 9823xmlRecoverFile(const char *filename) { 9824 return(xmlSAXParseFile(NULL, filename, 1)); 9825} 9826 9827 9828/** 9829 * xmlSetupParserForBuffer: 9830 * @ctxt: an XML parser context 9831 * @buffer: a xmlChar * buffer 9832 * @filename: a file name 9833 * 9834 * Setup the parser context to parse a new buffer; Clears any prior 9835 * contents from the parser context. The buffer parameter must not be 9836 * NULL, but the filename parameter can be 9837 */ 9838void 9839xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9840 const char* filename) 9841{ 9842 xmlParserInputPtr input; 9843 9844 input = xmlNewInputStream(ctxt); 9845 if (input == NULL) { 9846 perror("malloc"); 9847 xmlFree(ctxt); 9848 return; 9849 } 9850 9851 xmlClearParserCtxt(ctxt); 9852 if (filename != NULL) 9853 input->filename = xmlMemStrdup(filename); 9854 input->base = buffer; 9855 input->cur = buffer; 9856 input->end = &buffer[xmlStrlen(buffer)]; 9857 inputPush(ctxt, input); 9858} 9859 9860/** 9861 * xmlSAXUserParseFile: 9862 * @sax: a SAX handler 9863 * @user_data: The user data returned on SAX callbacks 9864 * @filename: a file name 9865 * 9866 * parse an XML file and call the given SAX handler routines. 
9867 * Automatic support for ZLIB/Compress compressed document is provided 9868 * 9869 * Returns 0 in case of success or a error number otherwise 9870 */ 9871int 9872xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 9873 const char *filename) { 9874 int ret = 0; 9875 xmlParserCtxtPtr ctxt; 9876 9877 ctxt = xmlCreateFileParserCtxt(filename); 9878 if (ctxt == NULL) return -1; 9879 if (ctxt->sax != &xmlDefaultSAXHandler) 9880 xmlFree(ctxt->sax); 9881 ctxt->sax = sax; 9882 if (user_data != NULL) 9883 ctxt->userData = user_data; 9884 9885 xmlParseDocument(ctxt); 9886 9887 if (ctxt->wellFormed) 9888 ret = 0; 9889 else { 9890 if (ctxt->errNo != 0) 9891 ret = ctxt->errNo; 9892 else 9893 ret = -1; 9894 } 9895 if (sax != NULL) 9896 ctxt->sax = NULL; 9897 xmlFreeParserCtxt(ctxt); 9898 9899 return ret; 9900} 9901 9902/************************************************************************ 9903 * * 9904 * Front ends when parsing from memory * 9905 * * 9906 ************************************************************************/ 9907 9908/** 9909 * xmlCreateMemoryParserCtxt: 9910 * @buffer: a pointer to a char array 9911 * @size: the size of the array 9912 * 9913 * Create a parser context for an XML in-memory document. 
9914 * 9915 * Returns the new parser context or NULL 9916 */ 9917xmlParserCtxtPtr 9918xmlCreateMemoryParserCtxt(const char *buffer, int size) { 9919 xmlParserCtxtPtr ctxt; 9920 xmlParserInputPtr input; 9921 xmlParserInputBufferPtr buf; 9922 9923 if (buffer == NULL) 9924 return(NULL); 9925 if (size <= 0) 9926 return(NULL); 9927 9928 ctxt = xmlNewParserCtxt(); 9929 if (ctxt == NULL) 9930 return(NULL); 9931 9932 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 9933 if (buf == NULL) return(NULL); 9934 9935 input = xmlNewInputStream(ctxt); 9936 if (input == NULL) { 9937 xmlFreeParserCtxt(ctxt); 9938 return(NULL); 9939 } 9940 9941 input->filename = NULL; 9942 input->buf = buf; 9943 input->base = input->buf->buffer->content; 9944 input->cur = input->buf->buffer->content; 9945 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 9946 9947 inputPush(ctxt, input); 9948 return(ctxt); 9949} 9950 9951/** 9952 * xmlSAXParseMemory: 9953 * @sax: the SAX handler block 9954 * @buffer: an pointer to a char array 9955 * @size: the size of the array 9956 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 9957 * documents 9958 * 9959 * parse an XML in-memory block and use the given SAX function block 9960 * to handle the parsing callback. If sax is NULL, fallback to the default 9961 * DOM tree building routines. 
9962 * 9963 * Returns the resulting document tree 9964 */ 9965xmlDocPtr 9966xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 9967 int size, int recovery) { 9968 xmlDocPtr ret; 9969 xmlParserCtxtPtr ctxt; 9970 9971 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9972 if (ctxt == NULL) return(NULL); 9973 if (sax != NULL) { 9974 ctxt->sax = sax; 9975 } 9976 9977 xmlParseDocument(ctxt); 9978 9979 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9980 else { 9981 ret = NULL; 9982 xmlFreeDoc(ctxt->myDoc); 9983 ctxt->myDoc = NULL; 9984 } 9985 if (sax != NULL) 9986 ctxt->sax = NULL; 9987 xmlFreeParserCtxt(ctxt); 9988 9989 return(ret); 9990} 9991 9992/** 9993 * xmlParseMemory: 9994 * @buffer: an pointer to a char array 9995 * @size: the size of the array 9996 * 9997 * parse an XML in-memory block and build a tree. 9998 * 9999 * Returns the resulting document tree 10000 */ 10001 10002xmlDocPtr xmlParseMemory(const char *buffer, int size) { 10003 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 10004} 10005 10006/** 10007 * xmlRecoverMemory: 10008 * @buffer: an pointer to a char array 10009 * @size: the size of the array 10010 * 10011 * parse an XML in-memory block and build a tree. 10012 * In the case the document is not Well Formed, a tree is built anyway 10013 * 10014 * Returns the resulting document tree 10015 */ 10016 10017xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 10018 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 10019} 10020 10021/** 10022 * xmlSAXUserParseMemory: 10023 * @sax: a SAX handler 10024 * @user_data: The user data returned on SAX callbacks 10025 * @buffer: an in-memory XML document input 10026 * @size: the length of the XML document in bytes 10027 * 10028 * A better SAX parsing routine. 10029 * parse an XML in-memory buffer and call the given SAX handler routines. 
10030 * 10031 * Returns 0 in case of success or a error number otherwise 10032 */ 10033int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 10034 const char *buffer, int size) { 10035 int ret = 0; 10036 xmlParserCtxtPtr ctxt; 10037 xmlSAXHandlerPtr oldsax = NULL; 10038 10039 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10040 if (ctxt == NULL) return -1; 10041 if (sax != NULL) { 10042 oldsax = ctxt->sax; 10043 ctxt->sax = sax; 10044 } 10045 if (user_data != NULL) 10046 ctxt->userData = user_data; 10047 10048 xmlParseDocument(ctxt); 10049 10050 if (ctxt->wellFormed) 10051 ret = 0; 10052 else { 10053 if (ctxt->errNo != 0) 10054 ret = ctxt->errNo; 10055 else 10056 ret = -1; 10057 } 10058 if (sax != NULL) { 10059 ctxt->sax = oldsax; 10060 } 10061 xmlFreeParserCtxt(ctxt); 10062 10063 return ret; 10064} 10065 10066/** 10067 * xmlCreateDocParserCtxt: 10068 * @cur: a pointer to an array of xmlChar 10069 * 10070 * Creates a parser context for an XML in-memory document. 10071 * 10072 * Returns the new parser context or NULL 10073 */ 10074xmlParserCtxtPtr 10075xmlCreateDocParserCtxt(xmlChar *cur) { 10076 int len; 10077 10078 if (cur == NULL) 10079 return(NULL); 10080 len = xmlStrlen(cur); 10081 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 10082} 10083 10084/** 10085 * xmlSAXParseDoc: 10086 * @sax: the SAX handler block 10087 * @cur: a pointer to an array of xmlChar 10088 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10089 * documents 10090 * 10091 * parse an XML in-memory document and build a tree. 10092 * It use the given SAX function block to handle the parsing callback. 10093 * If sax is NULL, fallback to the default DOM tree building routines. 
10094 * 10095 * Returns the resulting document tree 10096 */ 10097 10098xmlDocPtr 10099xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 10100 xmlDocPtr ret; 10101 xmlParserCtxtPtr ctxt; 10102 10103 if (cur == NULL) return(NULL); 10104 10105 10106 ctxt = xmlCreateDocParserCtxt(cur); 10107 if (ctxt == NULL) return(NULL); 10108 if (sax != NULL) { 10109 ctxt->sax = sax; 10110 ctxt->userData = NULL; 10111 } 10112 10113 xmlParseDocument(ctxt); 10114 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10115 else { 10116 ret = NULL; 10117 xmlFreeDoc(ctxt->myDoc); 10118 ctxt->myDoc = NULL; 10119 } 10120 if (sax != NULL) 10121 ctxt->sax = NULL; 10122 xmlFreeParserCtxt(ctxt); 10123 10124 return(ret); 10125} 10126 10127/** 10128 * xmlParseDoc: 10129 * @cur: a pointer to an array of xmlChar 10130 * 10131 * parse an XML in-memory document and build a tree. 10132 * 10133 * Returns the resulting document tree 10134 */ 10135 10136xmlDocPtr 10137xmlParseDoc(xmlChar *cur) { 10138 return(xmlSAXParseDoc(NULL, cur, 0)); 10139} 10140 10141 10142/************************************************************************ 10143 * * 10144 * Miscellaneous * 10145 * * 10146 ************************************************************************/ 10147 10148#ifdef LIBXML_XPATH_ENABLED 10149#include <libxml/xpath.h> 10150#endif 10151 10152static int xmlParserInitialized = 0; 10153 10154/** 10155 * xmlInitParser: 10156 * 10157 * Initialization function for the XML parser. 10158 * This is not reentrant. Call once before processing in case of 10159 * use in multithreaded programs. 
10160 */ 10161 10162void 10163xmlInitParser(void) { 10164 if (xmlParserInitialized) return; 10165 10166 xmlInitCharEncodingHandlers(); 10167 xmlInitializePredefinedEntities(); 10168 xmlDefaultSAXHandlerInit(); 10169 xmlRegisterDefaultInputCallbacks(); 10170 xmlRegisterDefaultOutputCallbacks(); 10171#ifdef LIBXML_HTML_ENABLED 10172 htmlInitAutoClose(); 10173 htmlDefaultSAXHandlerInit(); 10174#endif 10175#ifdef LIBXML_XPATH_ENABLED 10176 xmlXPathInit(); 10177#endif 10178 xmlParserInitialized = 1; 10179} 10180 10181/** 10182 * xmlCleanupParser: 10183 * 10184 * Cleanup function for the XML parser. It tries to reclaim all 10185 * parsing related global memory allocated for the parser processing. 10186 * It doesn't deallocate any document related memory. Calling this 10187 * function should not prevent reusing the parser. 10188 */ 10189 10190void 10191xmlCleanupParser(void) { 10192 xmlParserInitialized = 0; 10193 xmlCleanupCharEncodingHandlers(); 10194 xmlCleanupPredefinedEntities(); 10195#ifdef LIBXML_CATALOG_ENABLED 10196 xmlCatalogCleanup(); 10197#endif 10198} 10199 10200