parser.c revision 878eab04c07a090c7b3aeb182993b579e0ea0195
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

#include "libxml.h"

/* Directory separator character of the host platform. */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/threads.h>
#include <libxml/globals.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif


/*
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities().
 * XML_PARSER_BUFFER_SIZE is the amount of slack that the buffer growing
 * checks in this file keep free at the end of such buffers.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL terminated array of target names)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
/* Forward declaration: used by xmlStringDecodeEntities() further down. */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* Forward declarations of file-private helpers defined later in the file. */
static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);

/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

/* Forward declaration: used by xmlStringDecodeEntities() further down. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
113 114/* 115 * Generic function for accessing stacks in the Parser Context 116 */ 117 118#define PUSH_AND_POP(scope, type, name) \ 119scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 120 if (ctxt->name##Nr >= ctxt->name##Max) { \ 121 ctxt->name##Max *= 2; \ 122 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 123 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 124 if (ctxt->name##Tab == NULL) { \ 125 xmlGenericError(xmlGenericErrorContext, \ 126 "realloc failed !\n"); \ 127 return(0); \ 128 } \ 129 } \ 130 ctxt->name##Tab[ctxt->name##Nr] = value; \ 131 ctxt->name = value; \ 132 return(ctxt->name##Nr++); \ 133} \ 134scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 135 type ret; \ 136 if (ctxt->name##Nr <= 0) return(0); \ 137 ctxt->name##Nr--; \ 138 if (ctxt->name##Nr > 0) \ 139 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 140 else \ 141 ctxt->name = NULL; \ 142 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 143 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 144 return(ret); \ 145} \ 146 147/** 148 * inputPop: 149 * @ctxt: an XML parser context 150 * 151 * Pops the top parser input from the input stack 152 * 153 * Returns the input just removed 154 */ 155/** 156 * inputPush: 157 * @ctxt: an XML parser context 158 * @value: the parser input 159 * 160 * Pushes a new parser input on top of the input stack 161 * 162 * Returns 0 in case of error, the index in the stack otherwise 163 */ 164/** 165 * namePop: 166 * @ctxt: an XML parser context 167 * 168 * Pops the top element name from the name stack 169 * 170 * Returns the name just removed 171 */ 172/** 173 * namePush: 174 * @ctxt: an XML parser context 175 * @value: the element name 176 * 177 * Pushes a new element name on top of the name stack 178 * 179 * Returns 0 in case of error, the index in the stack otherwise 180 */ 181/** 182 * nodePop: 183 * @ctxt: an XML parser context 184 * 185 * Pops the top element node from the node stack 186 * 187 * Returns the node just removed 188 */ 189/** 190 * nodePush: 
191 * @ctxt: an XML parser context 192 * @value: the element node 193 * 194 * Pushes a new element node on top of the node stack 195 * 196 * Returns 0 in case of error, the index in the stack otherwise 197 */ 198/* 199 * Those macros actually generate the functions 200 */ 201PUSH_AND_POP(extern, xmlParserInputPtr, input) 202PUSH_AND_POP(extern, xmlNodePtr, node) 203PUSH_AND_POP(extern, xmlChar*, name) 204 205static int spacePush(xmlParserCtxtPtr ctxt, int val) { 206 if (ctxt->spaceNr >= ctxt->spaceMax) { 207 ctxt->spaceMax *= 2; 208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 210 if (ctxt->spaceTab == NULL) { 211 xmlGenericError(xmlGenericErrorContext, 212 "realloc failed !\n"); 213 return(0); 214 } 215 } 216 ctxt->spaceTab[ctxt->spaceNr] = val; 217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 218 return(ctxt->spaceNr++); 219} 220 221static int spacePop(xmlParserCtxtPtr ctxt) { 222 int ret; 223 if (ctxt->spaceNr <= 0) return(0); 224 ctxt->spaceNr--; 225 if (ctxt->spaceNr > 0) 226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 227 else 228 ctxt->space = NULL; 229 ret = ctxt->spaceTab[ctxt->spaceNr]; 230 ctxt->spaceTab[ctxt->spaceNr] = -1; 231 return(ret); 232} 233 234/* 235 * Macros for accessing the content. Those should be used only by the parser, 236 * and not exported. 237 * 238 * Dirty macros, i.e. one often need to make assumption on the context to 239 * use them 240 * 241 * CUR_PTR return the current pointer to the xmlChar to be parsed. 242 * To be used with extreme caution since operations consuming 243 * characters may move the input buffer to a different location ! 244 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 245 * This should be used internally by the parser 246 * only to compare to ASCII values otherwise it would break when 247 * running with UTF-8 encoding. 

/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, the current xmlChar
 *           of the input otherwise. This should be used internally by the
 *           parser only to compare to ASCII values otherwise it would break
 *           when running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done: it evaluates to -1 while a
 *           token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare `if`/compound statement and
 * COPY_BUF to a bare `if ... else`; they are kept in that shape because
 * existing call sites in this file rely on it (e.g. GROW used without a
 * trailing semicolon). Do not use them as the unbraced body of an if/else.
 * Macro parameters are parenthesized in the expansions so that arbitrary
 * expressions can be passed safely.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * A pending ctxt->token is looked at first: if it is not a blank nothing
 * is skipped, otherwise it is consumed and counted as one blank.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    if (ctxt->token != 0) {
        if (!IS_BLANK(ctxt->token))
            return(0);
        ctxt->token = 0;
        res++;
    }
    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
        const xmlChar *cur;
        /*
         * if we are in the document content, go really fast
         * (single input on the stack and not in the DTD: the buffer is
         * walked directly, without going through NEXT)
         */
        cur = ctxt->input->cur;
        while (IS_BLANK(*cur)) {
            if (*cur == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            }
            cur++;
            res++;
            if (*cur == 0) {
                /*
                 * hit the end of the buffered data: publish the position,
                 * try to read more, then reload the pointer since growing
                 * may have changed ctxt->input->cur
                 */
                ctxt->input->cur = cur;
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
        }
        ctxt->input->cur = cur;
    } else {
        int cur;
        do {
            cur = CUR;
            while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
                NEXT;
                cur = CUR;
                res++;
            }
            /*
             * end of a stacked input: pop it (unless inside a comment)
             * and carry on with the input below
             */
            while ((cur == 0) && (ctxt->inputNr > 1) &&
                   (ctxt->instate != XML_PARSER_COMMENT)) {
                xmlPopInput(ctxt);
                cur = CUR;
            }
            /*
             * Need to handle support of entities branching here
             */
            if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
            /*
             * NOTE(review): `cur` is not reloaded after the PE reference
             * handling above, so the loop condition tests the character
             * seen before it - confirm this is intended.
             */
        } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/
399 * 400 * Returns the current xmlChar in the parser context 401 */ 402xmlChar 403xmlPopInput(xmlParserCtxtPtr ctxt) { 404 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 405 if (xmlParserDebugEntities) 406 xmlGenericError(xmlGenericErrorContext, 407 "Popping input %d\n", ctxt->inputNr); 408 xmlFreeInputStream(inputPop(ctxt)); 409 if ((*ctxt->input->cur == 0) && 410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 411 return(xmlPopInput(ctxt)); 412 return(CUR); 413} 414 415/** 416 * xmlPushInput: 417 * @ctxt: an XML parser context 418 * @input: an XML parser input fragment (entity, XML fragment ...). 419 * 420 * xmlPushInput: switch to a new input stream which is stacked on top 421 * of the previous one(s). 422 */ 423void 424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 425 if (input == NULL) return; 426 427 if (xmlParserDebugEntities) { 428 if ((ctxt->input != NULL) && (ctxt->input->filename)) 429 xmlGenericError(xmlGenericErrorContext, 430 "%s(%d): ", ctxt->input->filename, 431 ctxt->input->line); 432 xmlGenericError(xmlGenericErrorContext, 433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 434 } 435 inputPush(ctxt, input); 436 GROW; 437} 438 439/** 440 * xmlParseCharRef: 441 * @ctxt: an XML parser context 442 * 443 * parse Reference declarations 444 * 445 * [66] CharRef ::= '&#' [0-9]+ ';' | 446 * '&#x' [0-9a-fA-F]+ ';' 447 * 448 * [ WFC: Legal Character ] 449 * Characters referred to using character references must match the 450 * production for Char. 
451 * 452 * Returns the value parsed (as an int), 0 in case of error 453 */ 454int 455xmlParseCharRef(xmlParserCtxtPtr ctxt) { 456 unsigned int val = 0; 457 int count = 0; 458 459 if (ctxt->token != 0) { 460 val = ctxt->token; 461 ctxt->token = 0; 462 return(val); 463 } 464 /* 465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 466 */ 467 if ((RAW == '&') && (NXT(1) == '#') && 468 (NXT(2) == 'x')) { 469 SKIP(3); 470 GROW; 471 while (RAW != ';') { /* loop blocked by count */ 472 if (count++ > 20) { 473 count = 0; 474 GROW; 475 } 476 if ((RAW >= '0') && (RAW <= '9')) 477 val = val * 16 + (CUR - '0'); 478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 479 val = val * 16 + (CUR - 'a') + 10; 480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 481 val = val * 16 + (CUR - 'A') + 10; 482 else { 483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 485 ctxt->sax->error(ctxt->userData, 486 "xmlParseCharRef: invalid hexadecimal value\n"); 487 ctxt->wellFormed = 0; 488 ctxt->disableSAX = 1; 489 val = 0; 490 break; 491 } 492 NEXT; 493 count++; 494 } 495 if (RAW == ';') { 496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 497 ctxt->nbChars ++; 498 ctxt->input->cur++; 499 } 500 } else if ((RAW == '&') && (NXT(1) == '#')) { 501 SKIP(2); 502 GROW; 503 while (RAW != ';') { /* loop blocked by count */ 504 if (count++ > 20) { 505 count = 0; 506 GROW; 507 } 508 if ((RAW >= '0') && (RAW <= '9')) 509 val = val * 10 + (CUR - '0'); 510 else { 511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 513 ctxt->sax->error(ctxt->userData, 514 "xmlParseCharRef: invalid decimal value\n"); 515 ctxt->wellFormed = 0; 516 ctxt->disableSAX = 1; 517 val = 0; 518 break; 519 } 520 NEXT; 521 count++; 522 } 523 if (RAW == ';') { 524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 525 ctxt->nbChars ++; 526 ctxt->input->cur++; 527 } 528 
} else { 529 ctxt->errNo = XML_ERR_INVALID_CHARREF; 530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 531 ctxt->sax->error(ctxt->userData, 532 "xmlParseCharRef: invalid value\n"); 533 ctxt->wellFormed = 0; 534 ctxt->disableSAX = 1; 535 } 536 537 /* 538 * [ WFC: Legal Character ] 539 * Characters referred to using character references must match the 540 * production for Char. 541 */ 542 if (IS_CHAR(val)) { 543 return(val); 544 } else { 545 ctxt->errNo = XML_ERR_INVALID_CHAR; 546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 547 ctxt->sax->error(ctxt->userData, 548 "xmlParseCharRef: invalid xmlChar value %d\n", 549 val); 550 ctxt->wellFormed = 0; 551 ctxt->disableSAX = 1; 552 } 553 return(0); 554} 555 556/** 557 * xmlParseStringCharRef: 558 * @ctxt: an XML parser context 559 * @str: a pointer to an index in the string 560 * 561 * parse Reference declarations, variant parsing from a string rather 562 * than an an input flow. 563 * 564 * [66] CharRef ::= '&#' [0-9]+ ';' | 565 * '&#x' [0-9a-fA-F]+ ';' 566 * 567 * [ WFC: Legal Character ] 568 * Characters referred to using character references must match the 569 * production for Char. 
570 * 571 * Returns the value parsed (as an int), 0 in case of error, str will be 572 * updated to the current value of the index 573 */ 574static int 575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 576 const xmlChar *ptr; 577 xmlChar cur; 578 int val = 0; 579 580 if ((str == NULL) || (*str == NULL)) return(0); 581 ptr = *str; 582 cur = *ptr; 583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 584 ptr += 3; 585 cur = *ptr; 586 while (cur != ';') { /* Non input consuming loop */ 587 if ((cur >= '0') && (cur <= '9')) 588 val = val * 16 + (cur - '0'); 589 else if ((cur >= 'a') && (cur <= 'f')) 590 val = val * 16 + (cur - 'a') + 10; 591 else if ((cur >= 'A') && (cur <= 'F')) 592 val = val * 16 + (cur - 'A') + 10; 593 else { 594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 596 ctxt->sax->error(ctxt->userData, 597 "xmlParseStringCharRef: invalid hexadecimal value\n"); 598 ctxt->wellFormed = 0; 599 ctxt->disableSAX = 1; 600 val = 0; 601 break; 602 } 603 ptr++; 604 cur = *ptr; 605 } 606 if (cur == ';') 607 ptr++; 608 } else if ((cur == '&') && (ptr[1] == '#')){ 609 ptr += 2; 610 cur = *ptr; 611 while (cur != ';') { /* Non input consuming loops */ 612 if ((cur >= '0') && (cur <= '9')) 613 val = val * 10 + (cur - '0'); 614 else { 615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 617 ctxt->sax->error(ctxt->userData, 618 "xmlParseStringCharRef: invalid decimal value\n"); 619 ctxt->wellFormed = 0; 620 ctxt->disableSAX = 1; 621 val = 0; 622 break; 623 } 624 ptr++; 625 cur = *ptr; 626 } 627 if (cur == ';') 628 ptr++; 629 } else { 630 ctxt->errNo = XML_ERR_INVALID_CHARREF; 631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 632 ctxt->sax->error(ctxt->userData, 633 "xmlParseStringCharRef: invalid value\n"); 634 ctxt->wellFormed = 0; 635 ctxt->disableSAX = 1; 636 return(0); 637 } 638 *str = ptr; 639 640 /* 641 * [ WFC: Legal 
Character ] 642 * Characters referred to using character references must match the 643 * production for Char. 644 */ 645 if (IS_CHAR(val)) { 646 return(val); 647 } else { 648 ctxt->errNo = XML_ERR_INVALID_CHAR; 649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 650 ctxt->sax->error(ctxt->userData, 651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val); 652 ctxt->wellFormed = 0; 653 ctxt->disableSAX = 1; 654 } 655 return(0); 656} 657 658/** 659 * xmlParserHandlePEReference: 660 * @ctxt: the parser context 661 * 662 * [69] PEReference ::= '%' Name ';' 663 * 664 * [ WFC: No Recursion ] 665 * A parsed entity must not contain a recursive 666 * reference to itself, either directly or indirectly. 667 * 668 * [ WFC: Entity Declared ] 669 * In a document without any DTD, a document with only an internal DTD 670 * subset which contains no parameter entity references, or a document 671 * with "standalone='yes'", ... ... The declaration of a parameter 672 * entity must precede any reference to it... 673 * 674 * [ VC: Entity Declared ] 675 * In a document with an external subset or external parameter entities 676 * with "standalone='no'", ... ... The declaration of a parameter entity 677 * must precede any reference to it... 678 * 679 * [ WFC: In DTD ] 680 * Parameter-entity references may only appear in the DTD. 681 * NOTE: misleading but this is handled. 682 * 683 * A PEReference may have been detected in the current input stream 684 * the handling is done accordingly to 685 * http://www.w3.org/TR/REC-xml#entproc 686 * i.e. 
687 * - Included in literal in entity values 688 * - Included as Parameter Entity reference within DTDs 689 */ 690void 691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 692 xmlChar *name; 693 xmlEntityPtr entity = NULL; 694 xmlParserInputPtr input; 695 696 if (ctxt->token != 0) { 697 return; 698 } 699 if (RAW != '%') return; 700 switch(ctxt->instate) { 701 case XML_PARSER_CDATA_SECTION: 702 return; 703 case XML_PARSER_COMMENT: 704 return; 705 case XML_PARSER_START_TAG: 706 return; 707 case XML_PARSER_END_TAG: 708 return; 709 case XML_PARSER_EOF: 710 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 713 ctxt->wellFormed = 0; 714 ctxt->disableSAX = 1; 715 return; 716 case XML_PARSER_PROLOG: 717 case XML_PARSER_START: 718 case XML_PARSER_MISC: 719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 722 ctxt->wellFormed = 0; 723 ctxt->disableSAX = 1; 724 return; 725 case XML_PARSER_ENTITY_DECL: 726 case XML_PARSER_CONTENT: 727 case XML_PARSER_ATTRIBUTE_VALUE: 728 case XML_PARSER_PI: 729 case XML_PARSER_SYSTEM_LITERAL: 730 case XML_PARSER_PUBLIC_LITERAL: 731 /* we just ignore it there */ 732 return; 733 case XML_PARSER_EPILOG: 734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 737 ctxt->wellFormed = 0; 738 ctxt->disableSAX = 1; 739 return; 740 case XML_PARSER_ENTITY_VALUE: 741 /* 742 * NOTE: in the case of entity values, we don't do the 743 * substitution here since we need the literal 744 * entity value to be able to save the internal 745 * subset of the document. 
746 * This will be handled by xmlStringDecodeEntities 747 */ 748 return; 749 case XML_PARSER_DTD: 750 /* 751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 752 * In the internal DTD subset, parameter-entity references 753 * can occur only where markup declarations can occur, not 754 * within markup declarations. 755 * In that case this is handled in xmlParseMarkupDecl 756 */ 757 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 758 return; 759 break; 760 case XML_PARSER_IGNORE: 761 return; 762 } 763 764 NEXT; 765 name = xmlParseName(ctxt); 766 if (xmlParserDebugEntities) 767 xmlGenericError(xmlGenericErrorContext, 768 "PEReference: %s\n", name); 769 if (name == NULL) { 770 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n"); 773 ctxt->wellFormed = 0; 774 ctxt->disableSAX = 1; 775 } else { 776 if (RAW == ';') { 777 NEXT; 778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 780 if (entity == NULL) { 781 782 /* 783 * [ WFC: Entity Declared ] 784 * In a document without any DTD, a document with only an 785 * internal DTD subset which contains no parameter entity 786 * references, or a document with "standalone='yes'", ... 787 * ... The declaration of a parameter entity must precede 788 * any reference to it... 789 */ 790 if ((ctxt->standalone == 1) || 791 ((ctxt->hasExternalSubset == 0) && 792 (ctxt->hasPErefs == 0))) { 793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 794 ctxt->sax->error(ctxt->userData, 795 "PEReference: %%%s; not found\n", name); 796 ctxt->wellFormed = 0; 797 ctxt->disableSAX = 1; 798 } else { 799 /* 800 * [ VC: Entity Declared ] 801 * In a document with an external subset or external 802 * parameter entities with "standalone='no'", ... 803 * ... 
The declaration of a parameter entity must precede 804 * any reference to it... 805 */ 806 if ((!ctxt->disableSAX) && 807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 808 ctxt->vctxt.error(ctxt->vctxt.userData, 809 "PEReference: %%%s; not found\n", name); 810 } else if ((!ctxt->disableSAX) && 811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 812 ctxt->sax->warning(ctxt->userData, 813 "PEReference: %%%s; not found\n", name); 814 ctxt->valid = 0; 815 } 816 } else { 817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 819 xmlChar start[4]; 820 xmlCharEncoding enc; 821 822 /* 823 * handle the extra spaces added before and after 824 * c.f. http://www.w3.org/TR/REC-xml#as-PE 825 * this is done independently. 826 */ 827 input = xmlNewEntityInputStream(ctxt, entity); 828 xmlPushInput(ctxt, input); 829 830 /* 831 * Get the 4 first bytes and decode the charset 832 * if enc != XML_CHAR_ENCODING_NONE 833 * plug some encoding conversion routines. 
834 */ 835 GROW 836 start[0] = RAW; 837 start[1] = NXT(1); 838 start[2] = NXT(2); 839 start[3] = NXT(3); 840 enc = xmlDetectCharEncoding(start, 4); 841 if (enc != XML_CHAR_ENCODING_NONE) { 842 xmlSwitchEncoding(ctxt, enc); 843 } 844 845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 846 (RAW == '<') && (NXT(1) == '?') && 847 (NXT(2) == 'x') && (NXT(3) == 'm') && 848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 849 xmlParseTextDecl(ctxt); 850 } 851 if (ctxt->token == 0) 852 ctxt->token = ' '; 853 } else { 854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 855 ctxt->sax->error(ctxt->userData, 856 "xmlParserHandlePEReference: %s is not a parameter entity\n", 857 name); 858 ctxt->wellFormed = 0; 859 ctxt->disableSAX = 1; 860 } 861 } 862 } else { 863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 865 ctxt->sax->error(ctxt->userData, 866 "xmlParserHandlePEReference: expecting ';'\n"); 867 ctxt->wellFormed = 0; 868 ctxt->disableSAX = 1; 869 } 870 xmlFree(name); 871 } 872} 873 874/* 875 * Macro used to grow the current buffer. 876 */ 877#define growBuffer(buffer) { \ 878 buffer##_size *= 2; \ 879 buffer = (xmlChar *) \ 880 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 881 if (buffer == NULL) { \ 882 perror("realloc failed"); \ 883 return(NULL); \ 884 } \ 885} 886 887/** 888 * xmlStringDecodeEntities: 889 * @ctxt: the parser context 890 * @str: the input string 891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 892 * @end: an end marker xmlChar, 0 if none 893 * @end2: an end marker xmlChar, 0 if none 894 * @end3: an end marker xmlChar, 0 if none 895 * 896 * Takes a entity string content and process to do the adequate substitutions. 897 * 898 * [67] Reference ::= EntityRef | CharRef 899 * 900 * [69] PEReference ::= '%' Name ';' 901 * 902 * Returns A newly allocated string with the substitution done. The caller 903 * must deallocate it ! 
904 */ 905xmlChar * 906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 907 xmlChar end, xmlChar end2, xmlChar end3) { 908 xmlChar *buffer = NULL; 909 int buffer_size = 0; 910 911 xmlChar *current = NULL; 912 xmlEntityPtr ent; 913 int c,l; 914 int nbchars = 0; 915 916 if (str == NULL) 917 return(NULL); 918 919 if (ctxt->depth > 40) { 920 ctxt->errNo = XML_ERR_ENTITY_LOOP; 921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 922 ctxt->sax->error(ctxt->userData, 923 "Detected entity reference loop\n"); 924 ctxt->wellFormed = 0; 925 ctxt->disableSAX = 1; 926 return(NULL); 927 } 928 929 /* 930 * allocate a translation buffer. 931 */ 932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 934 if (buffer == NULL) { 935 perror("xmlStringDecodeEntities: malloc failed"); 936 return(NULL); 937 } 938 939 /* 940 * OK loop until we reach one of the ending char or a size limit. 941 * we are operating on already parsed values. 
942 */ 943 c = CUR_SCHAR(str, l); 944 while ((c != 0) && (c != end) && /* non input consuming loop */ 945 (c != end2) && (c != end3)) { 946 947 if (c == 0) break; 948 if ((c == '&') && (str[1] == '#')) { 949 int val = xmlParseStringCharRef(ctxt, &str); 950 if (val != 0) { 951 COPY_BUF(0,buffer,nbchars,val); 952 } 953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 954 if (xmlParserDebugEntities) 955 xmlGenericError(xmlGenericErrorContext, 956 "String decoding Entity Reference: %.30s\n", 957 str); 958 ent = xmlParseStringEntityRef(ctxt, &str); 959 if ((ent != NULL) && 960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 961 if (ent->content != NULL) { 962 COPY_BUF(0,buffer,nbchars,ent->content[0]); 963 } else { 964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 965 ctxt->sax->error(ctxt->userData, 966 "internal error entity has no content\n"); 967 } 968 } else if ((ent != NULL) && (ent->content != NULL)) { 969 xmlChar *rep; 970 971 ctxt->depth++; 972 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 973 0, 0, 0); 974 ctxt->depth--; 975 if (rep != NULL) { 976 current = rep; 977 while (*current != 0) { /* non input consuming loop */ 978 buffer[nbchars++] = *current++; 979 if (nbchars > 980 buffer_size - XML_PARSER_BUFFER_SIZE) { 981 growBuffer(buffer); 982 } 983 } 984 xmlFree(rep); 985 } 986 } else if (ent != NULL) { 987 int i = xmlStrlen(ent->name); 988 const xmlChar *cur = ent->name; 989 990 buffer[nbchars++] = '&'; 991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 992 growBuffer(buffer); 993 } 994 for (;i > 0;i--) 995 buffer[nbchars++] = *cur++; 996 buffer[nbchars++] = ';'; 997 } 998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 999 if (xmlParserDebugEntities) 1000 xmlGenericError(xmlGenericErrorContext, 1001 "String decoding PE Reference: %.30s\n", str); 1002 ent = xmlParseStringPEReference(ctxt, &str); 1003 if (ent != NULL) { 1004 xmlChar *rep; 1005 1006 ctxt->depth++; 1007 rep = xmlStringDecodeEntities(ctxt, 
ent->content, what, 1008 0, 0, 0); 1009 ctxt->depth--; 1010 if (rep != NULL) { 1011 current = rep; 1012 while (*current != 0) { /* non input consuming loop */ 1013 buffer[nbchars++] = *current++; 1014 if (nbchars > 1015 buffer_size - XML_PARSER_BUFFER_SIZE) { 1016 growBuffer(buffer); 1017 } 1018 } 1019 xmlFree(rep); 1020 } 1021 } 1022 } else { 1023 COPY_BUF(l,buffer,nbchars,c); 1024 str += l; 1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1026 growBuffer(buffer); 1027 } 1028 } 1029 c = CUR_SCHAR(str, l); 1030 } 1031 buffer[nbchars++] = 0; 1032 return(buffer); 1033} 1034 1035 1036/************************************************************************ 1037 * * 1038 * Commodity functions to handle xmlChars * 1039 * * 1040 ************************************************************************/ 1041 1042/** 1043 * xmlStrndup: 1044 * @cur: the input xmlChar * 1045 * @len: the len of @cur 1046 * 1047 * a strndup for array of xmlChar's 1048 * 1049 * Returns a new xmlChar * or NULL 1050 */ 1051xmlChar * 1052xmlStrndup(const xmlChar *cur, int len) { 1053 xmlChar *ret; 1054 1055 if ((cur == NULL) || (len < 0)) return(NULL); 1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1057 if (ret == NULL) { 1058 xmlGenericError(xmlGenericErrorContext, 1059 "malloc of %ld byte failed\n", 1060 (len + 1) * (long)sizeof(xmlChar)); 1061 return(NULL); 1062 } 1063 memcpy(ret, cur, len * sizeof(xmlChar)); 1064 ret[len] = 0; 1065 return(ret); 1066} 1067 1068/** 1069 * xmlStrdup: 1070 * @cur: the input xmlChar * 1071 * 1072 * a strdup for array of xmlChar's. Since they are supposed to be 1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1074 * a termination mark of '0'. 
1075 * 1076 * Returns a new xmlChar * or NULL 1077 */ 1078xmlChar * 1079xmlStrdup(const xmlChar *cur) { 1080 const xmlChar *p = cur; 1081 1082 if (cur == NULL) return(NULL); 1083 while (*p != 0) p++; /* non input consuming */ 1084 return(xmlStrndup(cur, p - cur)); 1085} 1086 1087/** 1088 * xmlCharStrndup: 1089 * @cur: the input char * 1090 * @len: the len of @cur 1091 * 1092 * a strndup for char's to xmlChar's 1093 * 1094 * Returns a new xmlChar * or NULL 1095 */ 1096 1097xmlChar * 1098xmlCharStrndup(const char *cur, int len) { 1099 int i; 1100 xmlChar *ret; 1101 1102 if ((cur == NULL) || (len < 0)) return(NULL); 1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1104 if (ret == NULL) { 1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1106 (len + 1) * (long)sizeof(xmlChar)); 1107 return(NULL); 1108 } 1109 for (i = 0;i < len;i++) 1110 ret[i] = (xmlChar) cur[i]; 1111 ret[len] = 0; 1112 return(ret); 1113} 1114 1115/** 1116 * xmlCharStrdup: 1117 * @cur: the input char * 1118 * @len: the len of @cur 1119 * 1120 * a strdup for char's to xmlChar's 1121 * 1122 * Returns a new xmlChar * or NULL 1123 */ 1124 1125xmlChar * 1126xmlCharStrdup(const char *cur) { 1127 const char *p = cur; 1128 1129 if (cur == NULL) return(NULL); 1130 while (*p != '\0') p++; /* non input consuming */ 1131 return(xmlCharStrndup(cur, p - cur)); 1132} 1133 1134/** 1135 * xmlStrcmp: 1136 * @str1: the first xmlChar * 1137 * @str2: the second xmlChar * 1138 * 1139 * a strcmp for xmlChar's 1140 * 1141 * Returns the integer result of the comparison 1142 */ 1143 1144int 1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1146 register int tmp; 1147 1148 if (str1 == str2) return(0); 1149 if (str1 == NULL) return(-1); 1150 if (str2 == NULL) return(1); 1151 do { 1152 tmp = *str1++ - *str2; 1153 if (tmp != 0) return(tmp); 1154 } while (*str2++ != 0); 1155 return 0; 1156} 1157 1158/** 1159 * xmlStrEqual: 1160 * @str1: the first xmlChar * 1161 * @str2: the second 
xmlChar * 1162 * 1163 * Check if both string are equal of have same content 1164 * Should be a bit more readable and faster than xmlStrEqual() 1165 * 1166 * Returns 1 if they are equal, 0 if they are different 1167 */ 1168 1169int 1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1171 if (str1 == str2) return(1); 1172 if (str1 == NULL) return(0); 1173 if (str2 == NULL) return(0); 1174 do { 1175 if (*str1++ != *str2) return(0); 1176 } while (*str2++); 1177 return(1); 1178} 1179 1180/** 1181 * xmlStrncmp: 1182 * @str1: the first xmlChar * 1183 * @str2: the second xmlChar * 1184 * @len: the max comparison length 1185 * 1186 * a strncmp for xmlChar's 1187 * 1188 * Returns the integer result of the comparison 1189 */ 1190 1191int 1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1193 register int tmp; 1194 1195 if (len <= 0) return(0); 1196 if (str1 == str2) return(0); 1197 if (str1 == NULL) return(-1); 1198 if (str2 == NULL) return(1); 1199 do { 1200 tmp = *str1++ - *str2; 1201 if (tmp != 0 || --len == 0) return(tmp); 1202 } while (*str2++ != 0); 1203 return 0; 1204} 1205 1206static const xmlChar casemap[256] = { 1207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1225 
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1239}; 1240 1241/** 1242 * xmlStrcasecmp: 1243 * @str1: the first xmlChar * 1244 * @str2: the second xmlChar * 1245 * 1246 * a strcasecmp for xmlChar's 1247 * 1248 * Returns the integer result of the comparison 1249 */ 1250 1251int 1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1253 register int tmp; 1254 1255 if (str1 == str2) return(0); 1256 if (str1 == NULL) return(-1); 1257 if (str2 == NULL) return(1); 1258 do { 1259 tmp = casemap[*str1++] - casemap[*str2]; 1260 if (tmp != 0) return(tmp); 1261 } while (*str2++ != 0); 1262 return 0; 1263} 1264 1265/** 1266 * xmlStrncasecmp: 1267 * @str1: the first xmlChar * 1268 * @str2: the second xmlChar * 1269 * @len: the max comparison length 1270 * 1271 * a strncasecmp for xmlChar's 1272 * 1273 * Returns the integer result of the comparison 1274 */ 1275 1276int 1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1278 register int tmp; 1279 1280 if (len <= 0) return(0); 1281 if (str1 == str2) return(0); 1282 if (str1 == NULL) return(-1); 1283 if (str2 == NULL) return(1); 1284 do { 1285 tmp = casemap[*str1++] - casemap[*str2]; 1286 if (tmp != 0 || --len == 0) return(tmp); 1287 } while (*str2++ != 0); 1288 return 0; 1289} 1290 1291/** 1292 * xmlStrchr: 1293 * @str: the xmlChar * array 1294 * @val: the xmlChar to search 1295 * 1296 * a strchr for xmlChar's 1297 * 1298 
* Returns the xmlChar * for the first occurrence or NULL. 1299 */ 1300 1301const xmlChar * 1302xmlStrchr(const xmlChar *str, xmlChar val) { 1303 if (str == NULL) return(NULL); 1304 while (*str != 0) { /* non input consuming */ 1305 if (*str == val) return((xmlChar *) str); 1306 str++; 1307 } 1308 return(NULL); 1309} 1310 1311/** 1312 * xmlStrstr: 1313 * @str: the xmlChar * array (haystack) 1314 * @val: the xmlChar to search (needle) 1315 * 1316 * a strstr for xmlChar's 1317 * 1318 * Returns the xmlChar * for the first occurrence or NULL. 1319 */ 1320 1321const xmlChar * 1322xmlStrstr(const xmlChar *str, const xmlChar *val) { 1323 int n; 1324 1325 if (str == NULL) return(NULL); 1326 if (val == NULL) return(NULL); 1327 n = xmlStrlen(val); 1328 1329 if (n == 0) return(str); 1330 while (*str != 0) { /* non input consuming */ 1331 if (*str == *val) { 1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1333 } 1334 str++; 1335 } 1336 return(NULL); 1337} 1338 1339/** 1340 * xmlStrcasestr: 1341 * @str: the xmlChar * array (haystack) 1342 * @val: the xmlChar to search (needle) 1343 * 1344 * a case-ignoring strstr for xmlChar's 1345 * 1346 * Returns the xmlChar * for the first occurrence or NULL. 1347 */ 1348 1349const xmlChar * 1350xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1351 int n; 1352 1353 if (str == NULL) return(NULL); 1354 if (val == NULL) return(NULL); 1355 n = xmlStrlen(val); 1356 1357 if (n == 0) return(str); 1358 while (*str != 0) { /* non input consuming */ 1359 if (casemap[*str] == casemap[*val]) 1360 if (!xmlStrncasecmp(str, val, n)) return(str); 1361 str++; 1362 } 1363 return(NULL); 1364} 1365 1366/** 1367 * xmlStrsub: 1368 * @str: the xmlChar * array (haystack) 1369 * @start: the index of the first char (zero based) 1370 * @len: the length of the substring 1371 * 1372 * Extract a substring of a given string 1373 * 1374 * Returns the xmlChar * for the first occurrence or NULL. 
1375 */ 1376 1377xmlChar * 1378xmlStrsub(const xmlChar *str, int start, int len) { 1379 int i; 1380 1381 if (str == NULL) return(NULL); 1382 if (start < 0) return(NULL); 1383 if (len < 0) return(NULL); 1384 1385 for (i = 0;i < start;i++) { 1386 if (*str == 0) return(NULL); 1387 str++; 1388 } 1389 if (*str == 0) return(NULL); 1390 return(xmlStrndup(str, len)); 1391} 1392 1393/** 1394 * xmlStrlen: 1395 * @str: the xmlChar * array 1396 * 1397 * length of a xmlChar's string 1398 * 1399 * Returns the number of xmlChar contained in the ARRAY. 1400 */ 1401 1402int 1403xmlStrlen(const xmlChar *str) { 1404 int len = 0; 1405 1406 if (str == NULL) return(0); 1407 while (*str != 0) { /* non input consuming */ 1408 str++; 1409 len++; 1410 } 1411 return(len); 1412} 1413 1414/** 1415 * xmlStrncat: 1416 * @cur: the original xmlChar * array 1417 * @add: the xmlChar * array added 1418 * @len: the length of @add 1419 * 1420 * a strncat for array of xmlChar's, it will extend @cur with the len 1421 * first bytes of @add. 1422 * 1423 * Returns a new xmlChar *, the original @cur is reallocated if needed 1424 * and should not be freed 1425 */ 1426 1427xmlChar * 1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1429 int size; 1430 xmlChar *ret; 1431 1432 if ((add == NULL) || (len == 0)) 1433 return(cur); 1434 if (cur == NULL) 1435 return(xmlStrndup(add, len)); 1436 1437 size = xmlStrlen(cur); 1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1439 if (ret == NULL) { 1440 xmlGenericError(xmlGenericErrorContext, 1441 "xmlStrncat: realloc of %ld byte failed\n", 1442 (size + len + 1) * (long)sizeof(xmlChar)); 1443 return(cur); 1444 } 1445 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1446 ret[size + len] = 0; 1447 return(ret); 1448} 1449 1450/** 1451 * xmlStrcat: 1452 * @cur: the original xmlChar * array 1453 * @add: the xmlChar * array added 1454 * 1455 * a strcat for array of xmlChar's. 
Since they are supposed to be 1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1457 * a termination mark of '0'. 1458 * 1459 * Returns a new xmlChar * containing the concatenated string. 1460 */ 1461xmlChar * 1462xmlStrcat(xmlChar *cur, const xmlChar *add) { 1463 const xmlChar *p = add; 1464 1465 if (add == NULL) return(cur); 1466 if (cur == NULL) 1467 return(xmlStrdup(add)); 1468 1469 while (*p != 0) p++; /* non input consuming */ 1470 return(xmlStrncat(cur, add, p - add)); 1471} 1472 1473/************************************************************************ 1474 * * 1475 * Commodity functions, cleanup needed ? * 1476 * * 1477 ************************************************************************/ 1478 1479/** 1480 * areBlanks: 1481 * @ctxt: an XML parser context 1482 * @str: a xmlChar * 1483 * @len: the size of @str 1484 * 1485 * Is this a sequence of blank chars that one can ignore ? 1486 * 1487 * Returns 1 if ignorable 0 otherwise. 1488 */ 1489 1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1491 int i, ret; 1492 xmlNodePtr lastChild; 1493 1494 /* 1495 * Don't spend time trying to differentiate them, the same callback is 1496 * used ! 1497 */ 1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 1499 return(0); 1500 1501 /* 1502 * Check for xml:space value. 
1503 */ 1504 if (*(ctxt->space) == 1) 1505 return(0); 1506 1507 /* 1508 * Check that the string is made of blanks 1509 */ 1510 for (i = 0;i < len;i++) 1511 if (!(IS_BLANK(str[i]))) return(0); 1512 1513 /* 1514 * Look if the element is mixed content in the DTD if available 1515 */ 1516 if (ctxt->node == NULL) return(0); 1517 if (ctxt->myDoc != NULL) { 1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1519 if (ret == 0) return(1); 1520 if (ret == 1) return(0); 1521 } 1522 1523 /* 1524 * Otherwise, heuristic :-\ 1525 */ 1526 if (RAW != '<') return(0); 1527 if ((ctxt->node->children == NULL) && 1528 (RAW == '<') && (NXT(1) == '/')) return(0); 1529 1530 lastChild = xmlGetLastChild(ctxt->node); 1531 if (lastChild == NULL) { 1532 if ((ctxt->node->type != XML_ELEMENT_NODE) && 1533 (ctxt->node->content != NULL)) return(0); 1534 } else if (xmlNodeIsText(lastChild)) 1535 return(0); 1536 else if ((ctxt->node->children != NULL) && 1537 (xmlNodeIsText(ctxt->node->children))) 1538 return(0); 1539 return(1); 1540} 1541 1542/************************************************************************ 1543 * * 1544 * Extra stuff for namespace support * 1545 * Relates to http://www.w3.org/TR/WD-xml-names * 1546 * * 1547 ************************************************************************/ 1548 1549/** 1550 * xmlSplitQName: 1551 * @ctxt: an XML parser context 1552 * @name: an XML parser context 1553 * @prefix: a xmlChar ** 1554 * 1555 * parse an UTF8 encoded XML qualified name string 1556 * 1557 * [NS 5] QName ::= (Prefix ':')? LocalPart 1558 * 1559 * [NS 6] Prefix ::= NCName 1560 * 1561 * [NS 7] LocalPart ::= NCName 1562 * 1563 * Returns the local part, and prefix is updated 1564 * to get the Prefix if any. 
1565 */ 1566 1567xmlChar * 1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1569 xmlChar buf[XML_MAX_NAMELEN + 5]; 1570 xmlChar *buffer = NULL; 1571 int len = 0; 1572 int max = XML_MAX_NAMELEN; 1573 xmlChar *ret = NULL; 1574 const xmlChar *cur = name; 1575 int c; 1576 1577 *prefix = NULL; 1578 1579#ifndef XML_XML_NAMESPACE 1580 /* xml: prefix is not really a namespace */ 1581 if ((cur[0] == 'x') && (cur[1] == 'm') && 1582 (cur[2] == 'l') && (cur[3] == ':')) 1583 return(xmlStrdup(name)); 1584#endif 1585 1586 /* nasty but valid */ 1587 if (cur[0] == ':') 1588 return(xmlStrdup(name)); 1589 1590 c = *cur++; 1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1592 buf[len++] = c; 1593 c = *cur++; 1594 } 1595 if (len >= max) { 1596 /* 1597 * Okay someone managed to make a huge name, so he's ready to pay 1598 * for the processing speed. 1599 */ 1600 max = len * 2; 1601 1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1603 if (buffer == NULL) { 1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1605 ctxt->sax->error(ctxt->userData, 1606 "xmlSplitQName: out of memory\n"); 1607 return(NULL); 1608 } 1609 memcpy(buffer, buf, len); 1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1611 if (len + 10 > max) { 1612 max *= 2; 1613 buffer = (xmlChar *) xmlRealloc(buffer, 1614 max * sizeof(xmlChar)); 1615 if (buffer == NULL) { 1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1617 ctxt->sax->error(ctxt->userData, 1618 "xmlSplitQName: out of memory\n"); 1619 return(NULL); 1620 } 1621 } 1622 buffer[len++] = c; 1623 c = *cur++; 1624 } 1625 buffer[len] = 0; 1626 } 1627 1628 if (buffer == NULL) 1629 ret = xmlStrndup(buf, len); 1630 else { 1631 ret = buffer; 1632 buffer = NULL; 1633 max = XML_MAX_NAMELEN; 1634 } 1635 1636 1637 if (c == ':') { 1638 c = *cur++; 1639 if (c == 0) return(ret); 1640 *prefix = ret; 1641 len = 0; 1642 1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml 
*/ 1644 buf[len++] = c; 1645 c = *cur++; 1646 } 1647 if (len >= max) { 1648 /* 1649 * Okay someone managed to make a huge name, so he's ready to pay 1650 * for the processing speed. 1651 */ 1652 max = len * 2; 1653 1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1655 if (buffer == NULL) { 1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1657 ctxt->sax->error(ctxt->userData, 1658 "xmlSplitQName: out of memory\n"); 1659 return(NULL); 1660 } 1661 memcpy(buffer, buf, len); 1662 while (c != 0) { /* tested bigname2.xml */ 1663 if (len + 10 > max) { 1664 max *= 2; 1665 buffer = (xmlChar *) xmlRealloc(buffer, 1666 max * sizeof(xmlChar)); 1667 if (buffer == NULL) { 1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1669 ctxt->sax->error(ctxt->userData, 1670 "xmlSplitQName: out of memory\n"); 1671 return(NULL); 1672 } 1673 } 1674 buffer[len++] = c; 1675 c = *cur++; 1676 } 1677 buffer[len] = 0; 1678 } 1679 1680 if (buffer == NULL) 1681 ret = xmlStrndup(buf, len); 1682 else { 1683 ret = buffer; 1684 } 1685 } 1686 1687 return(ret); 1688} 1689 1690/************************************************************************ 1691 * * 1692 * The parser itself * 1693 * Relates to http://www.w3.org/TR/REC-xml * 1694 * * 1695 ************************************************************************/ 1696 1697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1698/** 1699 * xmlParseName: 1700 * @ctxt: an XML parser context 1701 * 1702 * parse an XML name. 1703 * 1704 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1705 * CombiningChar | Extender 1706 * 1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1708 * 1709 * [6] Names ::= Name (S Name)* 1710 * 1711 * Returns the Name parsed or NULL 1712 */ 1713 1714xmlChar * 1715xmlParseName(xmlParserCtxtPtr ctxt) { 1716 const xmlChar *in; 1717 xmlChar *ret; 1718 int count = 0; 1719 1720 GROW; 1721 1722 /* 1723 * Accelerator for simple ASCII names 1724 */ 1725 in = ctxt->input->cur; 1726 if (((*in >= 0x61) && (*in <= 0x7A)) || 1727 ((*in >= 0x41) && (*in <= 0x5A)) || 1728 (*in == '_') || (*in == ':')) { 1729 in++; 1730 while (((*in >= 0x61) && (*in <= 0x7A)) || 1731 ((*in >= 0x41) && (*in <= 0x5A)) || 1732 ((*in >= 0x30) && (*in <= 0x39)) || 1733 (*in == '_') || (*in == '-') || 1734 (*in == ':') || (*in == '.')) 1735 in++; 1736 if ((*in > 0) && (*in < 0x80)) { 1737 count = in - ctxt->input->cur; 1738 ret = xmlStrndup(ctxt->input->cur, count); 1739 ctxt->input->cur = in; 1740 return(ret); 1741 } 1742 } 1743 return(xmlParseNameComplex(ctxt)); 1744} 1745 1746static xmlChar * 1747xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1748 xmlChar buf[XML_MAX_NAMELEN + 5]; 1749 int len = 0, l; 1750 int c; 1751 int count = 0; 1752 1753 /* 1754 * Handler for more complex cases 1755 */ 1756 GROW; 1757 c = CUR_CHAR(l); 1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1759 (!IS_LETTER(c) && (c != '_') && 1760 (c != ':'))) { 1761 return(NULL); 1762 } 1763 1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1766 (c == '.') || (c == '-') || 1767 (c == '_') || (c == ':') || 1768 (IS_COMBINING(c)) || 1769 (IS_EXTENDER(c)))) { 1770 if (count++ > 100) { 1771 count = 0; 1772 GROW; 1773 } 1774 COPY_BUF(l,buf,len,c); 1775 NEXTL(l); 1776 c = CUR_CHAR(l); 1777 if (len >= XML_MAX_NAMELEN) { 1778 /* 1779 * Okay someone managed to make a huge name, so he's ready to pay 1780 * for the processing speed. 
1781 */ 1782 xmlChar *buffer; 1783 int max = len * 2; 1784 1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1786 if (buffer == NULL) { 1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1788 ctxt->sax->error(ctxt->userData, 1789 "xmlParseNameComplex: out of memory\n"); 1790 return(NULL); 1791 } 1792 memcpy(buffer, buf, len); 1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1794 (c == '.') || (c == '-') || 1795 (c == '_') || (c == ':') || 1796 (IS_COMBINING(c)) || 1797 (IS_EXTENDER(c))) { 1798 if (count++ > 100) { 1799 count = 0; 1800 GROW; 1801 } 1802 if (len + 10 > max) { 1803 max *= 2; 1804 buffer = (xmlChar *) xmlRealloc(buffer, 1805 max * sizeof(xmlChar)); 1806 if (buffer == NULL) { 1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1808 ctxt->sax->error(ctxt->userData, 1809 "xmlParseNameComplex: out of memory\n"); 1810 return(NULL); 1811 } 1812 } 1813 COPY_BUF(l,buffer,len,c); 1814 NEXTL(l); 1815 c = CUR_CHAR(l); 1816 } 1817 buffer[len] = 0; 1818 return(buffer); 1819 } 1820 } 1821 return(xmlStrndup(buf, len)); 1822} 1823 1824/** 1825 * xmlParseStringName: 1826 * @ctxt: an XML parser context 1827 * @str: a pointer to the string pointer (IN/OUT) 1828 * 1829 * parse an XML name. 1830 * 1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1832 * CombiningChar | Extender 1833 * 1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1835 * 1836 * [6] Names ::= Name (S Name)* 1837 * 1838 * Returns the Name parsed or NULL. The @str pointer 1839 * is updated to the current location in the string. 
1840 */ 1841 1842static xmlChar * 1843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1844 xmlChar buf[XML_MAX_NAMELEN + 5]; 1845 const xmlChar *cur = *str; 1846 int len = 0, l; 1847 int c; 1848 1849 c = CUR_SCHAR(cur, l); 1850 if (!IS_LETTER(c) && (c != '_') && 1851 (c != ':')) { 1852 return(NULL); 1853 } 1854 1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1856 (c == '.') || (c == '-') || 1857 (c == '_') || (c == ':') || 1858 (IS_COMBINING(c)) || 1859 (IS_EXTENDER(c))) { 1860 COPY_BUF(l,buf,len,c); 1861 cur += l; 1862 c = CUR_SCHAR(cur, l); 1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1864 /* 1865 * Okay someone managed to make a huge name, so he's ready to pay 1866 * for the processing speed. 1867 */ 1868 xmlChar *buffer; 1869 int max = len * 2; 1870 1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1872 if (buffer == NULL) { 1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1874 ctxt->sax->error(ctxt->userData, 1875 "xmlParseStringName: out of memory\n"); 1876 return(NULL); 1877 } 1878 memcpy(buffer, buf, len); 1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1880 (c == '.') || (c == '-') || 1881 (c == '_') || (c == ':') || 1882 (IS_COMBINING(c)) || 1883 (IS_EXTENDER(c))) { 1884 if (len + 10 > max) { 1885 max *= 2; 1886 buffer = (xmlChar *) xmlRealloc(buffer, 1887 max * sizeof(xmlChar)); 1888 if (buffer == NULL) { 1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1890 ctxt->sax->error(ctxt->userData, 1891 "xmlParseStringName: out of memory\n"); 1892 return(NULL); 1893 } 1894 } 1895 COPY_BUF(l,buffer,len,c); 1896 cur += l; 1897 c = CUR_SCHAR(cur, l); 1898 } 1899 buffer[len] = 0; 1900 *str = cur; 1901 return(buffer); 1902 } 1903 } 1904 *str = cur; 1905 return(xmlStrndup(buf, len)); 1906} 1907 1908/** 1909 * xmlParseNmtoken: 1910 * @ctxt: an XML parser context 1911 * 1912 * parse an XML Nmtoken. 
1913 * 1914 * [7] Nmtoken ::= (NameChar)+ 1915 * 1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1917 * 1918 * Returns the Nmtoken parsed or NULL 1919 */ 1920 1921xmlChar * 1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1923 xmlChar buf[XML_MAX_NAMELEN + 5]; 1924 int len = 0, l; 1925 int c; 1926 int count = 0; 1927 1928 GROW; 1929 c = CUR_CHAR(l); 1930 1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1932 (c == '.') || (c == '-') || 1933 (c == '_') || (c == ':') || 1934 (IS_COMBINING(c)) || 1935 (IS_EXTENDER(c))) { 1936 if (count++ > 100) { 1937 count = 0; 1938 GROW; 1939 } 1940 COPY_BUF(l,buf,len,c); 1941 NEXTL(l); 1942 c = CUR_CHAR(l); 1943 if (len >= XML_MAX_NAMELEN) { 1944 /* 1945 * Okay someone managed to make a huge token, so he's ready to pay 1946 * for the processing speed. 1947 */ 1948 xmlChar *buffer; 1949 int max = len * 2; 1950 1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1952 if (buffer == NULL) { 1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1954 ctxt->sax->error(ctxt->userData, 1955 "xmlParseNmtoken: out of memory\n"); 1956 return(NULL); 1957 } 1958 memcpy(buffer, buf, len); 1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1960 (c == '.') || (c == '-') || 1961 (c == '_') || (c == ':') || 1962 (IS_COMBINING(c)) || 1963 (IS_EXTENDER(c))) { 1964 if (count++ > 100) { 1965 count = 0; 1966 GROW; 1967 } 1968 if (len + 10 > max) { 1969 max *= 2; 1970 buffer = (xmlChar *) xmlRealloc(buffer, 1971 max * sizeof(xmlChar)); 1972 if (buffer == NULL) { 1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1974 ctxt->sax->error(ctxt->userData, 1975 "xmlParseNmtoken: out of memory\n"); 1976 return(NULL); 1977 } 1978 } 1979 COPY_BUF(l,buffer,len,c); 1980 NEXTL(l); 1981 c = CUR_CHAR(l); 1982 } 1983 buffer[len] = 0; 1984 return(buffer); 1985 } 1986 } 1987 if (len == 0) 1988 return(NULL); 1989 return(xmlStrndup(buf, len)); 1990} 1991 1992/** 1993 * xmlParseEntityValue: 1994 * @ctxt: an XML parser 
context 1995 * @orig: if non-NULL store a copy of the original entity value 1996 * 1997 * parse a value for ENTITY declarations 1998 * 1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2000 * "'" ([^%&'] | PEReference | Reference)* "'" 2001 * 2002 * Returns the EntityValue parsed with reference substituted or NULL 2003 */ 2004 2005xmlChar * 2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2007 xmlChar *buf = NULL; 2008 int len = 0; 2009 int size = XML_PARSER_BUFFER_SIZE; 2010 int c, l; 2011 xmlChar stop; 2012 xmlChar *ret = NULL; 2013 const xmlChar *cur = NULL; 2014 xmlParserInputPtr input; 2015 2016 if (RAW == '"') stop = '"'; 2017 else if (RAW == '\'') stop = '\''; 2018 else { 2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2022 ctxt->wellFormed = 0; 2023 ctxt->disableSAX = 1; 2024 return(NULL); 2025 } 2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2027 if (buf == NULL) { 2028 xmlGenericError(xmlGenericErrorContext, 2029 "malloc of %d byte failed\n", size); 2030 return(NULL); 2031 } 2032 2033 /* 2034 * The content of the entity definition is copied in a buffer. 2035 */ 2036 2037 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2038 input = ctxt->input; 2039 GROW; 2040 NEXT; 2041 c = CUR_CHAR(l); 2042 /* 2043 * NOTE: 4.4.5 Included in Literal 2044 * When a parameter entity reference appears in a literal entity 2045 * value, ... a single or double quote character in the replacement 2046 * text is always treated as a normal data character and will not 2047 * terminate the literal. 
2048 * In practice it means we stop the loop only when back at parsing 2049 * the initial entity and the quote is found 2050 */ 2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2052 (ctxt->input != input))) { 2053 if (len + 5 >= size) { 2054 size *= 2; 2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2056 if (buf == NULL) { 2057 xmlGenericError(xmlGenericErrorContext, 2058 "realloc of %d byte failed\n", size); 2059 return(NULL); 2060 } 2061 } 2062 COPY_BUF(l,buf,len,c); 2063 NEXTL(l); 2064 /* 2065 * Pop-up of finished entities. 2066 */ 2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2068 xmlPopInput(ctxt); 2069 2070 GROW; 2071 c = CUR_CHAR(l); 2072 if (c == 0) { 2073 GROW; 2074 c = CUR_CHAR(l); 2075 } 2076 } 2077 buf[len] = 0; 2078 2079 /* 2080 * Raise problem w.r.t. '&' and '%' being used in non-entities 2081 * reference constructs. Note Charref will be handled in 2082 * xmlStringDecodeEntities() 2083 */ 2084 cur = buf; 2085 while (*cur != 0) { /* non input consuming */ 2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2087 xmlChar *name; 2088 xmlChar tmp = *cur; 2089 2090 cur++; 2091 name = xmlParseStringName(ctxt, &cur); 2092 if ((name == NULL) || (*cur != ';')) { 2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2095 ctxt->sax->error(ctxt->userData, 2096 "EntityValue: '%c' forbidden except for entities references\n", 2097 tmp); 2098 ctxt->wellFormed = 0; 2099 ctxt->disableSAX = 1; 2100 } 2101 if ((tmp == '%') && (ctxt->inSubset == 1) && 2102 (ctxt->inputNr == 1)) { 2103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2105 ctxt->sax->error(ctxt->userData, 2106 "EntityValue: PEReferences forbidden in internal subset\n", 2107 tmp); 2108 ctxt->wellFormed = 0; 2109 ctxt->disableSAX = 1; 2110 } 2111 if (name != NULL) 2112 xmlFree(name); 2113 } 2114 cur++; 2115 } 2116 2117 /* 2118 * Then 
PEReference entities are substituted. 2119 */ 2120 if (c != stop) { 2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2124 ctxt->wellFormed = 0; 2125 ctxt->disableSAX = 1; 2126 xmlFree(buf); 2127 } else { 2128 NEXT; 2129 /* 2130 * NOTE: 4.4.7 Bypassed 2131 * When a general entity reference appears in the EntityValue in 2132 * an entity declaration, it is bypassed and left as is. 2133 * so XML_SUBSTITUTE_REF is not set here. 2134 */ 2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2136 0, 0, 0); 2137 if (orig != NULL) 2138 *orig = buf; 2139 else 2140 xmlFree(buf); 2141 } 2142 2143 return(ret); 2144} 2145 2146/** 2147 * xmlParseAttValue: 2148 * @ctxt: an XML parser context 2149 * 2150 * parse a value for an attribute 2151 * Note: the parser won't do substitution of entities here, this 2152 * will be handled later in xmlStringGetNodeList 2153 * 2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2155 * "'" ([^<&'] | Reference)* "'" 2156 * 2157 * 3.3.3 Attribute-Value Normalization: 2158 * Before the value of an attribute is passed to the application or 2159 * checked for validity, the XML processor must normalize it as follows: 2160 * - a character reference is processed by appending the referenced 2161 * character to the attribute value 2162 * - an entity reference is processed by recursively processing the 2163 * replacement text of the entity 2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2165 * appending #x20 to the normalized value, except that only a single 2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2167 * parsed entity or the literal entity value of an internal parsed entity 2168 * - other characters are processed by appending them to the normalized value 2169 * If the declared value is not CDATA, then the XML processor must further 2170 * process the 
normalized attribute value by discarding any leading and 2171 * trailing space (#x20) characters, and by replacing sequences of space 2172 * (#x20) characters by a single space (#x20) character. 2173 * All attributes for which no declaration has been read should be treated 2174 * by a non-validating parser as if declared CDATA. 2175 * 2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2177 */ 2178 2179xmlChar * 2180xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2181 xmlChar limit = 0; 2182 xmlChar *buf = NULL; 2183 int len = 0; 2184 int buf_size = 0; 2185 int c, l; 2186 xmlChar *current = NULL; 2187 xmlEntityPtr ent; 2188 2189 2190 SHRINK; 2191 if (NXT(0) == '"') { 2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2193 limit = '"'; 2194 NEXT; 2195 } else if (NXT(0) == '\'') { 2196 limit = '\''; 2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2198 NEXT; 2199 } else { 2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2203 ctxt->wellFormed = 0; 2204 ctxt->disableSAX = 1; 2205 return(NULL); 2206 } 2207 2208 /* 2209 * allocate a translation buffer. 2210 */ 2211 buf_size = XML_PARSER_BUFFER_SIZE; 2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2213 if (buf == NULL) { 2214 perror("xmlParseAttValue: malloc failed"); 2215 return(NULL); 2216 } 2217 2218 /* 2219 * OK loop until we reach one of the ending char or a size limit. 
2220 */ 2221 c = CUR_CHAR(l); 2222 while (((NXT(0) != limit) && /* checked */ 2223 (c != '<')) || (ctxt->token != 0)) { 2224 if (c == 0) break; 2225 if (ctxt->token == '&') { 2226 if (ctxt->replaceEntities) { 2227 if (len > buf_size - 10) { 2228 growBuffer(buf); 2229 } 2230 buf[len++] = '&'; 2231 } else { 2232 /* 2233 * The reparsing will be done in xmlStringGetNodeList() 2234 * called by the attribute() function in SAX.c 2235 */ 2236 static xmlChar buffer[6] = "&"; 2237 2238 if (len > buf_size - 10) { 2239 growBuffer(buf); 2240 } 2241 current = &buffer[0]; 2242 while (*current != 0) { /* non input consuming */ 2243 buf[len++] = *current++; 2244 } 2245 ctxt->token = 0; 2246 } 2247 } else if (c == '&') { 2248 if (NXT(1) == '#') { 2249 int val = xmlParseCharRef(ctxt); 2250 if (val == '&') { 2251 if (ctxt->replaceEntities) { 2252 if (len > buf_size - 10) { 2253 growBuffer(buf); 2254 } 2255 buf[len++] = '&'; 2256 } else { 2257 /* 2258 * The reparsing will be done in xmlStringGetNodeList() 2259 * called by the attribute() function in SAX.c 2260 */ 2261 static xmlChar buffer[6] = "&"; 2262 2263 if (len > buf_size - 10) { 2264 growBuffer(buf); 2265 } 2266 current = &buffer[0]; 2267 while (*current != 0) { /* non input consuming */ 2268 buf[len++] = *current++; 2269 } 2270 } 2271 } else { 2272 if (len > buf_size - 10) { 2273 growBuffer(buf); 2274 } 2275 len += xmlCopyChar(0, &buf[len], val); 2276 } 2277 } else { 2278 ent = xmlParseEntityRef(ctxt); 2279 if ((ent != NULL) && 2280 (ctxt->replaceEntities != 0)) { 2281 xmlChar *rep; 2282 2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2284 rep = xmlStringDecodeEntities(ctxt, ent->content, 2285 XML_SUBSTITUTE_REF, 0, 0, 0); 2286 if (rep != NULL) { 2287 current = rep; 2288 while (*current != 0) { /* non input consuming */ 2289 buf[len++] = *current++; 2290 if (len > buf_size - 10) { 2291 growBuffer(buf); 2292 } 2293 } 2294 xmlFree(rep); 2295 } 2296 } else { 2297 if (len > buf_size - 10) { 2298 growBuffer(buf); 2299 } 
2300 if (ent->content != NULL) 2301 buf[len++] = ent->content[0]; 2302 } 2303 } else if (ent != NULL) { 2304 int i = xmlStrlen(ent->name); 2305 const xmlChar *cur = ent->name; 2306 2307 /* 2308 * This may look absurd but is needed to detect 2309 * entities problems 2310 */ 2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2312 (ent->content != NULL)) { 2313 xmlChar *rep; 2314 rep = xmlStringDecodeEntities(ctxt, ent->content, 2315 XML_SUBSTITUTE_REF, 0, 0, 0); 2316 if (rep != NULL) 2317 xmlFree(rep); 2318 } 2319 2320 /* 2321 * Just output the reference 2322 */ 2323 buf[len++] = '&'; 2324 if (len > buf_size - i - 10) { 2325 growBuffer(buf); 2326 } 2327 for (;i > 0;i--) 2328 buf[len++] = *cur++; 2329 buf[len++] = ';'; 2330 } 2331 } 2332 } else { 2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2334 COPY_BUF(l,buf,len,0x20); 2335 if (len > buf_size - 10) { 2336 growBuffer(buf); 2337 } 2338 } else { 2339 COPY_BUF(l,buf,len,c); 2340 if (len > buf_size - 10) { 2341 growBuffer(buf); 2342 } 2343 } 2344 NEXTL(l); 2345 } 2346 GROW; 2347 c = CUR_CHAR(l); 2348 } 2349 buf[len++] = 0; 2350 if (RAW == '<') { 2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2353 ctxt->sax->error(ctxt->userData, 2354 "Unescaped '<' not allowed in attributes values\n"); 2355 ctxt->wellFormed = 0; 2356 ctxt->disableSAX = 1; 2357 } else if (RAW != limit) { 2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2361 ctxt->wellFormed = 0; 2362 ctxt->disableSAX = 1; 2363 } else 2364 NEXT; 2365 return(buf); 2366} 2367 2368/** 2369 * xmlParseSystemLiteral: 2370 * @ctxt: an XML parser context 2371 * 2372 * parse an XML Literal 2373 * 2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2375 * 2376 * Returns the SystemLiteral parsed or NULL 2377 */ 2378 2379xmlChar * 
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2381 xmlChar *buf = NULL; 2382 int len = 0; 2383 int size = XML_PARSER_BUFFER_SIZE; 2384 int cur, l; 2385 xmlChar stop; 2386 int state = ctxt->instate; 2387 int count = 0; 2388 2389 SHRINK; 2390 if (RAW == '"') { 2391 NEXT; 2392 stop = '"'; 2393 } else if (RAW == '\'') { 2394 NEXT; 2395 stop = '\''; 2396 } else { 2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2399 ctxt->sax->error(ctxt->userData, 2400 "SystemLiteral \" or ' expected\n"); 2401 ctxt->wellFormed = 0; 2402 ctxt->disableSAX = 1; 2403 return(NULL); 2404 } 2405 2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2407 if (buf == NULL) { 2408 xmlGenericError(xmlGenericErrorContext, 2409 "malloc of %d byte failed\n", size); 2410 return(NULL); 2411 } 2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2413 cur = CUR_CHAR(l); 2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2415 if (len + 5 >= size) { 2416 size *= 2; 2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2418 if (buf == NULL) { 2419 xmlGenericError(xmlGenericErrorContext, 2420 "realloc of %d byte failed\n", size); 2421 ctxt->instate = (xmlParserInputState) state; 2422 return(NULL); 2423 } 2424 } 2425 count++; 2426 if (count > 50) { 2427 GROW; 2428 count = 0; 2429 } 2430 COPY_BUF(l,buf,len,cur); 2431 NEXTL(l); 2432 cur = CUR_CHAR(l); 2433 if (cur == 0) { 2434 GROW; 2435 SHRINK; 2436 cur = CUR_CHAR(l); 2437 } 2438 } 2439 buf[len] = 0; 2440 ctxt->instate = (xmlParserInputState) state; 2441 if (!IS_CHAR(cur)) { 2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2445 ctxt->wellFormed = 0; 2446 ctxt->disableSAX = 1; 2447 } else { 2448 NEXT; 2449 } 2450 return(buf); 2451} 2452 2453/** 2454 * xmlParsePubidLiteral: 2455 * @ctxt: an XML parser context 2456 * 2457 * parse an XML public literal 
2458 * 2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2460 * 2461 * Returns the PubidLiteral parsed or NULL. 2462 */ 2463 2464xmlChar * 2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2466 xmlChar *buf = NULL; 2467 int len = 0; 2468 int size = XML_PARSER_BUFFER_SIZE; 2469 xmlChar cur; 2470 xmlChar stop; 2471 int count = 0; 2472 xmlParserInputState oldstate = ctxt->instate; 2473 2474 SHRINK; 2475 if (RAW == '"') { 2476 NEXT; 2477 stop = '"'; 2478 } else if (RAW == '\'') { 2479 NEXT; 2480 stop = '\''; 2481 } else { 2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2484 ctxt->sax->error(ctxt->userData, 2485 "SystemLiteral \" or ' expected\n"); 2486 ctxt->wellFormed = 0; 2487 ctxt->disableSAX = 1; 2488 return(NULL); 2489 } 2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2491 if (buf == NULL) { 2492 xmlGenericError(xmlGenericErrorContext, 2493 "malloc of %d byte failed\n", size); 2494 return(NULL); 2495 } 2496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 2497 cur = CUR; 2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2499 if (len + 1 >= size) { 2500 size *= 2; 2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2502 if (buf == NULL) { 2503 xmlGenericError(xmlGenericErrorContext, 2504 "realloc of %d byte failed\n", size); 2505 return(NULL); 2506 } 2507 } 2508 buf[len++] = cur; 2509 count++; 2510 if (count > 50) { 2511 GROW; 2512 count = 0; 2513 } 2514 NEXT; 2515 cur = CUR; 2516 if (cur == 0) { 2517 GROW; 2518 SHRINK; 2519 cur = CUR; 2520 } 2521 } 2522 buf[len] = 0; 2523 if (cur != stop) { 2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2527 ctxt->wellFormed = 0; 2528 ctxt->disableSAX = 1; 2529 } else { 2530 NEXT; 2531 } 2532 ctxt->instate = oldstate; 2533 return(buf); 2534} 2535 2536void 
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2537/** 2538 * xmlParseCharData: 2539 * @ctxt: an XML parser context 2540 * @cdata: int indicating whether we are within a CDATA section 2541 * 2542 * parse a CharData section. 2543 * if we are within a CDATA section ']]>' marks an end of section. 2544 * 2545 * The right angle bracket (>) may be represented using the string ">", 2546 * and must, for compatibility, be escaped using ">" or a character 2547 * reference when it appears in the string "]]>" in content, when that 2548 * string is not marking the end of a CDATA section. 2549 * 2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2551 */ 2552 2553void 2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2555 const xmlChar *in; 2556 int nbchar = 0; 2557 int line = ctxt->input->line; 2558 int col = ctxt->input->col; 2559 2560 SHRINK; 2561 GROW; 2562 /* 2563 * Accelerated common case where input don't need to be 2564 * modified before passing it to the handler. 2565 */ 2566 if ((ctxt->token == 0) && (!cdata)) { 2567 in = ctxt->input->cur; 2568 do { 2569get_more: 2570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') && 2571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2572 in++; 2573 if (*in == 0xA) { 2574 ctxt->input->line++; 2575 in++; 2576 while (*in == 0xA) { 2577 ctxt->input->line++; 2578 in++; 2579 } 2580 goto get_more; 2581 } 2582 if (*in == ']') { 2583 if ((in[1] == ']') && (in[2] == '>')) { 2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2586 ctxt->sax->error(ctxt->userData, 2587 "Sequence ']]>' not allowed in content\n"); 2588 ctxt->input->cur = in; 2589 ctxt->wellFormed = 0; 2590 ctxt->disableSAX = 1; 2591 return; 2592 } 2593 in++; 2594 goto get_more; 2595 } 2596 nbchar = in - ctxt->input->cur; 2597 if (nbchar > 0) { 2598 if (IS_BLANK(*ctxt->input->cur)) { 2599 const xmlChar *tmp = ctxt->input->cur; 2600 ctxt->input->cur = in; 2601 if (areBlanks(ctxt, tmp, nbchar)) { 
2602 if (ctxt->sax->ignorableWhitespace != NULL) 2603 ctxt->sax->ignorableWhitespace(ctxt->userData, 2604 tmp, nbchar); 2605 } else { 2606 if (ctxt->sax->characters != NULL) 2607 ctxt->sax->characters(ctxt->userData, 2608 tmp, nbchar); 2609 } 2610 line = ctxt->input->line; 2611 col = ctxt->input->col; 2612 } else { 2613 if (ctxt->sax->characters != NULL) 2614 ctxt->sax->characters(ctxt->userData, 2615 ctxt->input->cur, nbchar); 2616 line = ctxt->input->line; 2617 col = ctxt->input->col; 2618 } 2619 } 2620 ctxt->input->cur = in; 2621 if (*in == 0xD) { 2622 in++; 2623 if (*in == 0xA) { 2624 ctxt->input->cur = in; 2625 in++; 2626 ctxt->input->line++; 2627 continue; /* while */ 2628 } 2629 in--; 2630 } 2631 if (*in == '<') { 2632 return; 2633 } 2634 if (*in == '&') { 2635 return; 2636 } 2637 SHRINK; 2638 GROW; 2639 in = ctxt->input->cur; 2640 } while ((*in >= 0x20) && (*in <= 0x7F)); 2641 nbchar = 0; 2642 } 2643 ctxt->input->line = line; 2644 ctxt->input->col = col; 2645 xmlParseCharDataComplex(ctxt, cdata); 2646} 2647 2648void 2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2651 int nbchar = 0; 2652 int cur, l; 2653 int count = 0; 2654 2655 SHRINK; 2656 GROW; 2657 cur = CUR_CHAR(l); 2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2659 ((cur != '&') || (ctxt->token == '&')) && 2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2661 if ((cur == ']') && (NXT(1) == ']') && 2662 (NXT(2) == '>')) { 2663 if (cdata) break; 2664 else { 2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2667 ctxt->sax->error(ctxt->userData, 2668 "Sequence ']]>' not allowed in content\n"); 2669 /* Should this be relaxed ??? 
I see a "must here */ 2670 ctxt->wellFormed = 0; 2671 ctxt->disableSAX = 1; 2672 } 2673 } 2674 COPY_BUF(l,buf,nbchar,cur); 2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2676 /* 2677 * OK the segment is to be consumed as chars. 2678 */ 2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2680 if (areBlanks(ctxt, buf, nbchar)) { 2681 if (ctxt->sax->ignorableWhitespace != NULL) 2682 ctxt->sax->ignorableWhitespace(ctxt->userData, 2683 buf, nbchar); 2684 } else { 2685 if (ctxt->sax->characters != NULL) 2686 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2687 } 2688 } 2689 nbchar = 0; 2690 } 2691 count++; 2692 if (count > 50) { 2693 GROW; 2694 count = 0; 2695 } 2696 NEXTL(l); 2697 cur = CUR_CHAR(l); 2698 } 2699 if (nbchar != 0) { 2700 /* 2701 * OK the segment is to be consumed as chars. 2702 */ 2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2704 if (areBlanks(ctxt, buf, nbchar)) { 2705 if (ctxt->sax->ignorableWhitespace != NULL) 2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2707 } else { 2708 if (ctxt->sax->characters != NULL) 2709 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2710 } 2711 } 2712 } 2713} 2714 2715/** 2716 * xmlParseExternalID: 2717 * @ctxt: an XML parser context 2718 * @publicID: a xmlChar** receiving PubidLiteral 2719 * @strict: indicate whether we should restrict parsing to only 2720 * production [75], see NOTE below 2721 * 2722 * Parse an External ID or a Public ID 2723 * 2724 * NOTE: Productions [75] and [83] interact badly since [75] can generate 2725 * 'PUBLIC' S PubidLiteral S SystemLiteral 2726 * 2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2729 * 2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2731 * 2732 * Returns the function returns SystemLiteral and in the second 2733 * case publicID receives PubidLiteral, is strict is off 2734 * it is possible to return NULL and have publicID set. 
2735 */ 2736 2737xmlChar * 2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2739 xmlChar *URI = NULL; 2740 2741 SHRINK; 2742 2743 *publicID = NULL; 2744 if ((RAW == 'S') && (NXT(1) == 'Y') && 2745 (NXT(2) == 'S') && (NXT(3) == 'T') && 2746 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2747 SKIP(6); 2748 if (!IS_BLANK(CUR)) { 2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2751 ctxt->sax->error(ctxt->userData, 2752 "Space required after 'SYSTEM'\n"); 2753 ctxt->wellFormed = 0; 2754 ctxt->disableSAX = 1; 2755 } 2756 SKIP_BLANKS; 2757 URI = xmlParseSystemLiteral(ctxt); 2758 if (URI == NULL) { 2759 ctxt->errNo = XML_ERR_URI_REQUIRED; 2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2761 ctxt->sax->error(ctxt->userData, 2762 "xmlParseExternalID: SYSTEM, no URI\n"); 2763 ctxt->wellFormed = 0; 2764 ctxt->disableSAX = 1; 2765 } 2766 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2767 (NXT(2) == 'B') && (NXT(3) == 'L') && 2768 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2769 SKIP(6); 2770 if (!IS_BLANK(CUR)) { 2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2773 ctxt->sax->error(ctxt->userData, 2774 "Space required after 'PUBLIC'\n"); 2775 ctxt->wellFormed = 0; 2776 ctxt->disableSAX = 1; 2777 } 2778 SKIP_BLANKS; 2779 *publicID = xmlParsePubidLiteral(ctxt); 2780 if (*publicID == NULL) { 2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2783 ctxt->sax->error(ctxt->userData, 2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2785 ctxt->wellFormed = 0; 2786 ctxt->disableSAX = 1; 2787 } 2788 if (strict) { 2789 /* 2790 * We don't handle [83] so "S SystemLiteral" is required. 
2791 */ 2792 if (!IS_BLANK(CUR)) { 2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2795 ctxt->sax->error(ctxt->userData, 2796 "Space required after the Public Identifier\n"); 2797 ctxt->wellFormed = 0; 2798 ctxt->disableSAX = 1; 2799 } 2800 } else { 2801 /* 2802 * We handle [83] so we return immediately, if 2803 * "S SystemLiteral" is not detected. From a purely parsing 2804 * point of view that's a nice mess. 2805 */ 2806 const xmlChar *ptr; 2807 GROW; 2808 2809 ptr = CUR_PTR; 2810 if (!IS_BLANK(*ptr)) return(NULL); 2811 2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2814 } 2815 SKIP_BLANKS; 2816 URI = xmlParseSystemLiteral(ctxt); 2817 if (URI == NULL) { 2818 ctxt->errNo = XML_ERR_URI_REQUIRED; 2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2820 ctxt->sax->error(ctxt->userData, 2821 "xmlParseExternalID: PUBLIC, no URI\n"); 2822 ctxt->wellFormed = 0; 2823 ctxt->disableSAX = 1; 2824 } 2825 } 2826 return(URI); 2827} 2828 2829/** 2830 * xmlParseComment: 2831 * @ctxt: an XML parser context 2832 * 2833 * Skip an XML (SGML) comment <!-- .... --> 2834 * The spec says that "For compatibility, the string "--" (double-hyphen) 2835 * must not occur within comments. " 2836 * 2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2838 */ 2839void 2840xmlParseComment(xmlParserCtxtPtr ctxt) { 2841 xmlChar *buf = NULL; 2842 int len; 2843 int size = XML_PARSER_BUFFER_SIZE; 2844 int q, ql; 2845 int r, rl; 2846 int cur, l; 2847 xmlParserInputState state; 2848 xmlParserInputPtr input = ctxt->input; 2849 int count = 0; 2850 2851 /* 2852 * Check that there is a comment right here. 
2853 */ 2854 if ((RAW != '<') || (NXT(1) != '!') || 2855 (NXT(2) != '-') || (NXT(3) != '-')) return; 2856 2857 state = ctxt->instate; 2858 ctxt->instate = XML_PARSER_COMMENT; 2859 SHRINK; 2860 SKIP(4); 2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2862 if (buf == NULL) { 2863 xmlGenericError(xmlGenericErrorContext, 2864 "malloc of %d byte failed\n", size); 2865 ctxt->instate = state; 2866 return; 2867 } 2868 q = CUR_CHAR(ql); 2869 NEXTL(ql); 2870 r = CUR_CHAR(rl); 2871 NEXTL(rl); 2872 cur = CUR_CHAR(l); 2873 len = 0; 2874 while (IS_CHAR(cur) && /* checked */ 2875 ((cur != '>') || 2876 (r != '-') || (q != '-'))) { 2877 if ((r == '-') && (q == '-')) { 2878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2880 ctxt->sax->error(ctxt->userData, 2881 "Comment must not contain '--' (double-hyphen)`\n"); 2882 ctxt->wellFormed = 0; 2883 ctxt->disableSAX = 1; 2884 } 2885 if (len + 5 >= size) { 2886 size *= 2; 2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2888 if (buf == NULL) { 2889 xmlGenericError(xmlGenericErrorContext, 2890 "realloc of %d byte failed\n", size); 2891 ctxt->instate = state; 2892 return; 2893 } 2894 } 2895 COPY_BUF(ql,buf,len,q); 2896 q = r; 2897 ql = rl; 2898 r = cur; 2899 rl = l; 2900 2901 count++; 2902 if (count > 50) { 2903 GROW; 2904 count = 0; 2905 } 2906 NEXTL(l); 2907 cur = CUR_CHAR(l); 2908 if (cur == 0) { 2909 SHRINK; 2910 GROW; 2911 cur = CUR_CHAR(l); 2912 } 2913 } 2914 buf[len] = 0; 2915 if (!IS_CHAR(cur)) { 2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2918 ctxt->sax->error(ctxt->userData, 2919 "Comment not terminated \n<!--%.50s\n", buf); 2920 ctxt->wellFormed = 0; 2921 ctxt->disableSAX = 1; 2922 xmlFree(buf); 2923 } else { 2924 if (input != ctxt->input) { 2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2927 ctxt->sax->error(ctxt->userData, 
2928"Comment doesn't start and stop in the same entity\n"); 2929 ctxt->wellFormed = 0; 2930 ctxt->disableSAX = 1; 2931 } 2932 NEXT; 2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2934 (!ctxt->disableSAX)) 2935 ctxt->sax->comment(ctxt->userData, buf); 2936 xmlFree(buf); 2937 } 2938 ctxt->instate = state; 2939} 2940 2941/** 2942 * xmlParsePITarget: 2943 * @ctxt: an XML parser context 2944 * 2945 * parse the name of a PI 2946 * 2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2948 * 2949 * Returns the PITarget name or NULL 2950 */ 2951 2952xmlChar * 2953xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2954 xmlChar *name; 2955 2956 name = xmlParseName(ctxt); 2957 if ((name != NULL) && 2958 ((name[0] == 'x') || (name[0] == 'X')) && 2959 ((name[1] == 'm') || (name[1] == 'M')) && 2960 ((name[2] == 'l') || (name[2] == 'L'))) { 2961 int i; 2962 if ((name[0] == 'x') && (name[1] == 'm') && 2963 (name[2] == 'l') && (name[3] == 0)) { 2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2966 ctxt->sax->error(ctxt->userData, 2967 "XML declaration allowed only at the start of the document\n"); 2968 ctxt->wellFormed = 0; 2969 ctxt->disableSAX = 1; 2970 return(name); 2971 } else if (name[3] == 0) { 2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2975 ctxt->wellFormed = 0; 2976 ctxt->disableSAX = 1; 2977 return(name); 2978 } 2979 for (i = 0;;i++) { 2980 if (xmlW3CPIs[i] == NULL) break; 2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2982 return(name); 2983 } 2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2986 ctxt->sax->warning(ctxt->userData, 2987 "xmlParsePITarget: invalid name prefix 'xml'\n"); 2988 } 2989 } 2990 return(name); 2991} 2992 2993#ifdef LIBXML_CATALOG_ENABLED 2994/** 2995 * xmlParseCatalogPI: 2996 
* @ctxt: an XML parser context 2997 * @catalog: the PI value string 2998 * 2999 * parse an XML Catalog Processing Instruction. 3000 * 3001 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3002 * 3003 * Occurs only if allowed by the user and if happening in the Misc 3004 * part of the document before any doctype informations 3005 * This will add the given catalog to the parsing context in order 3006 * to be used if there is a resolution need further down in the document 3007 */ 3008 3009static void 3010xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3011 xmlChar *URL = NULL; 3012 const xmlChar *tmp, *base; 3013 xmlChar marker; 3014 3015 tmp = catalog; 3016 while (IS_BLANK(*tmp)) tmp++; 3017 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3018 goto error; 3019 tmp += 7; 3020 while (IS_BLANK(*tmp)) tmp++; 3021 if (*tmp != '=') { 3022 return; 3023 } 3024 tmp++; 3025 while (IS_BLANK(*tmp)) tmp++; 3026 marker = *tmp; 3027 if ((marker != '\'') && (marker != '"')) 3028 goto error; 3029 tmp++; 3030 base = tmp; 3031 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3032 if (*tmp == 0) 3033 goto error; 3034 URL = xmlStrndup(base, tmp - base); 3035 tmp++; 3036 while (IS_BLANK(*tmp)) tmp++; 3037 if (*tmp != 0) 3038 goto error; 3039 3040 if (URL != NULL) { 3041 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3042 xmlFree(URL); 3043 } 3044 return; 3045 3046error: 3047 ctxt->errNo = XML_WAR_CATALOG_PI; 3048 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3049 ctxt->sax->warning(ctxt->userData, 3050 "Catalog PI syntax error: %s\n", catalog); 3051 if (URL != NULL) 3052 xmlFree(URL); 3053} 3054#endif 3055 3056/** 3057 * xmlParsePI: 3058 * @ctxt: an XML parser context 3059 * 3060 * parse an XML Processing Instruction. 3061 * 3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3063 * 3064 * The processing is transfered to SAX once parsed. 
3065 */ 3066 3067void 3068xmlParsePI(xmlParserCtxtPtr ctxt) { 3069 xmlChar *buf = NULL; 3070 int len = 0; 3071 int size = XML_PARSER_BUFFER_SIZE; 3072 int cur, l; 3073 xmlChar *target; 3074 xmlParserInputState state; 3075 int count = 0; 3076 3077 if ((RAW == '<') && (NXT(1) == '?')) { 3078 xmlParserInputPtr input = ctxt->input; 3079 state = ctxt->instate; 3080 ctxt->instate = XML_PARSER_PI; 3081 /* 3082 * this is a Processing Instruction. 3083 */ 3084 SKIP(2); 3085 SHRINK; 3086 3087 /* 3088 * Parse the target name and check for special support like 3089 * namespace. 3090 */ 3091 target = xmlParsePITarget(ctxt); 3092 if (target != NULL) { 3093 if ((RAW == '?') && (NXT(1) == '>')) { 3094 if (input != ctxt->input) { 3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3097 ctxt->sax->error(ctxt->userData, 3098 "PI declaration doesn't start and stop in the same entity\n"); 3099 ctxt->wellFormed = 0; 3100 ctxt->disableSAX = 1; 3101 } 3102 SKIP(2); 3103 3104 /* 3105 * SAX: PI detected. 
3106 */ 3107 if ((ctxt->sax) && (!ctxt->disableSAX) && 3108 (ctxt->sax->processingInstruction != NULL)) 3109 ctxt->sax->processingInstruction(ctxt->userData, 3110 target, NULL); 3111 ctxt->instate = state; 3112 xmlFree(target); 3113 return; 3114 } 3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3116 if (buf == NULL) { 3117 xmlGenericError(xmlGenericErrorContext, 3118 "malloc of %d byte failed\n", size); 3119 ctxt->instate = state; 3120 return; 3121 } 3122 cur = CUR; 3123 if (!IS_BLANK(cur)) { 3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3126 ctxt->sax->error(ctxt->userData, 3127 "xmlParsePI: PI %s space expected\n", target); 3128 ctxt->wellFormed = 0; 3129 ctxt->disableSAX = 1; 3130 } 3131 SKIP_BLANKS; 3132 cur = CUR_CHAR(l); 3133 while (IS_CHAR(cur) && /* checked */ 3134 ((cur != '?') || (NXT(1) != '>'))) { 3135 if (len + 5 >= size) { 3136 size *= 2; 3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3138 if (buf == NULL) { 3139 xmlGenericError(xmlGenericErrorContext, 3140 "realloc of %d byte failed\n", size); 3141 ctxt->instate = state; 3142 return; 3143 } 3144 } 3145 count++; 3146 if (count > 50) { 3147 GROW; 3148 count = 0; 3149 } 3150 COPY_BUF(l,buf,len,cur); 3151 NEXTL(l); 3152 cur = CUR_CHAR(l); 3153 if (cur == 0) { 3154 SHRINK; 3155 GROW; 3156 cur = CUR_CHAR(l); 3157 } 3158 } 3159 buf[len] = 0; 3160 if (cur != '?') { 3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3163 ctxt->sax->error(ctxt->userData, 3164 "xmlParsePI: PI %s never end ...\n", target); 3165 ctxt->wellFormed = 0; 3166 ctxt->disableSAX = 1; 3167 } else { 3168 if (input != ctxt->input) { 3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3171 ctxt->sax->error(ctxt->userData, 3172 "PI declaration doesn't start and stop in the same entity\n"); 3173 ctxt->wellFormed = 0; 3174 ctxt->disableSAX = 1; 3175 } 3176 
SKIP(2); 3177 3178#ifdef LIBXML_CATALOG_ENABLED 3179 if (((state == XML_PARSER_MISC) || 3180 (state == XML_PARSER_START)) && 3181 (xmlStrEqual(target, XML_CATALOG_PI))) { 3182 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3184 (allow == XML_CATA_ALLOW_ALL)) 3185 xmlParseCatalogPI(ctxt, buf); 3186 } 3187#endif 3188 3189 3190 /* 3191 * SAX: PI detected. 3192 */ 3193 if ((ctxt->sax) && (!ctxt->disableSAX) && 3194 (ctxt->sax->processingInstruction != NULL)) 3195 ctxt->sax->processingInstruction(ctxt->userData, 3196 target, buf); 3197 } 3198 xmlFree(buf); 3199 xmlFree(target); 3200 } else { 3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3203 ctxt->sax->error(ctxt->userData, 3204 "xmlParsePI : no target name\n"); 3205 ctxt->wellFormed = 0; 3206 ctxt->disableSAX = 1; 3207 } 3208 ctxt->instate = state; 3209 } 3210} 3211 3212/** 3213 * xmlParseNotationDecl: 3214 * @ctxt: an XML parser context 3215 * 3216 * parse a notation declaration 3217 * 3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3219 * 3220 * Hence there is actually 3 choices: 3221 * 'PUBLIC' S PubidLiteral 3222 * 'PUBLIC' S PubidLiteral S SystemLiteral 3223 * and 'SYSTEM' S SystemLiteral 3224 * 3225 * See the NOTE on xmlParseExternalID(). 
3226 */ 3227 3228void 3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3230 xmlChar *name; 3231 xmlChar *Pubid; 3232 xmlChar *Systemid; 3233 3234 if ((RAW == '<') && (NXT(1) == '!') && 3235 (NXT(2) == 'N') && (NXT(3) == 'O') && 3236 (NXT(4) == 'T') && (NXT(5) == 'A') && 3237 (NXT(6) == 'T') && (NXT(7) == 'I') && 3238 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3239 xmlParserInputPtr input = ctxt->input; 3240 SHRINK; 3241 SKIP(10); 3242 if (!IS_BLANK(CUR)) { 3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3245 ctxt->sax->error(ctxt->userData, 3246 "Space required after '<!NOTATION'\n"); 3247 ctxt->wellFormed = 0; 3248 ctxt->disableSAX = 1; 3249 return; 3250 } 3251 SKIP_BLANKS; 3252 3253 name = xmlParseName(ctxt); 3254 if (name == NULL) { 3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3257 ctxt->sax->error(ctxt->userData, 3258 "NOTATION: Name expected here\n"); 3259 ctxt->wellFormed = 0; 3260 ctxt->disableSAX = 1; 3261 return; 3262 } 3263 if (!IS_BLANK(CUR)) { 3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3266 ctxt->sax->error(ctxt->userData, 3267 "Space required after the NOTATION name'\n"); 3268 ctxt->wellFormed = 0; 3269 ctxt->disableSAX = 1; 3270 return; 3271 } 3272 SKIP_BLANKS; 3273 3274 /* 3275 * Parse the IDs. 
3276 */ 3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3278 SKIP_BLANKS; 3279 3280 if (RAW == '>') { 3281 if (input != ctxt->input) { 3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3284 ctxt->sax->error(ctxt->userData, 3285"Notation declaration doesn't start and stop in the same entity\n"); 3286 ctxt->wellFormed = 0; 3287 ctxt->disableSAX = 1; 3288 } 3289 NEXT; 3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3291 (ctxt->sax->notationDecl != NULL)) 3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3293 } else { 3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3296 ctxt->sax->error(ctxt->userData, 3297 "'>' required to close NOTATION declaration\n"); 3298 ctxt->wellFormed = 0; 3299 ctxt->disableSAX = 1; 3300 } 3301 xmlFree(name); 3302 if (Systemid != NULL) xmlFree(Systemid); 3303 if (Pubid != NULL) xmlFree(Pubid); 3304 } 3305} 3306 3307/** 3308 * xmlParseEntityDecl: 3309 * @ctxt: an XML parser context 3310 * 3311 * parse <!ENTITY declarations 3312 * 3313 * [70] EntityDecl ::= GEDecl | PEDecl 3314 * 3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3316 * 3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3318 * 3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3320 * 3321 * [74] PEDef ::= EntityValue | ExternalID 3322 * 3323 * [76] NDataDecl ::= S 'NDATA' S Name 3324 * 3325 * [ VC: Notation Declared ] 3326 * The Name must match the declared name of a notation. 
3327 */ 3328 3329void 3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3331 xmlChar *name = NULL; 3332 xmlChar *value = NULL; 3333 xmlChar *URI = NULL, *literal = NULL; 3334 xmlChar *ndata = NULL; 3335 int isParameter = 0; 3336 xmlChar *orig = NULL; 3337 3338 GROW; 3339 if ((RAW == '<') && (NXT(1) == '!') && 3340 (NXT(2) == 'E') && (NXT(3) == 'N') && 3341 (NXT(4) == 'T') && (NXT(5) == 'I') && 3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3343 xmlParserInputPtr input = ctxt->input; 3344 ctxt->instate = XML_PARSER_ENTITY_DECL; 3345 SHRINK; 3346 SKIP(8); 3347 if (!IS_BLANK(CUR)) { 3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3350 ctxt->sax->error(ctxt->userData, 3351 "Space required after '<!ENTITY'\n"); 3352 ctxt->wellFormed = 0; 3353 ctxt->disableSAX = 1; 3354 } 3355 SKIP_BLANKS; 3356 3357 if (RAW == '%') { 3358 NEXT; 3359 if (!IS_BLANK(CUR)) { 3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3362 ctxt->sax->error(ctxt->userData, 3363 "Space required after '%'\n"); 3364 ctxt->wellFormed = 0; 3365 ctxt->disableSAX = 1; 3366 } 3367 SKIP_BLANKS; 3368 isParameter = 1; 3369 } 3370 3371 name = xmlParseName(ctxt); 3372 if (name == NULL) { 3373 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3376 ctxt->wellFormed = 0; 3377 ctxt->disableSAX = 1; 3378 return; 3379 } 3380 if (!IS_BLANK(CUR)) { 3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3383 ctxt->sax->error(ctxt->userData, 3384 "Space required after the entity name\n"); 3385 ctxt->wellFormed = 0; 3386 ctxt->disableSAX = 1; 3387 } 3388 SKIP_BLANKS; 3389 3390 /* 3391 * handle the various case of definitions... 
3392 */ 3393 if (isParameter) { 3394 if ((RAW == '"') || (RAW == '\'')) { 3395 value = xmlParseEntityValue(ctxt, &orig); 3396 if (value) { 3397 if ((ctxt->sax != NULL) && 3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3399 ctxt->sax->entityDecl(ctxt->userData, name, 3400 XML_INTERNAL_PARAMETER_ENTITY, 3401 NULL, NULL, value); 3402 } 3403 } else { 3404 URI = xmlParseExternalID(ctxt, &literal, 1); 3405 if ((URI == NULL) && (literal == NULL)) { 3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3408 ctxt->sax->error(ctxt->userData, 3409 "Entity value required\n"); 3410 ctxt->wellFormed = 0; 3411 ctxt->disableSAX = 1; 3412 } 3413 if (URI) { 3414 xmlURIPtr uri; 3415 3416 uri = xmlParseURI((const char *) URI); 3417 if (uri == NULL) { 3418 ctxt->errNo = XML_ERR_INVALID_URI; 3419 if ((ctxt->sax != NULL) && 3420 (!ctxt->disableSAX) && 3421 (ctxt->sax->error != NULL)) 3422 ctxt->sax->error(ctxt->userData, 3423 "Invalid URI: %s\n", URI); 3424 /* 3425 * This really ought to be a well formedness error 3426 * but the XML Core WG decided otherwise c.f. issue 3427 * E26 of the XML erratas. 3428 */ 3429 } else { 3430 if (uri->fragment != NULL) { 3431 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3432 if ((ctxt->sax != NULL) && 3433 (!ctxt->disableSAX) && 3434 (ctxt->sax->error != NULL)) 3435 ctxt->sax->error(ctxt->userData, 3436 "Fragment not allowed: %s\n", URI); 3437 /* 3438 * Okay this is foolish to block those but not 3439 * invalid URIs. 
3440 */ 3441 ctxt->wellFormed = 0; 3442 } else { 3443 if ((ctxt->sax != NULL) && 3444 (!ctxt->disableSAX) && 3445 (ctxt->sax->entityDecl != NULL)) 3446 ctxt->sax->entityDecl(ctxt->userData, name, 3447 XML_EXTERNAL_PARAMETER_ENTITY, 3448 literal, URI, NULL); 3449 } 3450 xmlFreeURI(uri); 3451 } 3452 } 3453 } 3454 } else { 3455 if ((RAW == '"') || (RAW == '\'')) { 3456 value = xmlParseEntityValue(ctxt, &orig); 3457 if ((ctxt->sax != NULL) && 3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3459 ctxt->sax->entityDecl(ctxt->userData, name, 3460 XML_INTERNAL_GENERAL_ENTITY, 3461 NULL, NULL, value); 3462 } else { 3463 URI = xmlParseExternalID(ctxt, &literal, 1); 3464 if ((URI == NULL) && (literal == NULL)) { 3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3467 ctxt->sax->error(ctxt->userData, 3468 "Entity value required\n"); 3469 ctxt->wellFormed = 0; 3470 ctxt->disableSAX = 1; 3471 } 3472 if (URI) { 3473 xmlURIPtr uri; 3474 3475 uri = xmlParseURI((const char *)URI); 3476 if (uri == NULL) { 3477 ctxt->errNo = XML_ERR_INVALID_URI; 3478 if ((ctxt->sax != NULL) && 3479 (!ctxt->disableSAX) && 3480 (ctxt->sax->error != NULL)) 3481 ctxt->sax->error(ctxt->userData, 3482 "Invalid URI: %s\n", URI); 3483 /* 3484 * This really ought to be a well formedness error 3485 * but the XML Core WG decided otherwise c.f. issue 3486 * E26 of the XML erratas. 3487 */ 3488 } else { 3489 if (uri->fragment != NULL) { 3490 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3491 if ((ctxt->sax != NULL) && 3492 (!ctxt->disableSAX) && 3493 (ctxt->sax->error != NULL)) 3494 ctxt->sax->error(ctxt->userData, 3495 "Fragment not allowed: %s\n", URI); 3496 /* 3497 * Okay this is foolish to block those but not 3498 * invalid URIs. 
3499 */ 3500 ctxt->wellFormed = 0; 3501 } 3502 xmlFreeURI(uri); 3503 } 3504 } 3505 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3508 ctxt->sax->error(ctxt->userData, 3509 "Space required before 'NDATA'\n"); 3510 ctxt->wellFormed = 0; 3511 ctxt->disableSAX = 1; 3512 } 3513 SKIP_BLANKS; 3514 if ((RAW == 'N') && (NXT(1) == 'D') && 3515 (NXT(2) == 'A') && (NXT(3) == 'T') && 3516 (NXT(4) == 'A')) { 3517 SKIP(5); 3518 if (!IS_BLANK(CUR)) { 3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3521 ctxt->sax->error(ctxt->userData, 3522 "Space required after 'NDATA'\n"); 3523 ctxt->wellFormed = 0; 3524 ctxt->disableSAX = 1; 3525 } 3526 SKIP_BLANKS; 3527 ndata = xmlParseName(ctxt); 3528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3529 (ctxt->sax->unparsedEntityDecl != NULL)) 3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3531 literal, URI, ndata); 3532 } else { 3533 if ((ctxt->sax != NULL) && 3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3535 ctxt->sax->entityDecl(ctxt->userData, name, 3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3537 literal, URI, NULL); 3538 } 3539 } 3540 } 3541 SKIP_BLANKS; 3542 if (RAW != '>') { 3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3545 ctxt->sax->error(ctxt->userData, 3546 "xmlParseEntityDecl: entity %s not terminated\n", name); 3547 ctxt->wellFormed = 0; 3548 ctxt->disableSAX = 1; 3549 } else { 3550 if (input != ctxt->input) { 3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3553 ctxt->sax->error(ctxt->userData, 3554"Entity declaration doesn't start and stop in the same entity\n"); 3555 ctxt->wellFormed = 0; 3556 ctxt->disableSAX = 1; 3557 } 3558 NEXT; 3559 } 3560 if (orig != NULL) { 3561 /* 3562 * Ugly mechanism to save the raw entity value. 
3563 */ 3564 xmlEntityPtr cur = NULL; 3565 3566 if (isParameter) { 3567 if ((ctxt->sax != NULL) && 3568 (ctxt->sax->getParameterEntity != NULL)) 3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3570 } else { 3571 if ((ctxt->sax != NULL) && 3572 (ctxt->sax->getEntity != NULL)) 3573 cur = ctxt->sax->getEntity(ctxt->userData, name); 3574 } 3575 if (cur != NULL) { 3576 if (cur->orig != NULL) 3577 xmlFree(orig); 3578 else 3579 cur->orig = orig; 3580 } else 3581 xmlFree(orig); 3582 } 3583 if (name != NULL) xmlFree(name); 3584 if (value != NULL) xmlFree(value); 3585 if (URI != NULL) xmlFree(URI); 3586 if (literal != NULL) xmlFree(literal); 3587 if (ndata != NULL) xmlFree(ndata); 3588 } 3589} 3590 3591/** 3592 * xmlParseDefaultDecl: 3593 * @ctxt: an XML parser context 3594 * @value: Receive a possible fixed default value for the attribute 3595 * 3596 * Parse an attribute default declaration 3597 * 3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 3599 * 3600 * [ VC: Required Attribute ] 3601 * if the default declaration is the keyword #REQUIRED, then the 3602 * attribute must be specified for all elements of the type in the 3603 * attribute-list declaration. 3604 * 3605 * [ VC: Attribute Default Legal ] 3606 * The declared default value must meet the lexical constraints of 3607 * the declared attribute type c.f. xmlValidateAttributeDecl() 3608 * 3609 * [ VC: Fixed Attribute Default ] 3610 * if an attribute has a default value declared with the #FIXED 3611 * keyword, instances of that attribute must match the default value. 3612 * 3613 * [ WFC: No < in Attribute Values ] 3614 * handled in xmlParseAttValue() 3615 * 3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3617 * or XML_ATTRIBUTE_FIXED. 
3618 */ 3619 3620int 3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3622 int val; 3623 xmlChar *ret; 3624 3625 *value = NULL; 3626 if ((RAW == '#') && (NXT(1) == 'R') && 3627 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3628 (NXT(4) == 'U') && (NXT(5) == 'I') && 3629 (NXT(6) == 'R') && (NXT(7) == 'E') && 3630 (NXT(8) == 'D')) { 3631 SKIP(9); 3632 return(XML_ATTRIBUTE_REQUIRED); 3633 } 3634 if ((RAW == '#') && (NXT(1) == 'I') && 3635 (NXT(2) == 'M') && (NXT(3) == 'P') && 3636 (NXT(4) == 'L') && (NXT(5) == 'I') && 3637 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3638 SKIP(8); 3639 return(XML_ATTRIBUTE_IMPLIED); 3640 } 3641 val = XML_ATTRIBUTE_NONE; 3642 if ((RAW == '#') && (NXT(1) == 'F') && 3643 (NXT(2) == 'I') && (NXT(3) == 'X') && 3644 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3645 SKIP(6); 3646 val = XML_ATTRIBUTE_FIXED; 3647 if (!IS_BLANK(CUR)) { 3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3650 ctxt->sax->error(ctxt->userData, 3651 "Space required after '#FIXED'\n"); 3652 ctxt->wellFormed = 0; 3653 ctxt->disableSAX = 1; 3654 } 3655 SKIP_BLANKS; 3656 } 3657 ret = xmlParseAttValue(ctxt); 3658 ctxt->instate = XML_PARSER_DTD; 3659 if (ret == NULL) { 3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3661 ctxt->sax->error(ctxt->userData, 3662 "Attribute default value declaration error\n"); 3663 ctxt->wellFormed = 0; 3664 ctxt->disableSAX = 1; 3665 } else 3666 *value = ret; 3667 return(val); 3668} 3669 3670/** 3671 * xmlParseNotationType: 3672 * @ctxt: an XML parser context 3673 * 3674 * parse an Notation attribute type. 3675 * 3676 * Note: the leading 'NOTATION' S part has already being parsed... 3677 * 3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3679 * 3680 * [ VC: Notation Attributes ] 3681 * Values of this type must match one of the notation names included 3682 * in the declaration; all notation names in the declaration must be declared. 
3683 * 3684 * Returns: the notation attribute tree built while parsing 3685 */ 3686 3687xmlEnumerationPtr 3688xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3689 xmlChar *name; 3690 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3691 3692 if (RAW != '(') { 3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3695 ctxt->sax->error(ctxt->userData, 3696 "'(' required to start 'NOTATION'\n"); 3697 ctxt->wellFormed = 0; 3698 ctxt->disableSAX = 1; 3699 return(NULL); 3700 } 3701 SHRINK; 3702 do { 3703 NEXT; 3704 SKIP_BLANKS; 3705 name = xmlParseName(ctxt); 3706 if (name == NULL) { 3707 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3709 ctxt->sax->error(ctxt->userData, 3710 "Name expected in NOTATION declaration\n"); 3711 ctxt->wellFormed = 0; 3712 ctxt->disableSAX = 1; 3713 return(ret); 3714 } 3715 cur = xmlCreateEnumeration(name); 3716 xmlFree(name); 3717 if (cur == NULL) return(ret); 3718 if (last == NULL) ret = last = cur; 3719 else { 3720 last->next = cur; 3721 last = cur; 3722 } 3723 SKIP_BLANKS; 3724 } while (RAW == '|'); 3725 if (RAW != ')') { 3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3728 ctxt->sax->error(ctxt->userData, 3729 "')' required to finish NOTATION declaration\n"); 3730 ctxt->wellFormed = 0; 3731 ctxt->disableSAX = 1; 3732 if ((last != NULL) && (last != ret)) 3733 xmlFreeEnumeration(last); 3734 return(ret); 3735 } 3736 NEXT; 3737 return(ret); 3738} 3739 3740/** 3741 * xmlParseEnumerationType: 3742 * @ctxt: an XML parser context 3743 * 3744 * parse an Enumeration attribute type. 3745 * 3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 3747 * 3748 * [ VC: Enumeration ] 3749 * Values of this type must match one of the Nmtoken tokens in 3750 * the declaration 3751 * 3752 * Returns: the enumeration attribute tree built while parsing 3753 */ 3754 3755xmlEnumerationPtr 3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3757 xmlChar *name; 3758 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3759 3760 if (RAW != '(') { 3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3763 ctxt->sax->error(ctxt->userData, 3764 "'(' required to start ATTLIST enumeration\n"); 3765 ctxt->wellFormed = 0; 3766 ctxt->disableSAX = 1; 3767 return(NULL); 3768 } 3769 SHRINK; 3770 do { 3771 NEXT; 3772 SKIP_BLANKS; 3773 name = xmlParseNmtoken(ctxt); 3774 if (name == NULL) { 3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3777 ctxt->sax->error(ctxt->userData, 3778 "NmToken expected in ATTLIST enumeration\n"); 3779 ctxt->wellFormed = 0; 3780 ctxt->disableSAX = 1; 3781 return(ret); 3782 } 3783 cur = xmlCreateEnumeration(name); 3784 xmlFree(name); 3785 if (cur == NULL) return(ret); 3786 if (last == NULL) ret = last = cur; 3787 else { 3788 last->next = cur; 3789 last = cur; 3790 } 3791 SKIP_BLANKS; 3792 } while (RAW == '|'); 3793 if (RAW != ')') { 3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3796 ctxt->sax->error(ctxt->userData, 3797 "')' required to finish ATTLIST enumeration\n"); 3798 ctxt->wellFormed = 0; 3799 ctxt->disableSAX = 1; 3800 return(ret); 3801 } 3802 NEXT; 3803 return(ret); 3804} 3805 3806/** 3807 * xmlParseEnumeratedType: 3808 * @ctxt: an XML parser context 3809 * @tree: the enumeration tree built while parsing 3810 * 3811 * parse an Enumerated attribute type. 3812 * 3813 * [57] EnumeratedType ::= NotationType | Enumeration 3814 * 3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 3816 * 3817 * 3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3819 */ 3820 3821int 3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3823 if ((RAW == 'N') && (NXT(1) == 'O') && 3824 (NXT(2) == 'T') && (NXT(3) == 'A') && 3825 (NXT(4) == 'T') && (NXT(5) == 'I') && 3826 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3827 SKIP(8); 3828 if (!IS_BLANK(CUR)) { 3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3831 ctxt->sax->error(ctxt->userData, 3832 "Space required after 'NOTATION'\n"); 3833 ctxt->wellFormed = 0; 3834 ctxt->disableSAX = 1; 3835 return(0); 3836 } 3837 SKIP_BLANKS; 3838 *tree = xmlParseNotationType(ctxt); 3839 if (*tree == NULL) return(0); 3840 return(XML_ATTRIBUTE_NOTATION); 3841 } 3842 *tree = xmlParseEnumerationType(ctxt); 3843 if (*tree == NULL) return(0); 3844 return(XML_ATTRIBUTE_ENUMERATION); 3845} 3846 3847/** 3848 * xmlParseAttributeType: 3849 * @ctxt: an XML parser context 3850 * @tree: the enumeration tree built while parsing 3851 * 3852 * parse the Attribute list def for an element 3853 * 3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3855 * 3856 * [55] StringType ::= 'CDATA' 3857 * 3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3860 * 3861 * Validity constraints for attribute values syntax are checked in 3862 * xmlValidateAttributeValue() 3863 * 3864 * [ VC: ID ] 3865 * Values of type ID must match the Name production. A name must not 3866 * appear more than once in an XML document as a value of this type; 3867 * i.e., ID values must uniquely identify the elements which bear them. 3868 * 3869 * [ VC: One ID per Element Type ] 3870 * No element type may have more than one ID attribute specified. 3871 * 3872 * [ VC: ID Attribute Default ] 3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 
3874 * 3875 * [ VC: IDREF ] 3876 * Values of type IDREF must match the Name production, and values 3877 * of type IDREFS must match Names; each IDREF Name must match the value 3878 * of an ID attribute on some element in the XML document; i.e. IDREF 3879 * values must match the value of some ID attribute. 3880 * 3881 * [ VC: Entity Name ] 3882 * Values of type ENTITY must match the Name production, values 3883 * of type ENTITIES must match Names; each Entity Name must match the 3884 * name of an unparsed entity declared in the DTD. 3885 * 3886 * [ VC: Name Token ] 3887 * Values of type NMTOKEN must match the Nmtoken production; values 3888 * of type NMTOKENS must match Nmtokens. 3889 * 3890 * Returns the attribute type 3891 */ 3892int 3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3894 SHRINK; 3895 if ((RAW == 'C') && (NXT(1) == 'D') && 3896 (NXT(2) == 'A') && (NXT(3) == 'T') && 3897 (NXT(4) == 'A')) { 3898 SKIP(5); 3899 return(XML_ATTRIBUTE_CDATA); 3900 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3901 (NXT(2) == 'R') && (NXT(3) == 'E') && 3902 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3903 SKIP(6); 3904 return(XML_ATTRIBUTE_IDREFS); 3905 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3906 (NXT(2) == 'R') && (NXT(3) == 'E') && 3907 (NXT(4) == 'F')) { 3908 SKIP(5); 3909 return(XML_ATTRIBUTE_IDREF); 3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3911 SKIP(2); 3912 return(XML_ATTRIBUTE_ID); 3913 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3914 (NXT(2) == 'T') && (NXT(3) == 'I') && 3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3916 SKIP(6); 3917 return(XML_ATTRIBUTE_ENTITY); 3918 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3919 (NXT(2) == 'T') && (NXT(3) == 'I') && 3920 (NXT(4) == 'T') && (NXT(5) == 'I') && 3921 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3922 SKIP(8); 3923 return(XML_ATTRIBUTE_ENTITIES); 3924 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3925 (NXT(2) == 'T') && (NXT(3) == 'O') && 3926 (NXT(4) == 'K') && (NXT(5) == 'E') && 3927 
(NXT(6) == 'N') && (NXT(7) == 'S')) { 3928 SKIP(8); 3929 return(XML_ATTRIBUTE_NMTOKENS); 3930 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3931 (NXT(2) == 'T') && (NXT(3) == 'O') && 3932 (NXT(4) == 'K') && (NXT(5) == 'E') && 3933 (NXT(6) == 'N')) { 3934 SKIP(7); 3935 return(XML_ATTRIBUTE_NMTOKEN); 3936 } 3937 return(xmlParseEnumeratedType(ctxt, tree)); 3938} 3939 3940/** 3941 * xmlParseAttributeListDecl: 3942 * @ctxt: an XML parser context 3943 * 3944 * : parse the Attribute list def for an element 3945 * 3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 3947 * 3948 * [53] AttDef ::= S Name S AttType S DefaultDecl 3949 * 3950 */ 3951void 3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3953 xmlChar *elemName; 3954 xmlChar *attrName; 3955 xmlEnumerationPtr tree; 3956 3957 if ((RAW == '<') && (NXT(1) == '!') && 3958 (NXT(2) == 'A') && (NXT(3) == 'T') && 3959 (NXT(4) == 'T') && (NXT(5) == 'L') && 3960 (NXT(6) == 'I') && (NXT(7) == 'S') && 3961 (NXT(8) == 'T')) { 3962 xmlParserInputPtr input = ctxt->input; 3963 3964 SKIP(9); 3965 if (!IS_BLANK(CUR)) { 3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3968 ctxt->sax->error(ctxt->userData, 3969 "Space required after '<!ATTLIST'\n"); 3970 ctxt->wellFormed = 0; 3971 ctxt->disableSAX = 1; 3972 } 3973 SKIP_BLANKS; 3974 elemName = xmlParseName(ctxt); 3975 if (elemName == NULL) { 3976 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3978 ctxt->sax->error(ctxt->userData, 3979 "ATTLIST: no name for Element\n"); 3980 ctxt->wellFormed = 0; 3981 ctxt->disableSAX = 1; 3982 return; 3983 } 3984 SKIP_BLANKS; 3985 GROW; 3986 while (RAW != '>') { 3987 const xmlChar *check = CUR_PTR; 3988 int type; 3989 int def; 3990 xmlChar *defaultValue = NULL; 3991 3992 GROW; 3993 tree = NULL; 3994 attrName = xmlParseName(ctxt); 3995 if (attrName == NULL) { 3996 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3997 if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) 3998 ctxt->sax->error(ctxt->userData, 3999 "ATTLIST: no name for Attribute\n"); 4000 ctxt->wellFormed = 0; 4001 ctxt->disableSAX = 1; 4002 break; 4003 } 4004 GROW; 4005 if (!IS_BLANK(CUR)) { 4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4008 ctxt->sax->error(ctxt->userData, 4009 "Space required after the attribute name\n"); 4010 ctxt->wellFormed = 0; 4011 ctxt->disableSAX = 1; 4012 if (attrName != NULL) 4013 xmlFree(attrName); 4014 if (defaultValue != NULL) 4015 xmlFree(defaultValue); 4016 break; 4017 } 4018 SKIP_BLANKS; 4019 4020 type = xmlParseAttributeType(ctxt, &tree); 4021 if (type <= 0) { 4022 if (attrName != NULL) 4023 xmlFree(attrName); 4024 if (defaultValue != NULL) 4025 xmlFree(defaultValue); 4026 break; 4027 } 4028 4029 GROW; 4030 if (!IS_BLANK(CUR)) { 4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4033 ctxt->sax->error(ctxt->userData, 4034 "Space required after the attribute type\n"); 4035 ctxt->wellFormed = 0; 4036 ctxt->disableSAX = 1; 4037 if (attrName != NULL) 4038 xmlFree(attrName); 4039 if (defaultValue != NULL) 4040 xmlFree(defaultValue); 4041 if (tree != NULL) 4042 xmlFreeEnumeration(tree); 4043 break; 4044 } 4045 SKIP_BLANKS; 4046 4047 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4048 if (def <= 0) { 4049 if (attrName != NULL) 4050 xmlFree(attrName); 4051 if (defaultValue != NULL) 4052 xmlFree(defaultValue); 4053 if (tree != NULL) 4054 xmlFreeEnumeration(tree); 4055 break; 4056 } 4057 4058 GROW; 4059 if (RAW != '>') { 4060 if (!IS_BLANK(CUR)) { 4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4063 ctxt->sax->error(ctxt->userData, 4064 "Space required after the attribute default value\n"); 4065 ctxt->wellFormed = 0; 4066 ctxt->disableSAX = 1; 4067 if (attrName != NULL) 4068 xmlFree(attrName); 4069 if (defaultValue != NULL) 4070 xmlFree(defaultValue); 
4071 if (tree != NULL) 4072 xmlFreeEnumeration(tree); 4073 break; 4074 } 4075 SKIP_BLANKS; 4076 } 4077 if (check == CUR_PTR) { 4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4080 ctxt->sax->error(ctxt->userData, 4081 "xmlParseAttributeListDecl: detected internal error\n"); 4082 if (attrName != NULL) 4083 xmlFree(attrName); 4084 if (defaultValue != NULL) 4085 xmlFree(defaultValue); 4086 if (tree != NULL) 4087 xmlFreeEnumeration(tree); 4088 break; 4089 } 4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4091 (ctxt->sax->attributeDecl != NULL)) 4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4093 type, def, defaultValue, tree); 4094 if (attrName != NULL) 4095 xmlFree(attrName); 4096 if (defaultValue != NULL) 4097 xmlFree(defaultValue); 4098 GROW; 4099 } 4100 if (RAW == '>') { 4101 if (input != ctxt->input) { 4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4104 ctxt->sax->error(ctxt->userData, 4105"Attribute list declaration doesn't start and stop in the same entity\n"); 4106 ctxt->wellFormed = 0; 4107 ctxt->disableSAX = 1; 4108 } 4109 NEXT; 4110 } 4111 4112 xmlFree(elemName); 4113 } 4114} 4115 4116/** 4117 * xmlParseElementMixedContentDecl: 4118 * @ctxt: an XML parser context 4119 * 4120 * parse the declaration for a Mixed Element content 4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4122 * 4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4124 * '(' S? '#PCDATA' S? ')' 4125 * 4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4127 * 4128 * [ VC: No Duplicate Types ] 4129 * The same name must not appear more than once in a single 4130 * mixed-content declaration. 
4131 * 4132 * returns: the list of the xmlElementContentPtr describing the element choices 4133 */ 4134xmlElementContentPtr 4135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 4136 xmlElementContentPtr ret = NULL, cur = NULL, n; 4137 xmlChar *elem = NULL; 4138 4139 GROW; 4140 if ((RAW == '#') && (NXT(1) == 'P') && 4141 (NXT(2) == 'C') && (NXT(3) == 'D') && 4142 (NXT(4) == 'A') && (NXT(5) == 'T') && 4143 (NXT(6) == 'A')) { 4144 SKIP(7); 4145 SKIP_BLANKS; 4146 SHRINK; 4147 if (RAW == ')') { 4148 ctxt->entity = ctxt->input; 4149 NEXT; 4150 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4151 if (RAW == '*') { 4152 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4153 NEXT; 4154 } 4155 return(ret); 4156 } 4157 if ((RAW == '(') || (RAW == '|')) { 4158 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4159 if (ret == NULL) return(NULL); 4160 } 4161 while (RAW == '|') { 4162 NEXT; 4163 if (elem == NULL) { 4164 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4165 if (ret == NULL) return(NULL); 4166 ret->c1 = cur; 4167 if (cur != NULL) 4168 cur->parent = ret; 4169 cur = ret; 4170 } else { 4171 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4172 if (n == NULL) return(NULL); 4173 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4174 if (n->c1 != NULL) 4175 n->c1->parent = n; 4176 cur->c2 = n; 4177 if (n != NULL) 4178 n->parent = cur; 4179 cur = n; 4180 xmlFree(elem); 4181 } 4182 SKIP_BLANKS; 4183 elem = xmlParseName(ctxt); 4184 if (elem == NULL) { 4185 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4187 ctxt->sax->error(ctxt->userData, 4188 "xmlParseElementMixedContentDecl : Name expected\n"); 4189 ctxt->wellFormed = 0; 4190 ctxt->disableSAX = 1; 4191 xmlFreeElementContent(cur); 4192 return(NULL); 4193 } 4194 SKIP_BLANKS; 4195 GROW; 4196 } 4197 if ((RAW == ')') && (NXT(1) == '*')) { 4198 if (elem != NULL) { 4199 cur->c2 = xmlNewElementContent(elem, 4200 
XML_ELEMENT_CONTENT_ELEMENT); 4201 if (cur->c2 != NULL) 4202 cur->c2->parent = cur; 4203 xmlFree(elem); 4204 } 4205 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4206 ctxt->entity = ctxt->input; 4207 SKIP(2); 4208 } else { 4209 if (elem != NULL) xmlFree(elem); 4210 xmlFreeElementContent(ret); 4211 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4213 ctxt->sax->error(ctxt->userData, 4214 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4215 ctxt->wellFormed = 0; 4216 ctxt->disableSAX = 1; 4217 return(NULL); 4218 } 4219 4220 } else { 4221 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4223 ctxt->sax->error(ctxt->userData, 4224 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4225 ctxt->wellFormed = 0; 4226 ctxt->disableSAX = 1; 4227 } 4228 return(ret); 4229} 4230 4231/** 4232 * xmlParseElementChildrenContentD: 4233 * @ctxt: an XML parser context 4234 * 4235 * VMS version of xmlParseElementChildrenContentDecl() 4236 * 4237 * Returns the tree of xmlElementContentPtr describing the element 4238 * hierarchy. 4239 */ 4240/** 4241 * xmlParseElementChildrenContentDecl: 4242 * @ctxt: an XML parser context 4243 * 4244 * parse the declaration for a Mixed Element content 4245 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4246 * 4247 * 4248 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4249 * 4250 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4251 * 4252 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4253 * 4254 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4255 * 4256 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4257 * TODO Parameter-entity replacement text must be properly nested 4258 * with parenthesized groups. 
That is to say, if either of the 4259 * opening or closing parentheses in a choice, seq, or Mixed 4260 * construct is contained in the replacement text for a parameter 4261 * entity, both must be contained in the same replacement text. For 4262 * interoperability, if a parameter-entity reference appears in a 4263 * choice, seq, or Mixed construct, its replacement text should not 4264 * be empty, and neither the first nor last non-blank character of 4265 * the replacement text should be a connector (| or ,). 4266 * 4267 * Returns the tree of xmlElementContentPtr describing the element 4268 * hierarchy. 4269 */ 4270xmlElementContentPtr 4271#ifdef VMS 4272xmlParseElementChildrenContentD 4273#else 4274xmlParseElementChildrenContentDecl 4275#endif 4276(xmlParserCtxtPtr ctxt) { 4277 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4278 xmlChar *elem; 4279 xmlChar type = 0; 4280 4281 SKIP_BLANKS; 4282 GROW; 4283 if (RAW == '(') { 4284 /* Recurse on first child */ 4285 NEXT; 4286 SKIP_BLANKS; 4287 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4288 SKIP_BLANKS; 4289 GROW; 4290 } else { 4291 elem = xmlParseName(ctxt); 4292 if (elem == NULL) { 4293 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4295 ctxt->sax->error(ctxt->userData, 4296 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4297 ctxt->wellFormed = 0; 4298 ctxt->disableSAX = 1; 4299 return(NULL); 4300 } 4301 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4302 GROW; 4303 if (RAW == '?') { 4304 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4305 NEXT; 4306 } else if (RAW == '*') { 4307 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4308 NEXT; 4309 } else if (RAW == '+') { 4310 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4311 NEXT; 4312 } else { 4313 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4314 } 4315 xmlFree(elem); 4316 GROW; 4317 } 4318 SKIP_BLANKS; 4319 SHRINK; 4320 while (RAW != ')') { 4321 /* 4322 * Each loop we parse 
one separator and one element. 4323 */ 4324 if (RAW == ',') { 4325 if (type == 0) type = CUR; 4326 4327 /* 4328 * Detect "Name | Name , Name" error 4329 */ 4330 else if (type != CUR) { 4331 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4333 ctxt->sax->error(ctxt->userData, 4334 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4335 type); 4336 ctxt->wellFormed = 0; 4337 ctxt->disableSAX = 1; 4338 if ((op != NULL) && (op != ret)) 4339 xmlFreeElementContent(op); 4340 if ((last != NULL) && (last != ret) && 4341 (last != ret->c1) && (last != ret->c2)) 4342 xmlFreeElementContent(last); 4343 if (ret != NULL) 4344 xmlFreeElementContent(ret); 4345 return(NULL); 4346 } 4347 NEXT; 4348 4349 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4350 if (op == NULL) { 4351 xmlFreeElementContent(ret); 4352 return(NULL); 4353 } 4354 if (last == NULL) { 4355 op->c1 = ret; 4356 if (ret != NULL) 4357 ret->parent = op; 4358 ret = cur = op; 4359 } else { 4360 cur->c2 = op; 4361 if (op != NULL) 4362 op->parent = cur; 4363 op->c1 = last; 4364 if (last != NULL) 4365 last->parent = op; 4366 cur =op; 4367 last = NULL; 4368 } 4369 } else if (RAW == '|') { 4370 if (type == 0) type = CUR; 4371 4372 /* 4373 * Detect "Name , Name | Name" error 4374 */ 4375 else if (type != CUR) { 4376 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4378 ctxt->sax->error(ctxt->userData, 4379 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4380 type); 4381 ctxt->wellFormed = 0; 4382 ctxt->disableSAX = 1; 4383 if ((op != NULL) && (op != ret) && (op != last)) 4384 xmlFreeElementContent(op); 4385 if ((last != NULL) && (last != ret) && 4386 (last != ret->c1) && (last != ret->c2)) 4387 xmlFreeElementContent(last); 4388 if (ret != NULL) 4389 xmlFreeElementContent(ret); 4390 return(NULL); 4391 } 4392 NEXT; 4393 4394 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4395 if (op == NULL) 
{ 4396 if ((op != NULL) && (op != ret)) 4397 xmlFreeElementContent(op); 4398 if ((last != NULL) && (last != ret) && 4399 (last != ret->c1) && (last != ret->c2)) 4400 xmlFreeElementContent(last); 4401 if (ret != NULL) 4402 xmlFreeElementContent(ret); 4403 return(NULL); 4404 } 4405 if (last == NULL) { 4406 op->c1 = ret; 4407 if (ret != NULL) 4408 ret->parent = op; 4409 ret = cur = op; 4410 } else { 4411 cur->c2 = op; 4412 if (op != NULL) 4413 op->parent = cur; 4414 op->c1 = last; 4415 if (last != NULL) 4416 last->parent = op; 4417 cur =op; 4418 last = NULL; 4419 } 4420 } else { 4421 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4423 ctxt->sax->error(ctxt->userData, 4424 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4425 ctxt->wellFormed = 0; 4426 ctxt->disableSAX = 1; 4427 if ((op != NULL) && (op != ret)) 4428 xmlFreeElementContent(op); 4429 if ((last != NULL) && (last != ret) && 4430 (last != ret->c1) && (last != ret->c2)) 4431 xmlFreeElementContent(last); 4432 if (ret != NULL) 4433 xmlFreeElementContent(ret); 4434 return(NULL); 4435 } 4436 GROW; 4437 SKIP_BLANKS; 4438 GROW; 4439 if (RAW == '(') { 4440 /* Recurse on second child */ 4441 NEXT; 4442 SKIP_BLANKS; 4443 last = xmlParseElementChildrenContentDecl(ctxt); 4444 SKIP_BLANKS; 4445 } else { 4446 elem = xmlParseName(ctxt); 4447 if (elem == NULL) { 4448 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4450 ctxt->sax->error(ctxt->userData, 4451 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4452 ctxt->wellFormed = 0; 4453 ctxt->disableSAX = 1; 4454 if ((op != NULL) && (op != ret)) 4455 xmlFreeElementContent(op); 4456 if ((last != NULL) && (last != ret) && 4457 (last != ret->c1) && (last != ret->c2)) 4458 xmlFreeElementContent(last); 4459 if (ret != NULL) 4460 xmlFreeElementContent(ret); 4461 return(NULL); 4462 } 4463 last = xmlNewElementContent(elem, 
XML_ELEMENT_CONTENT_ELEMENT); 4464 xmlFree(elem); 4465 if (RAW == '?') { 4466 last->ocur = XML_ELEMENT_CONTENT_OPT; 4467 NEXT; 4468 } else if (RAW == '*') { 4469 last->ocur = XML_ELEMENT_CONTENT_MULT; 4470 NEXT; 4471 } else if (RAW == '+') { 4472 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4473 NEXT; 4474 } else { 4475 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4476 } 4477 } 4478 SKIP_BLANKS; 4479 GROW; 4480 } 4481 if ((cur != NULL) && (last != NULL)) { 4482 cur->c2 = last; 4483 if (last != NULL) 4484 last->parent = cur; 4485 } 4486 ctxt->entity = ctxt->input; 4487 NEXT; 4488 if (RAW == '?') { 4489 if (ret != NULL) 4490 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4491 NEXT; 4492 } else if (RAW == '*') { 4493 if (ret != NULL) { 4494 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4495 cur = ret; 4496 /* 4497 * Some normalization: 4498 * (a | b* | c?)* == (a | b | c)* 4499 */ 4500 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4501 if ((cur->c1 != NULL) && 4502 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4503 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 4504 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4505 if ((cur->c2 != NULL) && 4506 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4507 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 4508 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4509 cur = cur->c2; 4510 } 4511 } 4512 NEXT; 4513 } else if (RAW == '+') { 4514 if (ret != NULL) { 4515 int found = 0; 4516 4517 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4518 /* 4519 * Some normalization: 4520 * (a | b*)+ == (a | b)* 4521 * (a | b?)+ == (a | b)* 4522 */ 4523 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4524 if ((cur->c1 != NULL) && 4525 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4526 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 4527 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4528 found = 1; 4529 } 4530 if ((cur->c2 != NULL) && 4531 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4532 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 4533 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4534 found = 1; 4535 
} 4536 cur = cur->c2; 4537 } 4538 if (found) 4539 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4540 } 4541 NEXT; 4542 } 4543 return(ret); 4544} 4545 4546/** 4547 * xmlParseElementContentDecl: 4548 * @ctxt: an XML parser context 4549 * @name: the name of the element being defined. 4550 * @result: the Element Content pointer will be stored here if any 4551 * 4552 * parse the declaration for an Element content either Mixed or Children, 4553 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4554 * 4555 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4556 * 4557 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4558 */ 4559 4560int 4561xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4562 xmlElementContentPtr *result) { 4563 4564 xmlElementContentPtr tree = NULL; 4565 xmlParserInputPtr input = ctxt->input; 4566 int res; 4567 4568 *result = NULL; 4569 4570 if (RAW != '(') { 4571 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4573 ctxt->sax->error(ctxt->userData, 4574 "xmlParseElementContentDecl : %s '(' expected\n", name); 4575 ctxt->wellFormed = 0; 4576 ctxt->disableSAX = 1; 4577 return(-1); 4578 } 4579 NEXT; 4580 GROW; 4581 SKIP_BLANKS; 4582 if ((RAW == '#') && (NXT(1) == 'P') && 4583 (NXT(2) == 'C') && (NXT(3) == 'D') && 4584 (NXT(4) == 'A') && (NXT(5) == 'T') && 4585 (NXT(6) == 'A')) { 4586 tree = xmlParseElementMixedContentDecl(ctxt); 4587 res = XML_ELEMENT_TYPE_MIXED; 4588 } else { 4589 tree = xmlParseElementChildrenContentDecl(ctxt); 4590 res = XML_ELEMENT_TYPE_ELEMENT; 4591 } 4592 if ((ctxt->entity != NULL) && (input != ctxt->entity)) { 4593 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4595 ctxt->sax->error(ctxt->userData, 4596"Element content declaration doesn't start and stop in the same entity\n"); 4597 ctxt->wellFormed = 0; 4598 ctxt->disableSAX = 1; 4599 } 4600 SKIP_BLANKS; 4601 *result = tree; 
4602 return(res); 4603} 4604 4605/** 4606 * xmlParseElementDecl: 4607 * @ctxt: an XML parser context 4608 * 4609 * parse an Element declaration. 4610 * 4611 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 4612 * 4613 * [ VC: Unique Element Type Declaration ] 4614 * No element type may be declared more than once 4615 * 4616 * Returns the type of the element, or -1 in case of error 4617 */ 4618int 4619xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4620 xmlChar *name; 4621 int ret = -1; 4622 xmlElementContentPtr content = NULL; 4623 4624 GROW; 4625 if ((RAW == '<') && (NXT(1) == '!') && 4626 (NXT(2) == 'E') && (NXT(3) == 'L') && 4627 (NXT(4) == 'E') && (NXT(5) == 'M') && 4628 (NXT(6) == 'E') && (NXT(7) == 'N') && 4629 (NXT(8) == 'T')) { 4630 xmlParserInputPtr input = ctxt->input; 4631 4632 SKIP(9); 4633 if (!IS_BLANK(CUR)) { 4634 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4636 ctxt->sax->error(ctxt->userData, 4637 "Space required after 'ELEMENT'\n"); 4638 ctxt->wellFormed = 0; 4639 ctxt->disableSAX = 1; 4640 } 4641 SKIP_BLANKS; 4642 name = xmlParseName(ctxt); 4643 if (name == NULL) { 4644 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4646 ctxt->sax->error(ctxt->userData, 4647 "xmlParseElementDecl: no name for Element\n"); 4648 ctxt->wellFormed = 0; 4649 ctxt->disableSAX = 1; 4650 return(-1); 4651 } 4652 while ((RAW == 0) && (ctxt->inputNr > 1)) 4653 xmlPopInput(ctxt); 4654 if (!IS_BLANK(CUR)) { 4655 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4657 ctxt->sax->error(ctxt->userData, 4658 "Space required after the element name\n"); 4659 ctxt->wellFormed = 0; 4660 ctxt->disableSAX = 1; 4661 } 4662 SKIP_BLANKS; 4663 if ((RAW == 'E') && (NXT(1) == 'M') && 4664 (NXT(2) == 'P') && (NXT(3) == 'T') && 4665 (NXT(4) == 'Y')) { 4666 SKIP(5); 4667 /* 4668 * Element must always be empty. 
4669 */ 4670 ret = XML_ELEMENT_TYPE_EMPTY; 4671 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4672 (NXT(2) == 'Y')) { 4673 SKIP(3); 4674 /* 4675 * Element is a generic container. 4676 */ 4677 ret = XML_ELEMENT_TYPE_ANY; 4678 } else if (RAW == '(') { 4679 ret = xmlParseElementContentDecl(ctxt, name, &content); 4680 } else { 4681 /* 4682 * [ WFC: PEs in Internal Subset ] error handling. 4683 */ 4684 if ((RAW == '%') && (ctxt->external == 0) && 4685 (ctxt->inputNr == 1)) { 4686 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4688 ctxt->sax->error(ctxt->userData, 4689 "PEReference: forbidden within markup decl in internal subset\n"); 4690 } else { 4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4693 ctxt->sax->error(ctxt->userData, 4694 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4695 } 4696 ctxt->wellFormed = 0; 4697 ctxt->disableSAX = 1; 4698 if (name != NULL) xmlFree(name); 4699 return(-1); 4700 } 4701 4702 SKIP_BLANKS; 4703 /* 4704 * Pop-up of finished entities. 
4705 */ 4706 while ((RAW == 0) && (ctxt->inputNr > 1)) 4707 xmlPopInput(ctxt); 4708 SKIP_BLANKS; 4709 4710 if (RAW != '>') { 4711 ctxt->errNo = XML_ERR_GT_REQUIRED; 4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4713 ctxt->sax->error(ctxt->userData, 4714 "xmlParseElementDecl: expected '>' at the end\n"); 4715 ctxt->wellFormed = 0; 4716 ctxt->disableSAX = 1; 4717 } else { 4718 if (input != ctxt->input) { 4719 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4721 ctxt->sax->error(ctxt->userData, 4722"Element declaration doesn't start and stop in the same entity\n"); 4723 ctxt->wellFormed = 0; 4724 ctxt->disableSAX = 1; 4725 } 4726 4727 NEXT; 4728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4729 (ctxt->sax->elementDecl != NULL)) 4730 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4731 content); 4732 } 4733 if (content != NULL) { 4734 xmlFreeElementContent(content); 4735 } 4736 if (name != NULL) { 4737 xmlFree(name); 4738 } 4739 } 4740 return(ret); 4741} 4742 4743/** 4744 * xmlParseConditionalSections 4745 * @ctxt: an XML parser context 4746 * 4747 * [61] conditionalSect ::= includeSect | ignoreSect 4748 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4749 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 4750 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4751 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4752 */ 4753 4754static void 4755xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4756 SKIP(3); 4757 SKIP_BLANKS; 4758 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4759 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4760 (NXT(6) == 'E')) { 4761 SKIP(7); 4762 SKIP_BLANKS; 4763 if (RAW != '[') { 4764 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4766 ctxt->sax->error(ctxt->userData, 4767 "XML conditional section '[' expected\n"); 4768 ctxt->wellFormed = 0; 4769 ctxt->disableSAX = 1; 4770 } else { 4771 NEXT; 4772 } 4773 if (xmlParserDebugEntities) { 4774 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4775 xmlGenericError(xmlGenericErrorContext, 4776 "%s(%d): ", ctxt->input->filename, 4777 ctxt->input->line); 4778 xmlGenericError(xmlGenericErrorContext, 4779 "Entering INCLUDE Conditional Section\n"); 4780 } 4781 4782 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4783 (NXT(2) != '>'))) { 4784 const xmlChar *check = CUR_PTR; 4785 int cons = ctxt->input->consumed; 4786 int tok = ctxt->token; 4787 4788 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4789 xmlParseConditionalSections(ctxt); 4790 } else if (IS_BLANK(CUR)) { 4791 NEXT; 4792 } else if (RAW == '%') { 4793 xmlParsePEReference(ctxt); 4794 } else 4795 xmlParseMarkupDecl(ctxt); 4796 4797 /* 4798 * Pop-up of finished entities. 
4799 */ 4800 while ((RAW == 0) && (ctxt->inputNr > 1)) 4801 xmlPopInput(ctxt); 4802 4803 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4804 (tok == ctxt->token)) { 4805 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4807 ctxt->sax->error(ctxt->userData, 4808 "Content error in the external subset\n"); 4809 ctxt->wellFormed = 0; 4810 ctxt->disableSAX = 1; 4811 break; 4812 } 4813 } 4814 if (xmlParserDebugEntities) { 4815 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4816 xmlGenericError(xmlGenericErrorContext, 4817 "%s(%d): ", ctxt->input->filename, 4818 ctxt->input->line); 4819 xmlGenericError(xmlGenericErrorContext, 4820 "Leaving INCLUDE Conditional Section\n"); 4821 } 4822 4823 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4824 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4825 int state; 4826 int instate; 4827 int depth = 0; 4828 4829 SKIP(6); 4830 SKIP_BLANKS; 4831 if (RAW != '[') { 4832 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4834 ctxt->sax->error(ctxt->userData, 4835 "XML conditional section '[' expected\n"); 4836 ctxt->wellFormed = 0; 4837 ctxt->disableSAX = 1; 4838 } else { 4839 NEXT; 4840 } 4841 if (xmlParserDebugEntities) { 4842 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4843 xmlGenericError(xmlGenericErrorContext, 4844 "%s(%d): ", ctxt->input->filename, 4845 ctxt->input->line); 4846 xmlGenericError(xmlGenericErrorContext, 4847 "Entering IGNORE Conditional Section\n"); 4848 } 4849 4850 /* 4851 * Parse up to the end of the conditional section 4852 * But disable SAX event generating DTD building in the meantime 4853 */ 4854 state = ctxt->disableSAX; 4855 instate = ctxt->instate; 4856 ctxt->disableSAX = 1; 4857 ctxt->instate = XML_PARSER_IGNORE; 4858 4859 while ((depth >= 0) && (RAW != 0)) { 4860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4861 depth++; 4862 SKIP(3); 
4863 continue; 4864 } 4865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4866 if (--depth >= 0) SKIP(3); 4867 continue; 4868 } 4869 NEXT; 4870 continue; 4871 } 4872 4873 ctxt->disableSAX = state; 4874 ctxt->instate = instate; 4875 4876 if (xmlParserDebugEntities) { 4877 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4878 xmlGenericError(xmlGenericErrorContext, 4879 "%s(%d): ", ctxt->input->filename, 4880 ctxt->input->line); 4881 xmlGenericError(xmlGenericErrorContext, 4882 "Leaving IGNORE Conditional Section\n"); 4883 } 4884 4885 } else { 4886 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4888 ctxt->sax->error(ctxt->userData, 4889 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4890 ctxt->wellFormed = 0; 4891 ctxt->disableSAX = 1; 4892 } 4893 4894 if (RAW == 0) 4895 SHRINK; 4896 4897 if (RAW == 0) { 4898 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4900 ctxt->sax->error(ctxt->userData, 4901 "XML conditional section not closed\n"); 4902 ctxt->wellFormed = 0; 4903 ctxt->disableSAX = 1; 4904 } else { 4905 SKIP(3); 4906 } 4907} 4908 4909/** 4910 * xmlParseMarkupDecl: 4911 * @ctxt: an XML parser context 4912 * 4913 * parse Markup declarations 4914 * 4915 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4916 * NotationDecl | PI | Comment 4917 * 4918 * [ VC: Proper Declaration/PE Nesting ] 4919 * Parameter-entity replacement text must be properly nested with 4920 * markup declarations. That is to say, if either the first character 4921 * or the last character of a markup declaration (markupdecl above) is 4922 * contained in the replacement text for a parameter-entity reference, 4923 * both must be contained in the same replacement text. 
4924 * 4925 * [ WFC: PEs in Internal Subset ] 4926 * In the internal DTD subset, parameter-entity references can occur 4927 * only where markup declarations can occur, not within markup declarations. 4928 * (This does not apply to references that occur in external parameter 4929 * entities or to the external subset.) 4930 */ 4931void 4932xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4933 GROW; 4934 xmlParseElementDecl(ctxt); 4935 xmlParseAttributeListDecl(ctxt); 4936 xmlParseEntityDecl(ctxt); 4937 xmlParseNotationDecl(ctxt); 4938 xmlParsePI(ctxt); 4939 xmlParseComment(ctxt); 4940 /* 4941 * This is only for internal subset. On external entities, 4942 * the replacement is done before parsing stage 4943 */ 4944 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4945 xmlParsePEReference(ctxt); 4946 4947 /* 4948 * Conditional sections are allowed from entities included 4949 * by PE References in the internal subset. 4950 */ 4951 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 4952 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4953 xmlParseConditionalSections(ctxt); 4954 } 4955 } 4956 4957 ctxt->instate = XML_PARSER_DTD; 4958} 4959 4960/** 4961 * xmlParseTextDecl: 4962 * @ctxt: an XML parser context 4963 * 4964 * parse an XML declaration header for external entities 4965 * 4966 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4967 * 4968 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4969 */ 4970 4971void 4972xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4973 xmlChar *version; 4974 4975 /* 4976 * We know that '<?xml' is here. 
4977 */ 4978 if ((RAW == '<') && (NXT(1) == '?') && 4979 (NXT(2) == 'x') && (NXT(3) == 'm') && 4980 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4981 SKIP(5); 4982 } else { 4983 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4985 ctxt->sax->error(ctxt->userData, 4986 "Text declaration '<?xml' required\n"); 4987 ctxt->wellFormed = 0; 4988 ctxt->disableSAX = 1; 4989 4990 return; 4991 } 4992 4993 if (!IS_BLANK(CUR)) { 4994 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4996 ctxt->sax->error(ctxt->userData, 4997 "Space needed after '<?xml'\n"); 4998 ctxt->wellFormed = 0; 4999 ctxt->disableSAX = 1; 5000 } 5001 SKIP_BLANKS; 5002 5003 /* 5004 * We may have the VersionInfo here. 5005 */ 5006 version = xmlParseVersionInfo(ctxt); 5007 if (version == NULL) 5008 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5009 else { 5010 if (!IS_BLANK(CUR)) { 5011 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5013 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 5014 ctxt->wellFormed = 0; 5015 ctxt->disableSAX = 1; 5016 } 5017 } 5018 ctxt->input->version = version; 5019 5020 /* 5021 * We must have the encoding declaration 5022 */ 5023 xmlParseEncodingDecl(ctxt); 5024 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5025 /* 5026 * The XML REC instructs us to stop parsing right here 5027 */ 5028 return; 5029 } 5030 5031 SKIP_BLANKS; 5032 if ((RAW == '?') && (NXT(1) == '>')) { 5033 SKIP(2); 5034 } else if (RAW == '>') { 5035 /* Deprecated old WD ... 
*/ 5036 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5038 ctxt->sax->error(ctxt->userData, 5039 "XML declaration must end-up with '?>'\n"); 5040 ctxt->wellFormed = 0; 5041 ctxt->disableSAX = 1; 5042 NEXT; 5043 } else { 5044 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5046 ctxt->sax->error(ctxt->userData, 5047 "parsing XML declaration: '?>' expected\n"); 5048 ctxt->wellFormed = 0; 5049 ctxt->disableSAX = 1; 5050 MOVETO_ENDTAG(CUR_PTR); 5051 NEXT; 5052 } 5053} 5054 5055/** 5056 * xmlParseExternalSubset: 5057 * @ctxt: an XML parser context 5058 * @ExternalID: the external identifier 5059 * @SystemID: the system identifier (or URL) 5060 * 5061 * parse Markup declarations from an external subset 5062 * 5063 * [30] extSubset ::= textDecl? extSubsetDecl 5064 * 5065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5066 */ 5067void 5068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5069 const xmlChar *SystemID) { 5070 GROW; 5071 if ((RAW == '<') && (NXT(1) == '?') && 5072 (NXT(2) == 'x') && (NXT(3) == 'm') && 5073 (NXT(4) == 'l')) { 5074 xmlParseTextDecl(ctxt); 5075 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5076 /* 5077 * The XML REC instructs us to stop parsing right here 5078 */ 5079 ctxt->instate = XML_PARSER_EOF; 5080 return; 5081 } 5082 } 5083 if (ctxt->myDoc == NULL) { 5084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5085 } 5086 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5087 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5088 5089 ctxt->instate = XML_PARSER_DTD; 5090 ctxt->external = 1; 5091 while (((RAW == '<') && (NXT(1) == '?')) || 5092 ((RAW == '<') && (NXT(1) == '!')) || 5093 (RAW == '%') || IS_BLANK(CUR)) { 5094 const xmlChar *check = CUR_PTR; 5095 int cons = ctxt->input->consumed; 5096 int tok = ctxt->token; 5097 5098 GROW; 5099 if ((RAW == '<') && 
(NXT(1) == '!') && (NXT(2) == '[')) { 5100 xmlParseConditionalSections(ctxt); 5101 } else if (IS_BLANK(CUR)) { 5102 NEXT; 5103 } else if (RAW == '%') { 5104 xmlParsePEReference(ctxt); 5105 } else 5106 xmlParseMarkupDecl(ctxt); 5107 5108 /* 5109 * Pop-up of finished entities. 5110 */ 5111 while ((RAW == 0) && (ctxt->inputNr > 1)) 5112 xmlPopInput(ctxt); 5113 5114 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 5115 (tok == ctxt->token)) { 5116 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5118 ctxt->sax->error(ctxt->userData, 5119 "Content error in the external subset\n"); 5120 ctxt->wellFormed = 0; 5121 ctxt->disableSAX = 1; 5122 break; 5123 } 5124 } 5125 5126 if (RAW != 0) { 5127 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5129 ctxt->sax->error(ctxt->userData, 5130 "Extra content at the end of the document\n"); 5131 ctxt->wellFormed = 0; 5132 ctxt->disableSAX = 1; 5133 } 5134 5135} 5136 5137/** 5138 * xmlParseReference: 5139 * @ctxt: an XML parser context 5140 * 5141 * parse and handle entity references in content, depending on the SAX 5142 * interface, this may end-up in a call to character() if this is a 5143 * CharRef, a predefined entity, if there is no reference() callback. 5144 * or if the parser was asked to switch to that mode. 5145 * 5146 * [67] Reference ::= EntityRef | CharRef 5147 */ 5148void 5149xmlParseReference(xmlParserCtxtPtr ctxt) { 5150 xmlEntityPtr ent; 5151 xmlChar *val; 5152 if (RAW != '&') return; 5153 5154 if (NXT(1) == '#') { 5155 int i = 0; 5156 xmlChar out[10]; 5157 int hex = NXT(2); 5158 int value = xmlParseCharRef(ctxt); 5159 5160 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5161 /* 5162 * So we are using non-UTF-8 buffers 5163 * Check that the char fit on 8bits, if not 5164 * generate a CharRef. 
5165 */ 5166 if (value <= 0xFF) { 5167 out[0] = value; 5168 out[1] = 0; 5169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5170 (!ctxt->disableSAX)) 5171 ctxt->sax->characters(ctxt->userData, out, 1); 5172 } else { 5173 if ((hex == 'x') || (hex == 'X')) 5174 sprintf((char *)out, "#x%X", value); 5175 else 5176 sprintf((char *)out, "#%d", value); 5177 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5178 (!ctxt->disableSAX)) 5179 ctxt->sax->reference(ctxt->userData, out); 5180 } 5181 } else { 5182 /* 5183 * Just encode the value in UTF-8 5184 */ 5185 COPY_BUF(0 ,out, i, value); 5186 out[i] = 0; 5187 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5188 (!ctxt->disableSAX)) 5189 ctxt->sax->characters(ctxt->userData, out, i); 5190 } 5191 } else { 5192 ent = xmlParseEntityRef(ctxt); 5193 if (ent == NULL) return; 5194 if (!ctxt->wellFormed) 5195 return; 5196 if ((ent->name != NULL) && 5197 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5198 xmlNodePtr list = NULL; 5199 int ret; 5200 5201 5202 /* 5203 * The first reference to the entity trigger a parsing phase 5204 * where the ent->children is filled with the result from 5205 * the parsing. 5206 */ 5207 if (ent->children == NULL) { 5208 xmlChar *value; 5209 value = ent->content; 5210 5211 /* 5212 * Check that this entity is well formed 5213 */ 5214 if ((value != NULL) && 5215 (value[1] == 0) && (value[0] == '<') && 5216 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5217 /* 5218 * DONE: get definite answer on this !!! 5219 * Lots of entity decls are used to declare a single 5220 * char 5221 * <!ENTITY lt "<"> 5222 * Which seems to be valid since 5223 * 2.4: The ampersand character (&) and the left angle 5224 * bracket (<) may appear in their literal form only 5225 * when used ... They are also legal within the literal 5226 * entity value of an internal entity declaration;i 5227 * see "4.3.2 Well-Formed Parsed Entities". 5228 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
5229 * Looking at the OASIS test suite and James Clark 5230 * tests, this is broken. However the XML REC uses 5231 * it. Is the XML REC not well-formed ???? 5232 * This is a hack to avoid this problem 5233 * 5234 * ANSWER: since lt gt amp .. are already defined, 5235 * this is a redefinition and hence the fact that the 5236 * content is not well balanced is not a Wf error, this 5237 * is lousy but acceptable. 5238 */ 5239 list = xmlNewDocText(ctxt->myDoc, value); 5240 if (list != NULL) { 5241 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5242 (ent->children == NULL)) { 5243 ent->children = list; 5244 ent->last = list; 5245 list->parent = (xmlNodePtr) ent; 5246 } else { 5247 xmlFreeNodeList(list); 5248 } 5249 } else if (list != NULL) { 5250 xmlFreeNodeList(list); 5251 } 5252 } else { 5253 /* 5254 * 4.3.2: An internal general parsed entity is well-formed 5255 * if its replacement text matches the production labeled 5256 * content. 5257 */ 5258 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5259 ctxt->depth++; 5260 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 5261 ctxt->sax, NULL, ctxt->depth, 5262 value, &list); 5263 ctxt->depth--; 5264 } else if (ent->etype == 5265 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5266 ctxt->depth++; 5267 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5268 ctxt->sax, NULL, ctxt->depth, 5269 ent->URI, ent->ExternalID, &list); 5270 ctxt->depth--; 5271 } else { 5272 ret = -1; 5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5274 ctxt->sax->error(ctxt->userData, 5275 "Internal: invalid entity type\n"); 5276 } 5277 if (ret == XML_ERR_ENTITY_LOOP) { 5278 ctxt->errNo = XML_ERR_ENTITY_LOOP; 5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5280 ctxt->sax->error(ctxt->userData, 5281 "Detected entity reference loop\n"); 5282 ctxt->wellFormed = 0; 5283 ctxt->disableSAX = 1; 5284 return; 5285 } else if ((ret == 0) && (list != NULL)) { 5286 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5287 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5288 (ent->children == NULL)) { 5289 ent->children = list; 5290 if (ctxt->replaceEntities) { 5291 /* 5292 * Prune it directly in the generated document 5293 * except for single text nodes. 5294 */ 5295 if ((list->type == XML_TEXT_NODE) && 5296 (list->next == NULL)) { 5297 list->parent = (xmlNodePtr) ent; 5298 list = NULL; 5299 } else { 5300 while (list != NULL) { 5301 list->parent = (xmlNodePtr) ctxt->node; 5302 if (list->next == NULL) 5303 ent->last = list; 5304 list = list->next; 5305 } 5306 list = ent->children; 5307 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5308 xmlAddEntityReference(ent, list, NULL); 5309 } 5310 } else { 5311 while (list != NULL) { 5312 list->parent = (xmlNodePtr) ent; 5313 if (list->next == NULL) 5314 ent->last = list; 5315 list = list->next; 5316 } 5317 } 5318 } else { 5319 xmlFreeNodeList(list); 5320 list = NULL; 5321 } 5322 } else if (ret > 0) { 5323 ctxt->errNo = ret; 5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5325 ctxt->sax->error(ctxt->userData, 5326 "Entity value required\n"); 5327 ctxt->wellFormed = 0; 5328 ctxt->disableSAX = 1; 5329 } else if (list != NULL) { 5330 xmlFreeNodeList(list); 5331 list = NULL; 5332 } 5333 } 5334 } 5335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5337 /* 5338 * Create a node. 
5339 */ 5340 ctxt->sax->reference(ctxt->userData, ent->name); 5341 return; 5342 } else if (ctxt->replaceEntities) { 5343 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5344 /* 5345 * Seems we are generating the DOM content, do 5346 * a simple tree copy for all references except the first 5347 * In the first occurrence list contains the replacement 5348 */ 5349 if (list == NULL) { 5350 xmlNodePtr new = NULL, cur, firstChild = NULL; 5351 cur = ent->children; 5352 while (cur != NULL) { 5353 new = xmlCopyNode(cur, 1); 5354 if (firstChild == NULL){ 5355 firstChild = new; 5356 } 5357 xmlAddChild(ctxt->node, new); 5358 if (cur == ent->last) 5359 break; 5360 cur = cur->next; 5361 } 5362 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5363 xmlAddEntityReference(ent, firstChild, new); 5364 } else { 5365 /* 5366 * the name change is to avoid coalescing of the 5367 * node with a possible previous text one which 5368 * would make ent->children a dangling pointer 5369 */ 5370 if (ent->children->type == XML_TEXT_NODE) 5371 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5372 if ((ent->last != ent->children) && 5373 (ent->last->type == XML_TEXT_NODE)) 5374 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5375 xmlAddChildList(ctxt->node, ent->children); 5376 } 5377 5378 /* 5379 * This is to avoid a nasty side effect, see 5380 * characters() in SAX.c 5381 */ 5382 ctxt->nodemem = 0; 5383 ctxt->nodelen = 0; 5384 return; 5385 } else { 5386 /* 5387 * Probably running in SAX mode 5388 */ 5389 xmlParserInputPtr input; 5390 5391 input = xmlNewEntityInputStream(ctxt, ent); 5392 xmlPushInput(ctxt, input); 5393 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5394 (RAW == '<') && (NXT(1) == '?') && 5395 (NXT(2) == 'x') && (NXT(3) == 'm') && 5396 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5397 xmlParseTextDecl(ctxt); 5398 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5399 /* 5400 * The XML REC instructs us to stop parsing right here 5401 */ 5402 ctxt->instate = 
XML_PARSER_EOF; 5403 return; 5404 } 5405 if (input->standalone == 1) { 5406 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5408 ctxt->sax->error(ctxt->userData, 5409 "external parsed entities cannot be standalone\n"); 5410 ctxt->wellFormed = 0; 5411 ctxt->disableSAX = 1; 5412 } 5413 } 5414 return; 5415 } 5416 } 5417 } else { 5418 val = ent->content; 5419 if (val == NULL) return; 5420 /* 5421 * inline the entity. 5422 */ 5423 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5424 (!ctxt->disableSAX)) 5425 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5426 } 5427 } 5428} 5429 5430/** 5431 * xmlParseEntityRef: 5432 * @ctxt: an XML parser context 5433 * 5434 * parse ENTITY references declarations 5435 * 5436 * [68] EntityRef ::= '&' Name ';' 5437 * 5438 * [ WFC: Entity Declared ] 5439 * In a document without any DTD, a document with only an internal DTD 5440 * subset which contains no parameter entity references, or a document 5441 * with "standalone='yes'", the Name given in the entity reference 5442 * must match that in an entity declaration, except that well-formed 5443 * documents need not declare any of the following entities: amp, lt, 5444 * gt, apos, quot. The declaration of a parameter entity must precede 5445 * any reference to it. Similarly, the declaration of a general entity 5446 * must precede any reference to it which appears in a default value in an 5447 * attribute-list declaration. Note that if entities are declared in the 5448 * external subset or in external parameter entities, a non-validating 5449 * processor is not obligated to read and process their declarations; 5450 * for such documents, the rule that an entity must be declared is a 5451 * well-formedness constraint only if standalone='yes'. 
5452 * 5453 * [ WFC: Parsed Entity ] 5454 * An entity reference must not contain the name of an unparsed entity 5455 * 5456 * Returns the xmlEntityPtr if found, or NULL otherwise. 5457 */ 5458xmlEntityPtr 5459xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5460 xmlChar *name; 5461 xmlEntityPtr ent = NULL; 5462 5463 GROW; 5464 5465 if (RAW == '&') { 5466 NEXT; 5467 name = xmlParseName(ctxt); 5468 if (name == NULL) { 5469 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5471 ctxt->sax->error(ctxt->userData, 5472 "xmlParseEntityRef: no name\n"); 5473 ctxt->wellFormed = 0; 5474 ctxt->disableSAX = 1; 5475 } else { 5476 if (RAW == ';') { 5477 NEXT; 5478 /* 5479 * Ask first SAX for entity resolution, otherwise try the 5480 * predefined set. 5481 */ 5482 if (ctxt->sax != NULL) { 5483 if (ctxt->sax->getEntity != NULL) 5484 ent = ctxt->sax->getEntity(ctxt->userData, name); 5485 if (ent == NULL) 5486 ent = xmlGetPredefinedEntity(name); 5487 } 5488 /* 5489 * [ WFC: Entity Declared ] 5490 * In a document without any DTD, a document with only an 5491 * internal DTD subset which contains no parameter entity 5492 * references, or a document with "standalone='yes'", the 5493 * Name given in the entity reference must match that in an 5494 * entity declaration, except that well-formed documents 5495 * need not declare any of the following entities: amp, lt, 5496 * gt, apos, quot. 5497 * The declaration of a parameter entity must precede any 5498 * reference to it. 5499 * Similarly, the declaration of a general entity must 5500 * precede any reference to it which appears in a default 5501 * value in an attribute-list declaration. 
Note that if 5502 * entities are declared in the external subset or in 5503 * external parameter entities, a non-validating processor 5504 * is not obligated to read and process their declarations; 5505 * for such documents, the rule that an entity must be 5506 * declared is a well-formedness constraint only if 5507 * standalone='yes'. 5508 */ 5509 if (ent == NULL) { 5510 if ((ctxt->standalone == 1) || 5511 ((ctxt->hasExternalSubset == 0) && 5512 (ctxt->hasPErefs == 0))) { 5513 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5515 ctxt->sax->error(ctxt->userData, 5516 "Entity '%s' not defined\n", name); 5517 ctxt->wellFormed = 0; 5518 ctxt->valid = 0; 5519 ctxt->disableSAX = 1; 5520 } else { 5521 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5523 ctxt->sax->error(ctxt->userData, 5524 "Entity '%s' not defined\n", name); 5525 ctxt->valid = 0; 5526 } 5527 } 5528 5529 /* 5530 * [ WFC: Parsed Entity ] 5531 * An entity reference must not contain the name of an 5532 * unparsed entity 5533 */ 5534 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5535 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5537 ctxt->sax->error(ctxt->userData, 5538 "Entity reference to unparsed entity %s\n", name); 5539 ctxt->wellFormed = 0; 5540 ctxt->disableSAX = 1; 5541 } 5542 5543 /* 5544 * [ WFC: No External Entity References ] 5545 * Attribute values cannot contain direct or indirect 5546 * entity references to external entities. 
5547 */ 5548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5550 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5552 ctxt->sax->error(ctxt->userData, 5553 "Attribute references external entity '%s'\n", name); 5554 ctxt->wellFormed = 0; 5555 ctxt->disableSAX = 1; 5556 } 5557 /* 5558 * [ WFC: No < in Attribute Values ] 5559 * The replacement text of any entity referred to directly or 5560 * indirectly in an attribute value (other than "<") must 5561 * not contain a <. 5562 */ 5563 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5564 (ent != NULL) && 5565 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5566 (ent->content != NULL) && 5567 (xmlStrchr(ent->content, '<'))) { 5568 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5570 ctxt->sax->error(ctxt->userData, 5571 "'<' in entity '%s' is not allowed in attributes values\n", name); 5572 ctxt->wellFormed = 0; 5573 ctxt->disableSAX = 1; 5574 } 5575 5576 /* 5577 * Internal check, no parameter entities here ... 5578 */ 5579 else { 5580 switch (ent->etype) { 5581 case XML_INTERNAL_PARAMETER_ENTITY: 5582 case XML_EXTERNAL_PARAMETER_ENTITY: 5583 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5585 ctxt->sax->error(ctxt->userData, 5586 "Attempt to reference the parameter entity '%s'\n", name); 5587 ctxt->wellFormed = 0; 5588 ctxt->disableSAX = 1; 5589 break; 5590 default: 5591 break; 5592 } 5593 } 5594 5595 /* 5596 * [ WFC: No Recursion ] 5597 * A parsed entity must not contain a recursive reference 5598 * to itself, either directly or indirectly. 
5599 * Done somewhere else 5600 */ 5601 5602 } else { 5603 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5605 ctxt->sax->error(ctxt->userData, 5606 "xmlParseEntityRef: expecting ';'\n"); 5607 ctxt->wellFormed = 0; 5608 ctxt->disableSAX = 1; 5609 } 5610 xmlFree(name); 5611 } 5612 } 5613 return(ent); 5614} 5615 5616/** 5617 * xmlParseStringEntityRef: 5618 * @ctxt: an XML parser context 5619 * @str: a pointer to an index in the string 5620 * 5621 * parse ENTITY references declarations, but this version parses it from 5622 * a string value. 5623 * 5624 * [68] EntityRef ::= '&' Name ';' 5625 * 5626 * [ WFC: Entity Declared ] 5627 * In a document without any DTD, a document with only an internal DTD 5628 * subset which contains no parameter entity references, or a document 5629 * with "standalone='yes'", the Name given in the entity reference 5630 * must match that in an entity declaration, except that well-formed 5631 * documents need not declare any of the following entities: amp, lt, 5632 * gt, apos, quot. The declaration of a parameter entity must precede 5633 * any reference to it. Similarly, the declaration of a general entity 5634 * must precede any reference to it which appears in a default value in an 5635 * attribute-list declaration. Note that if entities are declared in the 5636 * external subset or in external parameter entities, a non-validating 5637 * processor is not obligated to read and process their declarations; 5638 * for such documents, the rule that an entity must be declared is a 5639 * well-formedness constraint only if standalone='yes'. 5640 * 5641 * [ WFC: Parsed Entity ] 5642 * An entity reference must not contain the name of an unparsed entity 5643 * 5644 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5645 * is updated to the current location in the string. 
5646 */ 5647xmlEntityPtr 5648xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5649 xmlChar *name; 5650 const xmlChar *ptr; 5651 xmlChar cur; 5652 xmlEntityPtr ent = NULL; 5653 5654 if ((str == NULL) || (*str == NULL)) 5655 return(NULL); 5656 ptr = *str; 5657 cur = *ptr; 5658 if (cur == '&') { 5659 ptr++; 5660 cur = *ptr; 5661 name = xmlParseStringName(ctxt, &ptr); 5662 if (name == NULL) { 5663 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5665 ctxt->sax->error(ctxt->userData, 5666 "xmlParseStringEntityRef: no name\n"); 5667 ctxt->wellFormed = 0; 5668 ctxt->disableSAX = 1; 5669 } else { 5670 if (*ptr == ';') { 5671 ptr++; 5672 /* 5673 * Ask first SAX for entity resolution, otherwise try the 5674 * predefined set. 5675 */ 5676 if (ctxt->sax != NULL) { 5677 if (ctxt->sax->getEntity != NULL) 5678 ent = ctxt->sax->getEntity(ctxt->userData, name); 5679 if (ent == NULL) 5680 ent = xmlGetPredefinedEntity(name); 5681 } 5682 /* 5683 * [ WFC: Entity Declared ] 5684 * In a document without any DTD, a document with only an 5685 * internal DTD subset which contains no parameter entity 5686 * references, or a document with "standalone='yes'", the 5687 * Name given in the entity reference must match that in an 5688 * entity declaration, except that well-formed documents 5689 * need not declare any of the following entities: amp, lt, 5690 * gt, apos, quot. 5691 * The declaration of a parameter entity must precede any 5692 * reference to it. 5693 * Similarly, the declaration of a general entity must 5694 * precede any reference to it which appears in a default 5695 * value in an attribute-list declaration. 
Note that if 5696 * entities are declared in the external subset or in 5697 * external parameter entities, a non-validating processor 5698 * is not obligated to read and process their declarations; 5699 * for such documents, the rule that an entity must be 5700 * declared is a well-formedness constraint only if 5701 * standalone='yes'. 5702 */ 5703 if (ent == NULL) { 5704 if ((ctxt->standalone == 1) || 5705 ((ctxt->hasExternalSubset == 0) && 5706 (ctxt->hasPErefs == 0))) { 5707 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5709 ctxt->sax->error(ctxt->userData, 5710 "Entity '%s' not defined\n", name); 5711 ctxt->wellFormed = 0; 5712 ctxt->disableSAX = 1; 5713 } else { 5714 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5715 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5716 ctxt->sax->warning(ctxt->userData, 5717 "Entity '%s' not defined\n", name); 5718 } 5719 } 5720 5721 /* 5722 * [ WFC: Parsed Entity ] 5723 * An entity reference must not contain the name of an 5724 * unparsed entity 5725 */ 5726 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5727 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5729 ctxt->sax->error(ctxt->userData, 5730 "Entity reference to unparsed entity %s\n", name); 5731 ctxt->wellFormed = 0; 5732 ctxt->disableSAX = 1; 5733 } 5734 5735 /* 5736 * [ WFC: No External Entity References ] 5737 * Attribute values cannot contain direct or indirect 5738 * entity references to external entities. 
5739 */ 5740 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5741 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5742 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5744 ctxt->sax->error(ctxt->userData, 5745 "Attribute references external entity '%s'\n", name); 5746 ctxt->wellFormed = 0; 5747 ctxt->disableSAX = 1; 5748 } 5749 /* 5750 * [ WFC: No < in Attribute Values ] 5751 * The replacement text of any entity referred to directly or 5752 * indirectly in an attribute value (other than "<") must 5753 * not contain a <. 5754 */ 5755 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5756 (ent != NULL) && 5757 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5758 (ent->content != NULL) && 5759 (xmlStrchr(ent->content, '<'))) { 5760 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5762 ctxt->sax->error(ctxt->userData, 5763 "'<' in entity '%s' is not allowed in attributes values\n", name); 5764 ctxt->wellFormed = 0; 5765 ctxt->disableSAX = 1; 5766 } 5767 5768 /* 5769 * Internal check, no parameter entities here ... 5770 */ 5771 else { 5772 switch (ent->etype) { 5773 case XML_INTERNAL_PARAMETER_ENTITY: 5774 case XML_EXTERNAL_PARAMETER_ENTITY: 5775 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5777 ctxt->sax->error(ctxt->userData, 5778 "Attempt to reference the parameter entity '%s'\n", name); 5779 ctxt->wellFormed = 0; 5780 ctxt->disableSAX = 1; 5781 break; 5782 default: 5783 break; 5784 } 5785 } 5786 5787 /* 5788 * [ WFC: No Recursion ] 5789 * A parsed entity must not contain a recursive reference 5790 * to itself, either directly or indirectly. 
5791 * Done somewhere else 5792 */ 5793 5794 } else { 5795 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5797 ctxt->sax->error(ctxt->userData, 5798 "xmlParseStringEntityRef: expecting ';'\n"); 5799 ctxt->wellFormed = 0; 5800 ctxt->disableSAX = 1; 5801 } 5802 xmlFree(name); 5803 } 5804 } 5805 *str = ptr; 5806 return(ent); 5807} 5808 5809/** 5810 * xmlParsePEReference: 5811 * @ctxt: an XML parser context 5812 * 5813 * parse PEReference declarations 5814 * The entity content is handled directly by pushing it's content as 5815 * a new input stream. 5816 * 5817 * [69] PEReference ::= '%' Name ';' 5818 * 5819 * [ WFC: No Recursion ] 5820 * A parsed entity must not contain a recursive 5821 * reference to itself, either directly or indirectly. 5822 * 5823 * [ WFC: Entity Declared ] 5824 * In a document without any DTD, a document with only an internal DTD 5825 * subset which contains no parameter entity references, or a document 5826 * with "standalone='yes'", ... ... The declaration of a parameter 5827 * entity must precede any reference to it... 5828 * 5829 * [ VC: Entity Declared ] 5830 * In a document with an external subset or external parameter entities 5831 * with "standalone='no'", ... ... The declaration of a parameter entity 5832 * must precede any reference to it... 5833 * 5834 * [ WFC: In DTD ] 5835 * Parameter-entity references may only appear in the DTD. 5836 * NOTE: misleading but this is handled. 
5837 */ 5838void 5839xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5840 xmlChar *name; 5841 xmlEntityPtr entity = NULL; 5842 xmlParserInputPtr input; 5843 5844 if (RAW == '%') { 5845 NEXT; 5846 name = xmlParseName(ctxt); 5847 if (name == NULL) { 5848 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5850 ctxt->sax->error(ctxt->userData, 5851 "xmlParsePEReference: no name\n"); 5852 ctxt->wellFormed = 0; 5853 ctxt->disableSAX = 1; 5854 } else { 5855 if (RAW == ';') { 5856 NEXT; 5857 if ((ctxt->sax != NULL) && 5858 (ctxt->sax->getParameterEntity != NULL)) 5859 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5860 name); 5861 if (entity == NULL) { 5862 /* 5863 * [ WFC: Entity Declared ] 5864 * In a document without any DTD, a document with only an 5865 * internal DTD subset which contains no parameter entity 5866 * references, or a document with "standalone='yes'", ... 5867 * ... The declaration of a parameter entity must precede 5868 * any reference to it... 5869 */ 5870 if ((ctxt->standalone == 1) || 5871 ((ctxt->hasExternalSubset == 0) && 5872 (ctxt->hasPErefs == 0))) { 5873 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5874 if ((!ctxt->disableSAX) && 5875 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5876 ctxt->sax->error(ctxt->userData, 5877 "PEReference: %%%s; not found\n", name); 5878 ctxt->wellFormed = 0; 5879 ctxt->disableSAX = 1; 5880 } else { 5881 /* 5882 * [ VC: Entity Declared ] 5883 * In a document with an external subset or external 5884 * parameter entities with "standalone='no'", ... 5885 * ... The declaration of a parameter entity must precede 5886 * any reference to it... 
5887 */ 5888 if ((!ctxt->disableSAX) && 5889 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5890 ctxt->sax->warning(ctxt->userData, 5891 "PEReference: %%%s; not found\n", name); 5892 ctxt->valid = 0; 5893 } 5894 } else { 5895 /* 5896 * Internal checking in case the entity quest barfed 5897 */ 5898 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5899 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5900 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5901 ctxt->sax->warning(ctxt->userData, 5902 "Internal: %%%s; is not a parameter entity\n", name); 5903 } else { 5904 /* 5905 * TODO !!! 5906 * handle the extra spaces added before and after 5907 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5908 */ 5909 input = xmlNewEntityInputStream(ctxt, entity); 5910 xmlPushInput(ctxt, input); 5911 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5912 (RAW == '<') && (NXT(1) == '?') && 5913 (NXT(2) == 'x') && (NXT(3) == 'm') && 5914 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5915 xmlParseTextDecl(ctxt); 5916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5917 /* 5918 * The XML REC instructs us to stop parsing 5919 * right here 5920 */ 5921 ctxt->instate = XML_PARSER_EOF; 5922 xmlFree(name); 5923 return; 5924 } 5925 } 5926 if (ctxt->token == 0) 5927 ctxt->token = ' '; 5928 } 5929 } 5930 ctxt->hasPErefs = 1; 5931 } else { 5932 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5934 ctxt->sax->error(ctxt->userData, 5935 "xmlParsePEReference: expecting ';'\n"); 5936 ctxt->wellFormed = 0; 5937 ctxt->disableSAX = 1; 5938 } 5939 xmlFree(name); 5940 } 5941 } 5942} 5943 5944/** 5945 * xmlParseStringPEReference: 5946 * @ctxt: an XML parser context 5947 * @str: a pointer to an index in the string 5948 * 5949 * parse PEReference declarations 5950 * 5951 * [69] PEReference ::= '%' Name ';' 5952 * 5953 * [ WFC: No Recursion ] 5954 * A parsed entity must not contain a recursive 5955 * reference to itself, 
either directly or indirectly. 5956 * 5957 * [ WFC: Entity Declared ] 5958 * In a document without any DTD, a document with only an internal DTD 5959 * subset which contains no parameter entity references, or a document 5960 * with "standalone='yes'", ... ... The declaration of a parameter 5961 * entity must precede any reference to it... 5962 * 5963 * [ VC: Entity Declared ] 5964 * In a document with an external subset or external parameter entities 5965 * with "standalone='no'", ... ... The declaration of a parameter entity 5966 * must precede any reference to it... 5967 * 5968 * [ WFC: In DTD ] 5969 * Parameter-entity references may only appear in the DTD. 5970 * NOTE: misleading but this is handled. 5971 * 5972 * Returns the string of the entity content. 5973 * str is updated to the current value of the index 5974 */ 5975xmlEntityPtr 5976xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5977 const xmlChar *ptr; 5978 xmlChar cur; 5979 xmlChar *name; 5980 xmlEntityPtr entity = NULL; 5981 5982 if ((str == NULL) || (*str == NULL)) return(NULL); 5983 ptr = *str; 5984 cur = *ptr; 5985 if (cur == '%') { 5986 ptr++; 5987 cur = *ptr; 5988 name = xmlParseStringName(ctxt, &ptr); 5989 if (name == NULL) { 5990 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5992 ctxt->sax->error(ctxt->userData, 5993 "xmlParseStringPEReference: no name\n"); 5994 ctxt->wellFormed = 0; 5995 ctxt->disableSAX = 1; 5996 } else { 5997 cur = *ptr; 5998 if (cur == ';') { 5999 ptr++; 6000 cur = *ptr; 6001 if ((ctxt->sax != NULL) && 6002 (ctxt->sax->getParameterEntity != NULL)) 6003 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6004 name); 6005 if (entity == NULL) { 6006 /* 6007 * [ WFC: Entity Declared ] 6008 * In a document without any DTD, a document with only an 6009 * internal DTD subset which contains no parameter entity 6010 * references, or a document with "standalone='yes'", ... 6011 * ... 
The declaration of a parameter entity must precede 6012 * any reference to it... 6013 */ 6014 if ((ctxt->standalone == 1) || 6015 ((ctxt->hasExternalSubset == 0) && 6016 (ctxt->hasPErefs == 0))) { 6017 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 6018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6019 ctxt->sax->error(ctxt->userData, 6020 "PEReference: %%%s; not found\n", name); 6021 ctxt->wellFormed = 0; 6022 ctxt->disableSAX = 1; 6023 } else { 6024 /* 6025 * [ VC: Entity Declared ] 6026 * In a document with an external subset or external 6027 * parameter entities with "standalone='no'", ... 6028 * ... The declaration of a parameter entity must 6029 * precede any reference to it... 6030 */ 6031 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6032 ctxt->sax->warning(ctxt->userData, 6033 "PEReference: %%%s; not found\n", name); 6034 ctxt->valid = 0; 6035 } 6036 } else { 6037 /* 6038 * Internal checking in case the entity quest barfed 6039 */ 6040 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6041 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6042 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6043 ctxt->sax->warning(ctxt->userData, 6044 "Internal: %%%s; is not a parameter entity\n", name); 6045 } 6046 } 6047 ctxt->hasPErefs = 1; 6048 } else { 6049 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6051 ctxt->sax->error(ctxt->userData, 6052 "xmlParseStringPEReference: expecting ';'\n"); 6053 ctxt->wellFormed = 0; 6054 ctxt->disableSAX = 1; 6055 } 6056 xmlFree(name); 6057 } 6058 } 6059 *str = ptr; 6060 return(entity); 6061} 6062 6063/** 6064 * xmlParseDocTypeDecl: 6065 * @ctxt: an XML parser context 6066 * 6067 * parse a DOCTYPE declaration 6068 * 6069 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6070 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 6071 * 6072 * [ VC: Root Element Type ] 6073 * The Name in the document type declaration must match the element 6074 * type of the root element. 6075 */ 6076 6077void 6078xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6079 xmlChar *name = NULL; 6080 xmlChar *ExternalID = NULL; 6081 xmlChar *URI = NULL; 6082 6083 /* 6084 * We know that '<!DOCTYPE' has been detected. 6085 */ 6086 SKIP(9); 6087 6088 SKIP_BLANKS; 6089 6090 /* 6091 * Parse the DOCTYPE name. 6092 */ 6093 name = xmlParseName(ctxt); 6094 if (name == NULL) { 6095 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6097 ctxt->sax->error(ctxt->userData, 6098 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6099 ctxt->wellFormed = 0; 6100 ctxt->disableSAX = 1; 6101 } 6102 ctxt->intSubName = name; 6103 6104 SKIP_BLANKS; 6105 6106 /* 6107 * Check for SystemID and ExternalID 6108 */ 6109 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6110 6111 if ((URI != NULL) || (ExternalID != NULL)) { 6112 ctxt->hasExternalSubset = 1; 6113 } 6114 ctxt->extSubURI = URI; 6115 ctxt->extSubSystem = ExternalID; 6116 6117 SKIP_BLANKS; 6118 6119 /* 6120 * Create and update the internal subset. 6121 */ 6122 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6123 (!ctxt->disableSAX)) 6124 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6125 6126 /* 6127 * Is there any internal subset declarations ? 6128 * they are handled separately in xmlParseInternalSubset() 6129 */ 6130 if (RAW == '[') 6131 return; 6132 6133 /* 6134 * We should be at the end of the DOCTYPE declaration. 
6135 */ 6136 if (RAW != '>') { 6137 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6139 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6140 ctxt->wellFormed = 0; 6141 ctxt->disableSAX = 1; 6142 } 6143 NEXT; 6144} 6145 6146/** 6147 * xmlParseInternalSubset: 6148 * @ctxt: an XML parser context 6149 * 6150 * parse the internal subset declaration 6151 * 6152 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6153 */ 6154 6155static void 6156xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6157 /* 6158 * Is there any DTD definition ? 6159 */ 6160 if (RAW == '[') { 6161 ctxt->instate = XML_PARSER_DTD; 6162 NEXT; 6163 /* 6164 * Parse the succession of Markup declarations and 6165 * PEReferences. 6166 * Subsequence (markupdecl | PEReference | S)* 6167 */ 6168 while (RAW != ']') { 6169 const xmlChar *check = CUR_PTR; 6170 int cons = ctxt->input->consumed; 6171 6172 SKIP_BLANKS; 6173 xmlParseMarkupDecl(ctxt); 6174 xmlParsePEReference(ctxt); 6175 6176 /* 6177 * Pop-up of finished entities. 6178 */ 6179 while ((RAW == 0) && (ctxt->inputNr > 1)) 6180 xmlPopInput(ctxt); 6181 6182 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6183 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6185 ctxt->sax->error(ctxt->userData, 6186 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6187 ctxt->wellFormed = 0; 6188 ctxt->disableSAX = 1; 6189 break; 6190 } 6191 } 6192 if (RAW == ']') { 6193 NEXT; 6194 SKIP_BLANKS; 6195 } 6196 } 6197 6198 /* 6199 * We should be at the end of the DOCTYPE declaration. 
6200 */ 6201 if (RAW != '>') { 6202 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6204 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6205 ctxt->wellFormed = 0; 6206 ctxt->disableSAX = 1; 6207 } 6208 NEXT; 6209} 6210 6211/** 6212 * xmlParseAttribute: 6213 * @ctxt: an XML parser context 6214 * @value: a xmlChar ** used to store the value of the attribute 6215 * 6216 * parse an attribute 6217 * 6218 * [41] Attribute ::= Name Eq AttValue 6219 * 6220 * [ WFC: No External Entity References ] 6221 * Attribute values cannot contain direct or indirect entity references 6222 * to external entities. 6223 * 6224 * [ WFC: No < in Attribute Values ] 6225 * The replacement text of any entity referred to directly or indirectly in 6226 * an attribute value (other than "<") must not contain a <. 6227 * 6228 * [ VC: Attribute Value Type ] 6229 * The attribute must have been declared; the value must be of the type 6230 * declared for it. 6231 * 6232 * [25] Eq ::= S? '=' S? 6233 * 6234 * With namespace: 6235 * 6236 * [NS 11] Attribute ::= QName Eq AttValue 6237 * 6238 * Also the case QName == xmlns:??? is handled independently as a namespace 6239 * definition. 6240 * 6241 * Returns the attribute name, and the value in *value. 
6242 */ 6243 6244xmlChar * 6245xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6246 xmlChar *name, *val; 6247 6248 *value = NULL; 6249 GROW; 6250 name = xmlParseName(ctxt); 6251 if (name == NULL) { 6252 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6254 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 6255 ctxt->wellFormed = 0; 6256 ctxt->disableSAX = 1; 6257 return(NULL); 6258 } 6259 6260 /* 6261 * read the value 6262 */ 6263 SKIP_BLANKS; 6264 if (RAW == '=') { 6265 NEXT; 6266 SKIP_BLANKS; 6267 val = xmlParseAttValue(ctxt); 6268 ctxt->instate = XML_PARSER_CONTENT; 6269 } else { 6270 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6272 ctxt->sax->error(ctxt->userData, 6273 "Specification mandate value for attribute %s\n", name); 6274 ctxt->wellFormed = 0; 6275 ctxt->disableSAX = 1; 6276 xmlFree(name); 6277 return(NULL); 6278 } 6279 6280 /* 6281 * Check that xml:lang conforms to the specification 6282 * No more registered as an error, just generate a warning now 6283 * since this was deprecated in XML second edition 6284 */ 6285 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6286 if (!xmlCheckLanguageID(val)) { 6287 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6288 ctxt->sax->warning(ctxt->userData, 6289 "Malformed value for xml:lang : %s\n", val); 6290 } 6291 } 6292 6293 /* 6294 * Check that xml:space conforms to the specification 6295 */ 6296 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6297 if (xmlStrEqual(val, BAD_CAST "default")) 6298 *(ctxt->space) = 0; 6299 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6300 *(ctxt->space) = 1; 6301 else { 6302 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6304 ctxt->sax->error(ctxt->userData, 6305"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 6306 val); 6307 
ctxt->wellFormed = 0; 6308 ctxt->disableSAX = 1; 6309 } 6310 } 6311 6312 *value = val; 6313 return(name); 6314} 6315 6316/** 6317 * xmlParseStartTag: 6318 * @ctxt: an XML parser context 6319 * 6320 * parse a start of tag either for rule element or 6321 * EmptyElement. In both case we don't parse the tag closing chars. 6322 * 6323 * [40] STag ::= '<' Name (S Attribute)* S? '>' 6324 * 6325 * [ WFC: Unique Att Spec ] 6326 * No attribute name may appear more than once in the same start-tag or 6327 * empty-element tag. 6328 * 6329 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6330 * 6331 * [ WFC: Unique Att Spec ] 6332 * No attribute name may appear more than once in the same start-tag or 6333 * empty-element tag. 6334 * 6335 * With namespace: 6336 * 6337 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6338 * 6339 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6340 * 6341 * Returns the element name parsed 6342 */ 6343 6344xmlChar * 6345xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6346 xmlChar *name; 6347 xmlChar *attname; 6348 xmlChar *attvalue; 6349 const xmlChar **atts = NULL; 6350 int nbatts = 0; 6351 int maxatts = 0; 6352 int i; 6353 6354 if (RAW != '<') return(NULL); 6355 NEXT1; 6356 6357 name = xmlParseName(ctxt); 6358 if (name == NULL) { 6359 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6361 ctxt->sax->error(ctxt->userData, 6362 "xmlParseStartTag: invalid element name\n"); 6363 ctxt->wellFormed = 0; 6364 ctxt->disableSAX = 1; 6365 return(NULL); 6366 } 6367 6368 /* 6369 * Now parse the attributes, it ends up with the ending 6370 * 6371 * (S Attribute)* S? 
6372 */ 6373 SKIP_BLANKS; 6374 GROW; 6375 6376 while ((RAW != '>') && 6377 ((RAW != '/') || (NXT(1) != '>')) && 6378 (IS_CHAR(RAW))) { 6379 const xmlChar *q = CUR_PTR; 6380 int cons = ctxt->input->consumed; 6381 6382 attname = xmlParseAttribute(ctxt, &attvalue); 6383 if ((attname != NULL) && (attvalue != NULL)) { 6384 /* 6385 * [ WFC: Unique Att Spec ] 6386 * No attribute name may appear more than once in the same 6387 * start-tag or empty-element tag. 6388 */ 6389 for (i = 0; i < nbatts;i += 2) { 6390 if (xmlStrEqual(atts[i], attname)) { 6391 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6393 ctxt->sax->error(ctxt->userData, 6394 "Attribute %s redefined\n", 6395 attname); 6396 ctxt->wellFormed = 0; 6397 ctxt->disableSAX = 1; 6398 xmlFree(attname); 6399 xmlFree(attvalue); 6400 goto failed; 6401 } 6402 } 6403 6404 /* 6405 * Add the pair to atts 6406 */ 6407 if (atts == NULL) { 6408 maxatts = 10; 6409 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6410 if (atts == NULL) { 6411 xmlGenericError(xmlGenericErrorContext, 6412 "malloc of %ld byte failed\n", 6413 maxatts * (long)sizeof(xmlChar *)); 6414 return(NULL); 6415 } 6416 } else if (nbatts + 4 > maxatts) { 6417 maxatts *= 2; 6418 atts = (const xmlChar **) xmlRealloc((void *) atts, 6419 maxatts * sizeof(xmlChar *)); 6420 if (atts == NULL) { 6421 xmlGenericError(xmlGenericErrorContext, 6422 "realloc of %ld byte failed\n", 6423 maxatts * (long)sizeof(xmlChar *)); 6424 return(NULL); 6425 } 6426 } 6427 atts[nbatts++] = attname; 6428 atts[nbatts++] = attvalue; 6429 atts[nbatts] = NULL; 6430 atts[nbatts + 1] = NULL; 6431 } else { 6432 if (attname != NULL) 6433 xmlFree(attname); 6434 if (attvalue != NULL) 6435 xmlFree(attvalue); 6436 } 6437 6438failed: 6439 6440 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6441 break; 6442 if (!IS_BLANK(RAW)) { 6443 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6444 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6445 ctxt->sax->error(ctxt->userData, 6446 "attributes construct error\n"); 6447 ctxt->wellFormed = 0; 6448 ctxt->disableSAX = 1; 6449 } 6450 SKIP_BLANKS; 6451 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6452 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6454 ctxt->sax->error(ctxt->userData, 6455 "xmlParseStartTag: problem parsing attributes\n"); 6456 ctxt->wellFormed = 0; 6457 ctxt->disableSAX = 1; 6458 break; 6459 } 6460 GROW; 6461 } 6462 6463 /* 6464 * SAX: Start of Element ! 6465 */ 6466 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6467 (!ctxt->disableSAX)) 6468 ctxt->sax->startElement(ctxt->userData, name, atts); 6469 6470 if (atts != NULL) { 6471 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6472 xmlFree((void *) atts); 6473 } 6474 return(name); 6475} 6476 6477/** 6478 * xmlParseEndTag: 6479 * @ctxt: an XML parser context 6480 * 6481 * parse an end of tag 6482 * 6483 * [42] ETag ::= '</' Name S? '>' 6484 * 6485 * With namespace 6486 * 6487 * [NS 9] ETag ::= '</' QName S? '>' 6488 */ 6489 6490void 6491xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6492 xmlChar *name; 6493 xmlChar *oldname; 6494 6495 GROW; 6496 if ((RAW != '<') || (NXT(1) != '/')) { 6497 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6499 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6500 ctxt->wellFormed = 0; 6501 ctxt->disableSAX = 1; 6502 return; 6503 } 6504 SKIP(2); 6505 6506 name = xmlParseName(ctxt); 6507 6508 /* 6509 * We should definitely be at the ending "S? 
'>'" part 6510 */ 6511 GROW; 6512 SKIP_BLANKS; 6513 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6514 ctxt->errNo = XML_ERR_GT_REQUIRED; 6515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6516 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6517 ctxt->wellFormed = 0; 6518 ctxt->disableSAX = 1; 6519 } else 6520 NEXT1; 6521 6522 /* 6523 * [ WFC: Element Type Match ] 6524 * The Name in an element's end-tag must match the element type in the 6525 * start-tag. 6526 * 6527 */ 6528 if ((name == NULL) || (ctxt->name == NULL) || 6529 (!xmlStrEqual(name, ctxt->name))) { 6530 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6532 if ((name != NULL) && (ctxt->name != NULL)) { 6533 ctxt->sax->error(ctxt->userData, 6534 "Opening and ending tag mismatch: %s and %s\n", 6535 ctxt->name, name); 6536 } else if (ctxt->name != NULL) { 6537 ctxt->sax->error(ctxt->userData, 6538 "Ending tag error for: %s\n", ctxt->name); 6539 } else { 6540 ctxt->sax->error(ctxt->userData, 6541 "Ending tag error: internal error ???\n"); 6542 } 6543 6544 } 6545 ctxt->wellFormed = 0; 6546 ctxt->disableSAX = 1; 6547 } 6548 6549 /* 6550 * SAX: End of Tag 6551 */ 6552 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6553 (!ctxt->disableSAX)) 6554 ctxt->sax->endElement(ctxt->userData, name); 6555 6556 if (name != NULL) 6557 xmlFree(name); 6558 oldname = namePop(ctxt); 6559 spacePop(ctxt); 6560 if (oldname != NULL) { 6561#ifdef DEBUG_STACK 6562 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6563#endif 6564 xmlFree(oldname); 6565 } 6566 return; 6567} 6568 6569/** 6570 * xmlParseCDSect: 6571 * @ctxt: an XML parser context 6572 * 6573 * Parse escaped pure raw content. 
6574 * 6575 * [18] CDSect ::= CDStart CData CDEnd 6576 * 6577 * [19] CDStart ::= '<![CDATA[' 6578 * 6579 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6580 * 6581 * [21] CDEnd ::= ']]>' 6582 */ 6583void 6584xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6585 xmlChar *buf = NULL; 6586 int len = 0; 6587 int size = XML_PARSER_BUFFER_SIZE; 6588 int r, rl; 6589 int s, sl; 6590 int cur, l; 6591 int count = 0; 6592 6593 if ((NXT(0) == '<') && (NXT(1) == '!') && 6594 (NXT(2) == '[') && (NXT(3) == 'C') && 6595 (NXT(4) == 'D') && (NXT(5) == 'A') && 6596 (NXT(6) == 'T') && (NXT(7) == 'A') && 6597 (NXT(8) == '[')) { 6598 SKIP(9); 6599 } else 6600 return; 6601 6602 ctxt->instate = XML_PARSER_CDATA_SECTION; 6603 r = CUR_CHAR(rl); 6604 if (!IS_CHAR(r)) { 6605 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6607 ctxt->sax->error(ctxt->userData, 6608 "CData section not finished\n"); 6609 ctxt->wellFormed = 0; 6610 ctxt->disableSAX = 1; 6611 ctxt->instate = XML_PARSER_CONTENT; 6612 return; 6613 } 6614 NEXTL(rl); 6615 s = CUR_CHAR(sl); 6616 if (!IS_CHAR(s)) { 6617 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6619 ctxt->sax->error(ctxt->userData, 6620 "CData section not finished\n"); 6621 ctxt->wellFormed = 0; 6622 ctxt->disableSAX = 1; 6623 ctxt->instate = XML_PARSER_CONTENT; 6624 return; 6625 } 6626 NEXTL(sl); 6627 cur = CUR_CHAR(l); 6628 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6629 if (buf == NULL) { 6630 xmlGenericError(xmlGenericErrorContext, 6631 "malloc of %d byte failed\n", size); 6632 return; 6633 } 6634 while (IS_CHAR(cur) && 6635 ((r != ']') || (s != ']') || (cur != '>'))) { 6636 if (len + 5 >= size) { 6637 size *= 2; 6638 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6639 if (buf == NULL) { 6640 xmlGenericError(xmlGenericErrorContext, 6641 "realloc of %d byte failed\n", size); 6642 return; 6643 } 6644 } 6645 COPY_BUF(rl,buf,len,r); 6646 r = s; 
6647 rl = sl; 6648 s = cur; 6649 sl = l; 6650 count++; 6651 if (count > 50) { 6652 GROW; 6653 count = 0; 6654 } 6655 NEXTL(l); 6656 cur = CUR_CHAR(l); 6657 } 6658 buf[len] = 0; 6659 ctxt->instate = XML_PARSER_CONTENT; 6660 if (cur != '>') { 6661 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6663 ctxt->sax->error(ctxt->userData, 6664 "CData section not finished\n%.50s\n", buf); 6665 ctxt->wellFormed = 0; 6666 ctxt->disableSAX = 1; 6667 xmlFree(buf); 6668 return; 6669 } 6670 NEXTL(l); 6671 6672 /* 6673 * OK the buffer is to be consumed as cdata. 6674 */ 6675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6676 if (ctxt->sax->cdataBlock != NULL) 6677 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6678 else if (ctxt->sax->characters != NULL) 6679 ctxt->sax->characters(ctxt->userData, buf, len); 6680 } 6681 xmlFree(buf); 6682} 6683 6684/** 6685 * xmlParseContent: 6686 * @ctxt: an XML parser context 6687 * 6688 * Parse a content: 6689 * 6690 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6691 */ 6692 6693void 6694xmlParseContent(xmlParserCtxtPtr ctxt) { 6695 GROW; 6696 while (((RAW != 0) || (ctxt->token != 0)) && 6697 ((RAW != '<') || (NXT(1) != '/'))) { 6698 const xmlChar *test = CUR_PTR; 6699 int cons = ctxt->input->consumed; 6700 int tok = ctxt->token; 6701 const xmlChar *cur = ctxt->input->cur; 6702 6703 /* 6704 * Handle possible processed charrefs. 6705 */ 6706 if (ctxt->token != 0) { 6707 xmlParseCharData(ctxt, 0); 6708 } 6709 /* 6710 * First case : a Processing Instruction. 
6711 */ 6712 else if ((*cur == '<') && (cur[1] == '?')) { 6713 xmlParsePI(ctxt); 6714 } 6715 6716 /* 6717 * Second case : a CDSection 6718 */ 6719 else if ((*cur == '<') && (NXT(1) == '!') && 6720 (NXT(2) == '[') && (NXT(3) == 'C') && 6721 (NXT(4) == 'D') && (NXT(5) == 'A') && 6722 (NXT(6) == 'T') && (NXT(7) == 'A') && 6723 (NXT(8) == '[')) { 6724 xmlParseCDSect(ctxt); 6725 } 6726 6727 /* 6728 * Third case : a comment 6729 */ 6730 else if ((*cur == '<') && (NXT(1) == '!') && 6731 (NXT(2) == '-') && (NXT(3) == '-')) { 6732 xmlParseComment(ctxt); 6733 ctxt->instate = XML_PARSER_CONTENT; 6734 } 6735 6736 /* 6737 * Fourth case : a sub-element. 6738 */ 6739 else if (*cur == '<') { 6740 xmlParseElement(ctxt); 6741 } 6742 6743 /* 6744 * Fifth case : a reference. If if has not been resolved, 6745 * parsing returns it's Name, create the node 6746 */ 6747 6748 else if (*cur == '&') { 6749 xmlParseReference(ctxt); 6750 } 6751 6752 /* 6753 * Last case, text. Note that References are handled directly. 6754 */ 6755 else { 6756 xmlParseCharData(ctxt, 0); 6757 } 6758 6759 GROW; 6760 /* 6761 * Pop-up of finished entities. 6762 */ 6763 while ((RAW == 0) && (ctxt->inputNr > 1)) 6764 xmlPopInput(ctxt); 6765 SHRINK; 6766 6767 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6768 (tok == ctxt->token)) { 6769 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6771 ctxt->sax->error(ctxt->userData, 6772 "detected an error in element content\n"); 6773 ctxt->wellFormed = 0; 6774 ctxt->disableSAX = 1; 6775 ctxt->instate = XML_PARSER_EOF; 6776 break; 6777 } 6778 } 6779} 6780 6781/** 6782 * xmlParseElement: 6783 * @ctxt: an XML parser context 6784 * 6785 * parse an XML element, this is highly recursive 6786 * 6787 * [39] element ::= EmptyElemTag | STag content ETag 6788 * 6789 * [ WFC: Element Type Match ] 6790 * The Name in an element's end-tag must match the element type in the 6791 * start-tag. 
6792 * 6793 * [ VC: Element Valid ] 6794 * An element is valid if there is a declaration matching elementdecl 6795 * where the Name matches the element type and one of the following holds: 6796 * - The declaration matches EMPTY and the element has no content. 6797 * - The declaration matches children and the sequence of child elements 6798 * belongs to the language generated by the regular expression in the 6799 * content model, with optional white space (characters matching the 6800 * nonterminal S) between each pair of child elements. 6801 * - The declaration matches Mixed and the content consists of character 6802 * data and child elements whose types match names in the content model. 6803 * - The declaration matches ANY, and the types of any child elements have 6804 * been declared. 6805 */ 6806 6807void 6808xmlParseElement(xmlParserCtxtPtr ctxt) { 6809 const xmlChar *openTag = CUR_PTR; 6810 xmlChar *name; 6811 xmlChar *oldname; 6812 xmlParserNodeInfo node_info; 6813 xmlNodePtr ret; 6814 6815 /* Capture start position */ 6816 if (ctxt->record_info) { 6817 node_info.begin_pos = ctxt->input->consumed + 6818 (CUR_PTR - ctxt->input->base); 6819 node_info.begin_line = ctxt->input->line; 6820 } 6821 6822 if (ctxt->spaceNr == 0) 6823 spacePush(ctxt, -1); 6824 else 6825 spacePush(ctxt, *ctxt->space); 6826 6827 name = xmlParseStartTag(ctxt); 6828 if (name == NULL) { 6829 spacePop(ctxt); 6830 return; 6831 } 6832 namePush(ctxt, name); 6833 ret = ctxt->node; 6834 6835 /* 6836 * [ VC: Root Element Type ] 6837 * The Name in the document type declaration must match the element 6838 * type of the root element. 6839 */ 6840 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6841 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6842 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6843 6844 /* 6845 * Check for an Empty Element. 
6846 */ 6847 if ((RAW == '/') && (NXT(1) == '>')) { 6848 SKIP(2); 6849 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6850 (!ctxt->disableSAX)) 6851 ctxt->sax->endElement(ctxt->userData, name); 6852 oldname = namePop(ctxt); 6853 spacePop(ctxt); 6854 if (oldname != NULL) { 6855#ifdef DEBUG_STACK 6856 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6857#endif 6858 xmlFree(oldname); 6859 } 6860 if ( ret != NULL && ctxt->record_info ) { 6861 node_info.end_pos = ctxt->input->consumed + 6862 (CUR_PTR - ctxt->input->base); 6863 node_info.end_line = ctxt->input->line; 6864 node_info.node = ret; 6865 xmlParserAddNodeInfo(ctxt, &node_info); 6866 } 6867 return; 6868 } 6869 if (RAW == '>') { 6870 NEXT1; 6871 } else { 6872 ctxt->errNo = XML_ERR_GT_REQUIRED; 6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6874 ctxt->sax->error(ctxt->userData, 6875 "Couldn't find end of Start Tag\n%.30s\n", 6876 openTag); 6877 ctxt->wellFormed = 0; 6878 ctxt->disableSAX = 1; 6879 6880 /* 6881 * end of parsing of this node. 
6882 */ 6883 nodePop(ctxt); 6884 oldname = namePop(ctxt); 6885 spacePop(ctxt); 6886 if (oldname != NULL) { 6887#ifdef DEBUG_STACK 6888 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6889#endif 6890 xmlFree(oldname); 6891 } 6892 6893 /* 6894 * Capture end position and add node 6895 */ 6896 if ( ret != NULL && ctxt->record_info ) { 6897 node_info.end_pos = ctxt->input->consumed + 6898 (CUR_PTR - ctxt->input->base); 6899 node_info.end_line = ctxt->input->line; 6900 node_info.node = ret; 6901 xmlParserAddNodeInfo(ctxt, &node_info); 6902 } 6903 return; 6904 } 6905 6906 /* 6907 * Parse the content of the element: 6908 */ 6909 xmlParseContent(ctxt); 6910 if (!IS_CHAR(RAW)) { 6911 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED; 6912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6913 ctxt->sax->error(ctxt->userData, 6914 "Premature end of data in tag %.30s\n", openTag); 6915 ctxt->wellFormed = 0; 6916 ctxt->disableSAX = 1; 6917 6918 /* 6919 * end of parsing of this node. 6920 */ 6921 nodePop(ctxt); 6922 oldname = namePop(ctxt); 6923 spacePop(ctxt); 6924 if (oldname != NULL) { 6925#ifdef DEBUG_STACK 6926 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6927#endif 6928 xmlFree(oldname); 6929 } 6930 return; 6931 } 6932 6933 /* 6934 * parse the end of tag: '</' should be here. 6935 */ 6936 xmlParseEndTag(ctxt); 6937 6938 /* 6939 * Capture end position and add node 6940 */ 6941 if ( ret != NULL && ctxt->record_info ) { 6942 node_info.end_pos = ctxt->input->consumed + 6943 (CUR_PTR - ctxt->input->base); 6944 node_info.end_line = ctxt->input->line; 6945 node_info.node = ret; 6946 xmlParserAddNodeInfo(ctxt, &node_info); 6947 } 6948} 6949 6950/** 6951 * xmlParseVersionNum: 6952 * @ctxt: an XML parser context 6953 * 6954 * parse the XML version value. 
6955 * 6956 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6957 * 6958 * Returns the string giving the XML version number, or NULL 6959 */ 6960xmlChar * 6961xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6962 xmlChar *buf = NULL; 6963 int len = 0; 6964 int size = 10; 6965 xmlChar cur; 6966 6967 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6968 if (buf == NULL) { 6969 xmlGenericError(xmlGenericErrorContext, 6970 "malloc of %d byte failed\n", size); 6971 return(NULL); 6972 } 6973 cur = CUR; 6974 while (((cur >= 'a') && (cur <= 'z')) || 6975 ((cur >= 'A') && (cur <= 'Z')) || 6976 ((cur >= '0') && (cur <= '9')) || 6977 (cur == '_') || (cur == '.') || 6978 (cur == ':') || (cur == '-')) { 6979 if (len + 1 >= size) { 6980 size *= 2; 6981 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6982 if (buf == NULL) { 6983 xmlGenericError(xmlGenericErrorContext, 6984 "realloc of %d byte failed\n", size); 6985 return(NULL); 6986 } 6987 } 6988 buf[len++] = cur; 6989 NEXT; 6990 cur=CUR; 6991 } 6992 buf[len] = 0; 6993 return(buf); 6994} 6995 6996/** 6997 * xmlParseVersionInfo: 6998 * @ctxt: an XML parser context 6999 * 7000 * parse the XML version. 7001 * 7002 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 7003 * 7004 * [25] Eq ::= S? '=' S? 7005 * 7006 * Returns the version string, e.g. 
"1.0" 7007 */ 7008 7009xmlChar * 7010xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 7011 xmlChar *version = NULL; 7012 const xmlChar *q; 7013 7014 if ((RAW == 'v') && (NXT(1) == 'e') && 7015 (NXT(2) == 'r') && (NXT(3) == 's') && 7016 (NXT(4) == 'i') && (NXT(5) == 'o') && 7017 (NXT(6) == 'n')) { 7018 SKIP(7); 7019 SKIP_BLANKS; 7020 if (RAW != '=') { 7021 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7023 ctxt->sax->error(ctxt->userData, 7024 "xmlParseVersionInfo : expected '='\n"); 7025 ctxt->wellFormed = 0; 7026 ctxt->disableSAX = 1; 7027 return(NULL); 7028 } 7029 NEXT; 7030 SKIP_BLANKS; 7031 if (RAW == '"') { 7032 NEXT; 7033 q = CUR_PTR; 7034 version = xmlParseVersionNum(ctxt); 7035 if (RAW != '"') { 7036 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7038 ctxt->sax->error(ctxt->userData, 7039 "String not closed\n%.50s\n", q); 7040 ctxt->wellFormed = 0; 7041 ctxt->disableSAX = 1; 7042 } else 7043 NEXT; 7044 } else if (RAW == '\''){ 7045 NEXT; 7046 q = CUR_PTR; 7047 version = xmlParseVersionNum(ctxt); 7048 if (RAW != '\'') { 7049 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7051 ctxt->sax->error(ctxt->userData, 7052 "String not closed\n%.50s\n", q); 7053 ctxt->wellFormed = 0; 7054 ctxt->disableSAX = 1; 7055 } else 7056 NEXT; 7057 } else { 7058 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7060 ctxt->sax->error(ctxt->userData, 7061 "xmlParseVersionInfo : expected ' or \"\n"); 7062 ctxt->wellFormed = 0; 7063 ctxt->disableSAX = 1; 7064 } 7065 } 7066 return(version); 7067} 7068 7069/** 7070 * xmlParseEncName: 7071 * @ctxt: an XML parser context 7072 * 7073 * parse the XML encoding name 7074 * 7075 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 7076 * 7077 * Returns the encoding name value or NULL 7078 */ 7079xmlChar * 
7080xmlParseEncName(xmlParserCtxtPtr ctxt) { 7081 xmlChar *buf = NULL; 7082 int len = 0; 7083 int size = 10; 7084 xmlChar cur; 7085 7086 cur = CUR; 7087 if (((cur >= 'a') && (cur <= 'z')) || 7088 ((cur >= 'A') && (cur <= 'Z'))) { 7089 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 7090 if (buf == NULL) { 7091 xmlGenericError(xmlGenericErrorContext, 7092 "malloc of %d byte failed\n", size); 7093 return(NULL); 7094 } 7095 7096 buf[len++] = cur; 7097 NEXT; 7098 cur = CUR; 7099 while (((cur >= 'a') && (cur <= 'z')) || 7100 ((cur >= 'A') && (cur <= 'Z')) || 7101 ((cur >= '0') && (cur <= '9')) || 7102 (cur == '.') || (cur == '_') || 7103 (cur == '-')) { 7104 if (len + 1 >= size) { 7105 size *= 2; 7106 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7107 if (buf == NULL) { 7108 xmlGenericError(xmlGenericErrorContext, 7109 "realloc of %d byte failed\n", size); 7110 return(NULL); 7111 } 7112 } 7113 buf[len++] = cur; 7114 NEXT; 7115 cur = CUR; 7116 if (cur == 0) { 7117 SHRINK; 7118 GROW; 7119 cur = CUR; 7120 } 7121 } 7122 buf[len] = 0; 7123 } else { 7124 ctxt->errNo = XML_ERR_ENCODING_NAME; 7125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7126 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 7127 ctxt->wellFormed = 0; 7128 ctxt->disableSAX = 1; 7129 } 7130 return(buf); 7131} 7132 7133/** 7134 * xmlParseEncodingDecl: 7135 * @ctxt: an XML parser context 7136 * 7137 * parse the XML encoding declaration 7138 * 7139 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 7140 * 7141 * this setups the conversion filters. 
7142 * 7143 * Returns the encoding value or NULL 7144 */ 7145 7146xmlChar * 7147xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 7148 xmlChar *encoding = NULL; 7149 const xmlChar *q; 7150 7151 SKIP_BLANKS; 7152 if ((RAW == 'e') && (NXT(1) == 'n') && 7153 (NXT(2) == 'c') && (NXT(3) == 'o') && 7154 (NXT(4) == 'd') && (NXT(5) == 'i') && 7155 (NXT(6) == 'n') && (NXT(7) == 'g')) { 7156 SKIP(8); 7157 SKIP_BLANKS; 7158 if (RAW != '=') { 7159 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7161 ctxt->sax->error(ctxt->userData, 7162 "xmlParseEncodingDecl : expected '='\n"); 7163 ctxt->wellFormed = 0; 7164 ctxt->disableSAX = 1; 7165 return(NULL); 7166 } 7167 NEXT; 7168 SKIP_BLANKS; 7169 if (RAW == '"') { 7170 NEXT; 7171 q = CUR_PTR; 7172 encoding = xmlParseEncName(ctxt); 7173 if (RAW != '"') { 7174 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7176 ctxt->sax->error(ctxt->userData, 7177 "String not closed\n%.50s\n", q); 7178 ctxt->wellFormed = 0; 7179 ctxt->disableSAX = 1; 7180 } else 7181 NEXT; 7182 } else if (RAW == '\''){ 7183 NEXT; 7184 q = CUR_PTR; 7185 encoding = xmlParseEncName(ctxt); 7186 if (RAW != '\'') { 7187 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7189 ctxt->sax->error(ctxt->userData, 7190 "String not closed\n%.50s\n", q); 7191 ctxt->wellFormed = 0; 7192 ctxt->disableSAX = 1; 7193 } else 7194 NEXT; 7195 } else { 7196 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7198 ctxt->sax->error(ctxt->userData, 7199 "xmlParseEncodingDecl : expected ' or \"\n"); 7200 ctxt->wellFormed = 0; 7201 ctxt->disableSAX = 1; 7202 } 7203 if (encoding != NULL) { 7204 xmlCharEncoding enc; 7205 xmlCharEncodingHandlerPtr handler; 7206 7207 if (ctxt->input->encoding != NULL) 7208 xmlFree((xmlChar *) ctxt->input->encoding); 7209 ctxt->input->encoding = encoding; 7210 7211 enc = 
xmlParseCharEncoding((const char *) encoding); 7212 /* 7213 * registered set of known encodings 7214 */ 7215 if (enc != XML_CHAR_ENCODING_ERROR) { 7216 xmlSwitchEncoding(ctxt, enc); 7217 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7218 xmlFree(encoding); 7219 return(NULL); 7220 } 7221 } else { 7222 /* 7223 * fallback for unknown encodings 7224 */ 7225 handler = xmlFindCharEncodingHandler((const char *) encoding); 7226 if (handler != NULL) { 7227 xmlSwitchToEncoding(ctxt, handler); 7228 } else { 7229 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 7230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7231 ctxt->sax->error(ctxt->userData, 7232 "Unsupported encoding %s\n", encoding); 7233 return(NULL); 7234 } 7235 } 7236 } 7237 } 7238 return(encoding); 7239} 7240 7241/** 7242 * xmlParseSDDecl: 7243 * @ctxt: an XML parser context 7244 * 7245 * parse the XML standalone declaration 7246 * 7247 * [32] SDDecl ::= S 'standalone' Eq 7248 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 7249 * 7250 * [ VC: Standalone Document Declaration ] 7251 * TODO The standalone document declaration must have the value "no" 7252 * if any external markup declarations contain declarations of: 7253 * - attributes with default values, if elements to which these 7254 * attributes apply appear in the document without specifications 7255 * of values for these attributes, or 7256 * - entities (other than amp, lt, gt, apos, quot), if references 7257 * to those entities appear in the document, or 7258 * - attributes with values subject to normalization, where the 7259 * attribute appears in the document with a value which will change 7260 * as a result of normalization, or 7261 * - element types with element content, if white space occurs directly 7262 * within any instance of those types. 
7263 * 7264 * Returns 1 if standalone, 0 otherwise 7265 */ 7266 7267int 7268xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 7269 int standalone = -1; 7270 7271 SKIP_BLANKS; 7272 if ((RAW == 's') && (NXT(1) == 't') && 7273 (NXT(2) == 'a') && (NXT(3) == 'n') && 7274 (NXT(4) == 'd') && (NXT(5) == 'a') && 7275 (NXT(6) == 'l') && (NXT(7) == 'o') && 7276 (NXT(8) == 'n') && (NXT(9) == 'e')) { 7277 SKIP(10); 7278 SKIP_BLANKS; 7279 if (RAW != '=') { 7280 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7282 ctxt->sax->error(ctxt->userData, 7283 "XML standalone declaration : expected '='\n"); 7284 ctxt->wellFormed = 0; 7285 ctxt->disableSAX = 1; 7286 return(standalone); 7287 } 7288 NEXT; 7289 SKIP_BLANKS; 7290 if (RAW == '\''){ 7291 NEXT; 7292 if ((RAW == 'n') && (NXT(1) == 'o')) { 7293 standalone = 0; 7294 SKIP(2); 7295 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7296 (NXT(2) == 's')) { 7297 standalone = 1; 7298 SKIP(3); 7299 } else { 7300 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7302 ctxt->sax->error(ctxt->userData, 7303 "standalone accepts only 'yes' or 'no'\n"); 7304 ctxt->wellFormed = 0; 7305 ctxt->disableSAX = 1; 7306 } 7307 if (RAW != '\'') { 7308 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7310 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7311 ctxt->wellFormed = 0; 7312 ctxt->disableSAX = 1; 7313 } else 7314 NEXT; 7315 } else if (RAW == '"'){ 7316 NEXT; 7317 if ((RAW == 'n') && (NXT(1) == 'o')) { 7318 standalone = 0; 7319 SKIP(2); 7320 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7321 (NXT(2) == 's')) { 7322 standalone = 1; 7323 SKIP(3); 7324 } else { 7325 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7327 ctxt->sax->error(ctxt->userData, 7328 "standalone accepts only 'yes' or 'no'\n"); 7329 ctxt->wellFormed = 0; 7330 ctxt->disableSAX = 1; 7331 
} 7332 if (RAW != '"') { 7333 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7335 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7336 ctxt->wellFormed = 0; 7337 ctxt->disableSAX = 1; 7338 } else 7339 NEXT; 7340 } else { 7341 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7343 ctxt->sax->error(ctxt->userData, 7344 "Standalone value not found\n"); 7345 ctxt->wellFormed = 0; 7346 ctxt->disableSAX = 1; 7347 } 7348 } 7349 return(standalone); 7350} 7351 7352/** 7353 * xmlParseXMLDecl: 7354 * @ctxt: an XML parser context 7355 * 7356 * parse an XML declaration header 7357 * 7358 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 7359 */ 7360 7361void 7362xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7363 xmlChar *version; 7364 7365 /* 7366 * We know that '<?xml' is here. 7367 */ 7368 SKIP(5); 7369 7370 if (!IS_BLANK(RAW)) { 7371 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7373 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7374 ctxt->wellFormed = 0; 7375 ctxt->disableSAX = 1; 7376 } 7377 SKIP_BLANKS; 7378 7379 /* 7380 * We must have the VersionInfo here. 
7381 */ 7382 version = xmlParseVersionInfo(ctxt); 7383 if (version == NULL) { 7384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7385 ctxt->sax->error(ctxt->userData, 7386 "Malformed declaration expecting version\n"); 7387 ctxt->wellFormed = 0; 7388 ctxt->disableSAX = 1; 7389 } else { 7390 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 7391 /* 7392 * TODO: Blueberry should be detected here 7393 */ 7394 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7395 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", 7396 version); 7397 } 7398 if (ctxt->version != NULL) 7399 xmlFree((void *) ctxt->version); 7400 ctxt->version = version; 7401 } 7402 7403 /* 7404 * We may have the encoding declaration 7405 */ 7406 if (!IS_BLANK(RAW)) { 7407 if ((RAW == '?') && (NXT(1) == '>')) { 7408 SKIP(2); 7409 return; 7410 } 7411 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7413 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7414 ctxt->wellFormed = 0; 7415 ctxt->disableSAX = 1; 7416 } 7417 xmlParseEncodingDecl(ctxt); 7418 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7419 /* 7420 * The XML REC instructs us to stop parsing right here 7421 */ 7422 return; 7423 } 7424 7425 /* 7426 * We may have the standalone status. 7427 */ 7428 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7429 if ((RAW == '?') && (NXT(1) == '>')) { 7430 SKIP(2); 7431 return; 7432 } 7433 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7435 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7436 ctxt->wellFormed = 0; 7437 ctxt->disableSAX = 1; 7438 } 7439 SKIP_BLANKS; 7440 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7441 7442 SKIP_BLANKS; 7443 if ((RAW == '?') && (NXT(1) == '>')) { 7444 SKIP(2); 7445 } else if (RAW == '>') { 7446 /* Deprecated old WD ... 
*/ 7447 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7449 ctxt->sax->error(ctxt->userData, 7450 "XML declaration must end-up with '?>'\n"); 7451 ctxt->wellFormed = 0; 7452 ctxt->disableSAX = 1; 7453 NEXT; 7454 } else { 7455 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7457 ctxt->sax->error(ctxt->userData, 7458 "parsing XML declaration: '?>' expected\n"); 7459 ctxt->wellFormed = 0; 7460 ctxt->disableSAX = 1; 7461 MOVETO_ENDTAG(CUR_PTR); 7462 NEXT; 7463 } 7464} 7465 7466/** 7467 * xmlParseMisc: 7468 * @ctxt: an XML parser context 7469 * 7470 * parse an XML Misc* optional field. 7471 * 7472 * [27] Misc ::= Comment | PI | S 7473 */ 7474 7475void 7476xmlParseMisc(xmlParserCtxtPtr ctxt) { 7477 while (((RAW == '<') && (NXT(1) == '?')) || 7478 ((RAW == '<') && (NXT(1) == '!') && 7479 (NXT(2) == '-') && (NXT(3) == '-')) || 7480 IS_BLANK(CUR)) { 7481 if ((RAW == '<') && (NXT(1) == '?')) { 7482 xmlParsePI(ctxt); 7483 } else if (IS_BLANK(CUR)) { 7484 NEXT; 7485 } else 7486 xmlParseComment(ctxt); 7487 } 7488} 7489 7490/** 7491 * xmlParseDocument: 7492 * @ctxt: an XML parser context 7493 * 7494 * parse an XML document (and build a tree if using the standard SAX 7495 * interface). 7496 * 7497 * [1] document ::= prolog element Misc* 7498 * 7499 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7500 * 7501 * Returns 0, -1 in case of error. the parser context is augmented 7502 * as a result of the parsing. 7503 */ 7504 7505int 7506xmlParseDocument(xmlParserCtxtPtr ctxt) { 7507 xmlChar start[4]; 7508 xmlCharEncoding enc; 7509 7510 xmlInitParser(); 7511 7512 GROW; 7513 7514 /* 7515 * SAX: beginning of the document processing. 
7516 */ 7517 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7518 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7519 7520 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 7521 /* 7522 * Get the 4 first bytes and decode the charset 7523 * if enc != XML_CHAR_ENCODING_NONE 7524 * plug some encoding conversion routines. 7525 */ 7526 start[0] = RAW; 7527 start[1] = NXT(1); 7528 start[2] = NXT(2); 7529 start[3] = NXT(3); 7530 enc = xmlDetectCharEncoding(start, 4); 7531 if (enc != XML_CHAR_ENCODING_NONE) { 7532 xmlSwitchEncoding(ctxt, enc); 7533 } 7534 } 7535 7536 7537 if (CUR == 0) { 7538 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7540 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7541 ctxt->wellFormed = 0; 7542 ctxt->disableSAX = 1; 7543 } 7544 7545 /* 7546 * Check for the XMLDecl in the Prolog. 7547 */ 7548 GROW; 7549 if ((RAW == '<') && (NXT(1) == '?') && 7550 (NXT(2) == 'x') && (NXT(3) == 'm') && 7551 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7552 7553 /* 7554 * Note that we will switch encoding on the fly. 7555 */ 7556 xmlParseXMLDecl(ctxt); 7557 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7558 /* 7559 * The XML REC instructs us to stop parsing right here 7560 */ 7561 return(-1); 7562 } 7563 ctxt->standalone = ctxt->input->standalone; 7564 SKIP_BLANKS; 7565 } else { 7566 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7567 } 7568 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7569 ctxt->sax->startDocument(ctxt->userData); 7570 7571 /* 7572 * The Misc part of the Prolog 7573 */ 7574 GROW; 7575 xmlParseMisc(ctxt); 7576 7577 /* 7578 * Then possibly doc type declaration(s) and more Misc 7579 * (doctypedecl Misc*)? 
7580 */ 7581 GROW; 7582 if ((RAW == '<') && (NXT(1) == '!') && 7583 (NXT(2) == 'D') && (NXT(3) == 'O') && 7584 (NXT(4) == 'C') && (NXT(5) == 'T') && 7585 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7586 (NXT(8) == 'E')) { 7587 7588 ctxt->inSubset = 1; 7589 xmlParseDocTypeDecl(ctxt); 7590 if (RAW == '[') { 7591 ctxt->instate = XML_PARSER_DTD; 7592 xmlParseInternalSubset(ctxt); 7593 } 7594 7595 /* 7596 * Create and update the external subset. 7597 */ 7598 ctxt->inSubset = 2; 7599 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7600 (!ctxt->disableSAX)) 7601 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7602 ctxt->extSubSystem, ctxt->extSubURI); 7603 ctxt->inSubset = 0; 7604 7605 7606 ctxt->instate = XML_PARSER_PROLOG; 7607 xmlParseMisc(ctxt); 7608 } 7609 7610 /* 7611 * Time to start parsing the tree itself 7612 */ 7613 GROW; 7614 if (RAW != '<') { 7615 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7617 ctxt->sax->error(ctxt->userData, 7618 "Start tag expected, '<' not found\n"); 7619 ctxt->wellFormed = 0; 7620 ctxt->disableSAX = 1; 7621 ctxt->instate = XML_PARSER_EOF; 7622 } else { 7623 ctxt->instate = XML_PARSER_CONTENT; 7624 xmlParseElement(ctxt); 7625 ctxt->instate = XML_PARSER_EPILOG; 7626 7627 7628 /* 7629 * The Misc part at the end 7630 */ 7631 xmlParseMisc(ctxt); 7632 7633 if (RAW != 0) { 7634 ctxt->errNo = XML_ERR_DOCUMENT_END; 7635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7636 ctxt->sax->error(ctxt->userData, 7637 "Extra content at the end of the document\n"); 7638 ctxt->wellFormed = 0; 7639 ctxt->disableSAX = 1; 7640 } 7641 ctxt->instate = XML_PARSER_EOF; 7642 } 7643 7644 /* 7645 * SAX: end of the document processing. 7646 */ 7647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7648 (!ctxt->disableSAX)) 7649 ctxt->sax->endDocument(ctxt->userData); 7650 7651 if (! 
ctxt->wellFormed) { 7652 ctxt->valid = 0; 7653 return(-1); 7654 } 7655 return(0); 7656} 7657 7658/** 7659 * xmlParseExtParsedEnt: 7660 * @ctxt: an XML parser context 7661 * 7662 * parse a general parsed entity 7663 * An external general parsed entity is well-formed if it matches the 7664 * production labeled extParsedEnt. 7665 * 7666 * [78] extParsedEnt ::= TextDecl? content 7667 * 7668 * Returns 0, -1 in case of error. the parser context is augmented 7669 * as a result of the parsing. 7670 */ 7671 7672int 7673xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7674 xmlChar start[4]; 7675 xmlCharEncoding enc; 7676 7677 xmlDefaultSAXHandlerInit(); 7678 7679 GROW; 7680 7681 /* 7682 * SAX: beginning of the document processing. 7683 */ 7684 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7685 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7686 7687 /* 7688 * Get the 4 first bytes and decode the charset 7689 * if enc != XML_CHAR_ENCODING_NONE 7690 * plug some encoding conversion routines. 7691 */ 7692 start[0] = RAW; 7693 start[1] = NXT(1); 7694 start[2] = NXT(2); 7695 start[3] = NXT(3); 7696 enc = xmlDetectCharEncoding(start, 4); 7697 if (enc != XML_CHAR_ENCODING_NONE) { 7698 xmlSwitchEncoding(ctxt, enc); 7699 } 7700 7701 7702 if (CUR == 0) { 7703 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7705 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7706 ctxt->wellFormed = 0; 7707 ctxt->disableSAX = 1; 7708 } 7709 7710 /* 7711 * Check for the XMLDecl in the Prolog. 7712 */ 7713 GROW; 7714 if ((RAW == '<') && (NXT(1) == '?') && 7715 (NXT(2) == 'x') && (NXT(3) == 'm') && 7716 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7717 7718 /* 7719 * Note that we will switch encoding on the fly. 
7720 */ 7721 xmlParseXMLDecl(ctxt); 7722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7723 /* 7724 * The XML REC instructs us to stop parsing right here 7725 */ 7726 return(-1); 7727 } 7728 SKIP_BLANKS; 7729 } else { 7730 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7731 } 7732 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7733 ctxt->sax->startDocument(ctxt->userData); 7734 7735 /* 7736 * Doing validity checking on chunk doesn't make sense 7737 */ 7738 ctxt->instate = XML_PARSER_CONTENT; 7739 ctxt->validate = 0; 7740 ctxt->loadsubset = 0; 7741 ctxt->depth = 0; 7742 7743 xmlParseContent(ctxt); 7744 7745 if ((RAW == '<') && (NXT(1) == '/')) { 7746 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7748 ctxt->sax->error(ctxt->userData, 7749 "chunk is not well balanced\n"); 7750 ctxt->wellFormed = 0; 7751 ctxt->disableSAX = 1; 7752 } else if (RAW != 0) { 7753 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7755 ctxt->sax->error(ctxt->userData, 7756 "extra content at the end of well balanced chunk\n"); 7757 ctxt->wellFormed = 0; 7758 ctxt->disableSAX = 1; 7759 } 7760 7761 /* 7762 * SAX: end of the document processing. 7763 */ 7764 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7765 (!ctxt->disableSAX)) 7766 ctxt->sax->endDocument(ctxt->userData); 7767 7768 if (! 
ctxt->wellFormed) return(-1); 7769 return(0); 7770} 7771 7772/************************************************************************ 7773 * * 7774 * Progressive parsing interfaces * 7775 * * 7776 ************************************************************************/ 7777 7778/** 7779 * xmlParseLookupSequence: 7780 * @ctxt: an XML parser context 7781 * @first: the first char to lookup 7782 * @next: the next char to lookup or zero 7783 * @third: the next char to lookup or zero 7784 * 7785 * Try to find if a sequence (first, next, third) or just (first next) or 7786 * (first) is available in the input stream. 7787 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7788 * to avoid rescanning sequences of bytes, it DOES change the state of the 7789 * parser, do not use liberally. 7790 * 7791 * Returns the index to the current parsing point if the full sequence 7792 * is available, -1 otherwise. 7793 */ 7794static int 7795xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7796 xmlChar next, xmlChar third) { 7797 int base, len; 7798 xmlParserInputPtr in; 7799 const xmlChar *buf; 7800 7801 in = ctxt->input; 7802 if (in == NULL) return(-1); 7803 base = in->cur - in->base; 7804 if (base < 0) return(-1); 7805 if (ctxt->checkIndex > base) 7806 base = ctxt->checkIndex; 7807 if (in->buf == NULL) { 7808 buf = in->base; 7809 len = in->length; 7810 } else { 7811 buf = in->buf->buffer->content; 7812 len = in->buf->buffer->use; 7813 } 7814 /* take into account the sequence length */ 7815 if (third) len -= 2; 7816 else if (next) len --; 7817 for (;base < len;base++) { 7818 if (buf[base] == first) { 7819 if (third != 0) { 7820 if ((buf[base + 1] != next) || 7821 (buf[base + 2] != third)) continue; 7822 } else if (next != 0) { 7823 if (buf[base + 1] != next) continue; 7824 } 7825 ctxt->checkIndex = 0; 7826#ifdef DEBUG_PUSH 7827 if (next == 0) 7828 xmlGenericError(xmlGenericErrorContext, 7829 "PP: lookup '%c' found at %d\n", 7830 first, base); 
7831 else if (third == 0) 7832 xmlGenericError(xmlGenericErrorContext, 7833 "PP: lookup '%c%c' found at %d\n", 7834 first, next, base); 7835 else 7836 xmlGenericError(xmlGenericErrorContext, 7837 "PP: lookup '%c%c%c' found at %d\n", 7838 first, next, third, base); 7839#endif 7840 return(base - (in->cur - in->base)); 7841 } 7842 } 7843 ctxt->checkIndex = base; 7844#ifdef DEBUG_PUSH 7845 if (next == 0) 7846 xmlGenericError(xmlGenericErrorContext, 7847 "PP: lookup '%c' failed\n", first); 7848 else if (third == 0) 7849 xmlGenericError(xmlGenericErrorContext, 7850 "PP: lookup '%c%c' failed\n", first, next); 7851 else 7852 xmlGenericError(xmlGenericErrorContext, 7853 "PP: lookup '%c%c%c' failed\n", first, next, third); 7854#endif 7855 return(-1); 7856} 7857 7858/** 7859 * xmlParseTryOrFinish: 7860 * @ctxt: an XML parser context 7861 * @terminate: last chunk indicator 7862 * 7863 * Try to progress on parsing 7864 * 7865 * Returns zero if no parsing was possible 7866 */ 7867static int 7868xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7869 int ret = 0; 7870 int avail; 7871 xmlChar cur, next; 7872 7873#ifdef DEBUG_PUSH 7874 switch (ctxt->instate) { 7875 case XML_PARSER_EOF: 7876 xmlGenericError(xmlGenericErrorContext, 7877 "PP: try EOF\n"); break; 7878 case XML_PARSER_START: 7879 xmlGenericError(xmlGenericErrorContext, 7880 "PP: try START\n"); break; 7881 case XML_PARSER_MISC: 7882 xmlGenericError(xmlGenericErrorContext, 7883 "PP: try MISC\n");break; 7884 case XML_PARSER_COMMENT: 7885 xmlGenericError(xmlGenericErrorContext, 7886 "PP: try COMMENT\n");break; 7887 case XML_PARSER_PROLOG: 7888 xmlGenericError(xmlGenericErrorContext, 7889 "PP: try PROLOG\n");break; 7890 case XML_PARSER_START_TAG: 7891 xmlGenericError(xmlGenericErrorContext, 7892 "PP: try START_TAG\n");break; 7893 case XML_PARSER_CONTENT: 7894 xmlGenericError(xmlGenericErrorContext, 7895 "PP: try CONTENT\n");break; 7896 case XML_PARSER_CDATA_SECTION: 7897 xmlGenericError(xmlGenericErrorContext, 7898 
"PP: try CDATA_SECTION\n");break; 7899 case XML_PARSER_END_TAG: 7900 xmlGenericError(xmlGenericErrorContext, 7901 "PP: try END_TAG\n");break; 7902 case XML_PARSER_ENTITY_DECL: 7903 xmlGenericError(xmlGenericErrorContext, 7904 "PP: try ENTITY_DECL\n");break; 7905 case XML_PARSER_ENTITY_VALUE: 7906 xmlGenericError(xmlGenericErrorContext, 7907 "PP: try ENTITY_VALUE\n");break; 7908 case XML_PARSER_ATTRIBUTE_VALUE: 7909 xmlGenericError(xmlGenericErrorContext, 7910 "PP: try ATTRIBUTE_VALUE\n");break; 7911 case XML_PARSER_DTD: 7912 xmlGenericError(xmlGenericErrorContext, 7913 "PP: try DTD\n");break; 7914 case XML_PARSER_EPILOG: 7915 xmlGenericError(xmlGenericErrorContext, 7916 "PP: try EPILOG\n");break; 7917 case XML_PARSER_PI: 7918 xmlGenericError(xmlGenericErrorContext, 7919 "PP: try PI\n");break; 7920 case XML_PARSER_IGNORE: 7921 xmlGenericError(xmlGenericErrorContext, 7922 "PP: try IGNORE\n");break; 7923 } 7924#endif 7925 7926 while (1) { 7927 /* 7928 * Pop-up of finished entities. 7929 */ 7930 while ((RAW == 0) && (ctxt->inputNr > 1)) 7931 xmlPopInput(ctxt); 7932 7933 if (ctxt->input ==NULL) break; 7934 if (ctxt->input->buf == NULL) 7935 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7936 else 7937 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7938 if (avail < 1) 7939 goto done; 7940 switch (ctxt->instate) { 7941 case XML_PARSER_EOF: 7942 /* 7943 * Document parsing is done ! 7944 */ 7945 goto done; 7946 case XML_PARSER_START: 7947 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 7948 xmlChar start[4]; 7949 xmlCharEncoding enc; 7950 7951 /* 7952 * Very first chars read from the document flow. 7953 */ 7954 if (avail < 4) 7955 goto done; 7956 7957 /* 7958 * Get the 4 first bytes and decode the charset 7959 * if enc != XML_CHAR_ENCODING_NONE 7960 * plug some encoding conversion routines. 
7961 */ 7962 start[0] = RAW; 7963 start[1] = NXT(1); 7964 start[2] = NXT(2); 7965 start[3] = NXT(3); 7966 enc = xmlDetectCharEncoding(start, 4); 7967 if (enc != XML_CHAR_ENCODING_NONE) { 7968 xmlSwitchEncoding(ctxt, enc); 7969 } 7970 break; 7971 } 7972 7973 cur = ctxt->input->cur[0]; 7974 next = ctxt->input->cur[1]; 7975 if (cur == 0) { 7976 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7977 ctxt->sax->setDocumentLocator(ctxt->userData, 7978 &xmlDefaultSAXLocator); 7979 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7981 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7982 ctxt->wellFormed = 0; 7983 ctxt->disableSAX = 1; 7984 ctxt->instate = XML_PARSER_EOF; 7985#ifdef DEBUG_PUSH 7986 xmlGenericError(xmlGenericErrorContext, 7987 "PP: entering EOF\n"); 7988#endif 7989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7990 ctxt->sax->endDocument(ctxt->userData); 7991 goto done; 7992 } 7993 if ((cur == '<') && (next == '?')) { 7994 /* PI or XML decl */ 7995 if (avail < 5) return(ret); 7996 if ((!terminate) && 7997 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7998 return(ret); 7999 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8000 ctxt->sax->setDocumentLocator(ctxt->userData, 8001 &xmlDefaultSAXLocator); 8002 if ((ctxt->input->cur[2] == 'x') && 8003 (ctxt->input->cur[3] == 'm') && 8004 (ctxt->input->cur[4] == 'l') && 8005 (IS_BLANK(ctxt->input->cur[5]))) { 8006 ret += 5; 8007#ifdef DEBUG_PUSH 8008 xmlGenericError(xmlGenericErrorContext, 8009 "PP: Parsing XML Decl\n"); 8010#endif 8011 xmlParseXMLDecl(ctxt); 8012 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8013 /* 8014 * The XML REC instructs us to stop parsing right 8015 * here 8016 */ 8017 ctxt->instate = XML_PARSER_EOF; 8018 return(0); 8019 } 8020 ctxt->standalone = ctxt->input->standalone; 8021 if ((ctxt->encoding == NULL) && 8022 (ctxt->input->encoding != NULL)) 8023 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 8024 if 
((ctxt->sax) && (ctxt->sax->startDocument) && 8025 (!ctxt->disableSAX)) 8026 ctxt->sax->startDocument(ctxt->userData); 8027 ctxt->instate = XML_PARSER_MISC; 8028#ifdef DEBUG_PUSH 8029 xmlGenericError(xmlGenericErrorContext, 8030 "PP: entering MISC\n"); 8031#endif 8032 } else { 8033 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8034 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8035 (!ctxt->disableSAX)) 8036 ctxt->sax->startDocument(ctxt->userData); 8037 ctxt->instate = XML_PARSER_MISC; 8038#ifdef DEBUG_PUSH 8039 xmlGenericError(xmlGenericErrorContext, 8040 "PP: entering MISC\n"); 8041#endif 8042 } 8043 } else { 8044 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8045 ctxt->sax->setDocumentLocator(ctxt->userData, 8046 &xmlDefaultSAXLocator); 8047 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8048 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8049 (!ctxt->disableSAX)) 8050 ctxt->sax->startDocument(ctxt->userData); 8051 ctxt->instate = XML_PARSER_MISC; 8052#ifdef DEBUG_PUSH 8053 xmlGenericError(xmlGenericErrorContext, 8054 "PP: entering MISC\n"); 8055#endif 8056 } 8057 break; 8058 case XML_PARSER_MISC: 8059 SKIP_BLANKS; 8060 if (ctxt->input->buf == NULL) 8061 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8062 else 8063 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8064 if (avail < 2) 8065 goto done; 8066 cur = ctxt->input->cur[0]; 8067 next = ctxt->input->cur[1]; 8068 if ((cur == '<') && (next == '?')) { 8069 if ((!terminate) && 8070 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8071 goto done; 8072#ifdef DEBUG_PUSH 8073 xmlGenericError(xmlGenericErrorContext, 8074 "PP: Parsing PI\n"); 8075#endif 8076 xmlParsePI(ctxt); 8077 } else if ((cur == '<') && (next == '!') && 8078 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8079 if ((!terminate) && 8080 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8081 goto done; 8082#ifdef DEBUG_PUSH 8083 
xmlGenericError(xmlGenericErrorContext, 8084 "PP: Parsing Comment\n"); 8085#endif 8086 xmlParseComment(ctxt); 8087 ctxt->instate = XML_PARSER_MISC; 8088 } else if ((cur == '<') && (next == '!') && 8089 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 8090 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 8091 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 8092 (ctxt->input->cur[8] == 'E')) { 8093 if ((!terminate) && 8094 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8095 goto done; 8096#ifdef DEBUG_PUSH 8097 xmlGenericError(xmlGenericErrorContext, 8098 "PP: Parsing internal subset\n"); 8099#endif 8100 ctxt->inSubset = 1; 8101 xmlParseDocTypeDecl(ctxt); 8102 if (RAW == '[') { 8103 ctxt->instate = XML_PARSER_DTD; 8104#ifdef DEBUG_PUSH 8105 xmlGenericError(xmlGenericErrorContext, 8106 "PP: entering DTD\n"); 8107#endif 8108 } else { 8109 /* 8110 * Create and update the external subset. 8111 */ 8112 ctxt->inSubset = 2; 8113 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8114 (ctxt->sax->externalSubset != NULL)) 8115 ctxt->sax->externalSubset(ctxt->userData, 8116 ctxt->intSubName, ctxt->extSubSystem, 8117 ctxt->extSubURI); 8118 ctxt->inSubset = 0; 8119 ctxt->instate = XML_PARSER_PROLOG; 8120#ifdef DEBUG_PUSH 8121 xmlGenericError(xmlGenericErrorContext, 8122 "PP: entering PROLOG\n"); 8123#endif 8124 } 8125 } else if ((cur == '<') && (next == '!') && 8126 (avail < 9)) { 8127 goto done; 8128 } else { 8129 ctxt->instate = XML_PARSER_START_TAG; 8130#ifdef DEBUG_PUSH 8131 xmlGenericError(xmlGenericErrorContext, 8132 "PP: entering START_TAG\n"); 8133#endif 8134 } 8135 break; 8136 case XML_PARSER_IGNORE: 8137 xmlGenericError(xmlGenericErrorContext, 8138 "PP: internal error, state == IGNORE"); 8139 ctxt->instate = XML_PARSER_DTD; 8140#ifdef DEBUG_PUSH 8141 xmlGenericError(xmlGenericErrorContext, 8142 "PP: entering DTD\n"); 8143#endif 8144 break; 8145 case XML_PARSER_PROLOG: 8146 SKIP_BLANKS; 8147 if (ctxt->input->buf == NULL) 8148 
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8149 else 8150 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8151 if (avail < 2) 8152 goto done; 8153 cur = ctxt->input->cur[0]; 8154 next = ctxt->input->cur[1]; 8155 if ((cur == '<') && (next == '?')) { 8156 if ((!terminate) && 8157 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8158 goto done; 8159#ifdef DEBUG_PUSH 8160 xmlGenericError(xmlGenericErrorContext, 8161 "PP: Parsing PI\n"); 8162#endif 8163 xmlParsePI(ctxt); 8164 } else if ((cur == '<') && (next == '!') && 8165 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8166 if ((!terminate) && 8167 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8168 goto done; 8169#ifdef DEBUG_PUSH 8170 xmlGenericError(xmlGenericErrorContext, 8171 "PP: Parsing Comment\n"); 8172#endif 8173 xmlParseComment(ctxt); 8174 ctxt->instate = XML_PARSER_PROLOG; 8175 } else if ((cur == '<') && (next == '!') && 8176 (avail < 4)) { 8177 goto done; 8178 } else { 8179 ctxt->instate = XML_PARSER_START_TAG; 8180#ifdef DEBUG_PUSH 8181 xmlGenericError(xmlGenericErrorContext, 8182 "PP: entering START_TAG\n"); 8183#endif 8184 } 8185 break; 8186 case XML_PARSER_EPILOG: 8187 SKIP_BLANKS; 8188 if (ctxt->input->buf == NULL) 8189 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8190 else 8191 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8192 if (avail < 2) 8193 goto done; 8194 cur = ctxt->input->cur[0]; 8195 next = ctxt->input->cur[1]; 8196 if ((cur == '<') && (next == '?')) { 8197 if ((!terminate) && 8198 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8199 goto done; 8200#ifdef DEBUG_PUSH 8201 xmlGenericError(xmlGenericErrorContext, 8202 "PP: Parsing PI\n"); 8203#endif 8204 xmlParsePI(ctxt); 8205 ctxt->instate = XML_PARSER_EPILOG; 8206 } else if ((cur == '<') && (next == '!') && 8207 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8208 if ((!terminate) && 8209 
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8210 goto done; 8211#ifdef DEBUG_PUSH 8212 xmlGenericError(xmlGenericErrorContext, 8213 "PP: Parsing Comment\n"); 8214#endif 8215 xmlParseComment(ctxt); 8216 ctxt->instate = XML_PARSER_EPILOG; 8217 } else if ((cur == '<') && (next == '!') && 8218 (avail < 4)) { 8219 goto done; 8220 } else { 8221 ctxt->errNo = XML_ERR_DOCUMENT_END; 8222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8223 ctxt->sax->error(ctxt->userData, 8224 "Extra content at the end of the document\n"); 8225 ctxt->wellFormed = 0; 8226 ctxt->disableSAX = 1; 8227 ctxt->instate = XML_PARSER_EOF; 8228#ifdef DEBUG_PUSH 8229 xmlGenericError(xmlGenericErrorContext, 8230 "PP: entering EOF\n"); 8231#endif 8232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8233 (!ctxt->disableSAX)) 8234 ctxt->sax->endDocument(ctxt->userData); 8235 goto done; 8236 } 8237 break; 8238 case XML_PARSER_START_TAG: { 8239 xmlChar *name, *oldname; 8240 8241 if ((avail < 2) && (ctxt->inputNr == 1)) 8242 goto done; 8243 cur = ctxt->input->cur[0]; 8244 if (cur != '<') { 8245 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8247 ctxt->sax->error(ctxt->userData, 8248 "Start tag expect, '<' not found\n"); 8249 ctxt->wellFormed = 0; 8250 ctxt->disableSAX = 1; 8251 ctxt->instate = XML_PARSER_EOF; 8252#ifdef DEBUG_PUSH 8253 xmlGenericError(xmlGenericErrorContext, 8254 "PP: entering EOF\n"); 8255#endif 8256 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8257 (!ctxt->disableSAX)) 8258 ctxt->sax->endDocument(ctxt->userData); 8259 goto done; 8260 } 8261 if ((!terminate) && 8262 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8263 goto done; 8264 if (ctxt->spaceNr == 0) 8265 spacePush(ctxt, -1); 8266 else 8267 spacePush(ctxt, *ctxt->space); 8268 name = xmlParseStartTag(ctxt); 8269 if (name == NULL) { 8270 spacePop(ctxt); 8271 ctxt->instate = XML_PARSER_EOF; 8272#ifdef DEBUG_PUSH 8273 xmlGenericError(xmlGenericErrorContext, 
8274 "PP: entering EOF\n"); 8275#endif 8276 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8277 (!ctxt->disableSAX)) 8278 ctxt->sax->endDocument(ctxt->userData); 8279 goto done; 8280 } 8281 namePush(ctxt, xmlStrdup(name)); 8282 8283 /* 8284 * [ VC: Root Element Type ] 8285 * The Name in the document type declaration must match 8286 * the element type of the root element. 8287 */ 8288 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8289 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8290 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8291 8292 /* 8293 * Check for an Empty Element. 8294 */ 8295 if ((RAW == '/') && (NXT(1) == '>')) { 8296 SKIP(2); 8297 if ((ctxt->sax != NULL) && 8298 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 8299 ctxt->sax->endElement(ctxt->userData, name); 8300 xmlFree(name); 8301 oldname = namePop(ctxt); 8302 spacePop(ctxt); 8303 if (oldname != NULL) { 8304#ifdef DEBUG_STACK 8305 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8306#endif 8307 xmlFree(oldname); 8308 } 8309 if (ctxt->name == NULL) { 8310 ctxt->instate = XML_PARSER_EPILOG; 8311#ifdef DEBUG_PUSH 8312 xmlGenericError(xmlGenericErrorContext, 8313 "PP: entering EPILOG\n"); 8314#endif 8315 } else { 8316 ctxt->instate = XML_PARSER_CONTENT; 8317#ifdef DEBUG_PUSH 8318 xmlGenericError(xmlGenericErrorContext, 8319 "PP: entering CONTENT\n"); 8320#endif 8321 } 8322 break; 8323 } 8324 if (RAW == '>') { 8325 NEXT; 8326 } else { 8327 ctxt->errNo = XML_ERR_GT_REQUIRED; 8328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8329 ctxt->sax->error(ctxt->userData, 8330 "Couldn't find end of Start Tag %s\n", 8331 name); 8332 ctxt->wellFormed = 0; 8333 ctxt->disableSAX = 1; 8334 8335 /* 8336 * end of parsing of this node. 
8337 */ 8338 nodePop(ctxt); 8339 oldname = namePop(ctxt); 8340 spacePop(ctxt); 8341 if (oldname != NULL) { 8342#ifdef DEBUG_STACK 8343 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8344#endif 8345 xmlFree(oldname); 8346 } 8347 } 8348 xmlFree(name); 8349 ctxt->instate = XML_PARSER_CONTENT; 8350#ifdef DEBUG_PUSH 8351 xmlGenericError(xmlGenericErrorContext, 8352 "PP: entering CONTENT\n"); 8353#endif 8354 break; 8355 } 8356 case XML_PARSER_CONTENT: { 8357 const xmlChar *test; 8358 int cons; 8359 int tok; 8360 8361 /* 8362 * Handle preparsed entities and charRef 8363 */ 8364 if (ctxt->token != 0) { 8365 xmlChar current[2] = { 0 , 0 } ; 8366 8367 current[0] = (xmlChar) ctxt->token; 8368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8369 (ctxt->sax->characters != NULL)) 8370 ctxt->sax->characters(ctxt->userData, current, 1); 8371 ctxt->token = 0; 8372 } 8373 if ((avail < 2) && (ctxt->inputNr == 1)) 8374 goto done; 8375 cur = ctxt->input->cur[0]; 8376 next = ctxt->input->cur[1]; 8377 8378 test = CUR_PTR; 8379 cons = ctxt->input->consumed; 8380 tok = ctxt->token; 8381 if ((cur == '<') && (next == '?')) { 8382 if ((!terminate) && 8383 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8384 goto done; 8385#ifdef DEBUG_PUSH 8386 xmlGenericError(xmlGenericErrorContext, 8387 "PP: Parsing PI\n"); 8388#endif 8389 xmlParsePI(ctxt); 8390 } else if ((cur == '<') && (next == '!') && 8391 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8392 if ((!terminate) && 8393 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8394 goto done; 8395#ifdef DEBUG_PUSH 8396 xmlGenericError(xmlGenericErrorContext, 8397 "PP: Parsing Comment\n"); 8398#endif 8399 xmlParseComment(ctxt); 8400 ctxt->instate = XML_PARSER_CONTENT; 8401 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8402 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8403 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8404 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8405 
(ctxt->input->cur[8] == '[')) { 8406 SKIP(9); 8407 ctxt->instate = XML_PARSER_CDATA_SECTION; 8408#ifdef DEBUG_PUSH 8409 xmlGenericError(xmlGenericErrorContext, 8410 "PP: entering CDATA_SECTION\n"); 8411#endif 8412 break; 8413 } else if ((cur == '<') && (next == '!') && 8414 (avail < 9)) { 8415 goto done; 8416 } else if ((cur == '<') && (next == '/')) { 8417 ctxt->instate = XML_PARSER_END_TAG; 8418#ifdef DEBUG_PUSH 8419 xmlGenericError(xmlGenericErrorContext, 8420 "PP: entering END_TAG\n"); 8421#endif 8422 break; 8423 } else if (cur == '<') { 8424 ctxt->instate = XML_PARSER_START_TAG; 8425#ifdef DEBUG_PUSH 8426 xmlGenericError(xmlGenericErrorContext, 8427 "PP: entering START_TAG\n"); 8428#endif 8429 break; 8430 } else if (cur == '&') { 8431 if ((!terminate) && 8432 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8433 goto done; 8434#ifdef DEBUG_PUSH 8435 xmlGenericError(xmlGenericErrorContext, 8436 "PP: Parsing Reference\n"); 8437#endif 8438 xmlParseReference(ctxt); 8439 } else { 8440 /* TODO Avoid the extra copy, handle directly !!! */ 8441 /* 8442 * Goal of the following test is: 8443 * - minimize calls to the SAX 'character' callback 8444 * when they are mergeable 8445 * - handle an problem for isBlank when we only parse 8446 * a sequence of blank chars and the next one is 8447 * not available to check against '<' presence. 8448 * - tries to homogenize the differences in SAX 8449 * callbacks between the push and pull versions 8450 * of the parser. 8451 */ 8452 if ((ctxt->inputNr == 1) && 8453 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8454 if ((!terminate) && 8455 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8456 goto done; 8457 } 8458 ctxt->checkIndex = 0; 8459#ifdef DEBUG_PUSH 8460 xmlGenericError(xmlGenericErrorContext, 8461 "PP: Parsing char data\n"); 8462#endif 8463 xmlParseCharData(ctxt, 0); 8464 } 8465 /* 8466 * Pop-up of finished entities. 
8467 */ 8468 while ((RAW == 0) && (ctxt->inputNr > 1)) 8469 xmlPopInput(ctxt); 8470 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 8471 (tok == ctxt->token)) { 8472 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8474 ctxt->sax->error(ctxt->userData, 8475 "detected an error in element content\n"); 8476 ctxt->wellFormed = 0; 8477 ctxt->disableSAX = 1; 8478 ctxt->instate = XML_PARSER_EOF; 8479 break; 8480 } 8481 break; 8482 } 8483 case XML_PARSER_CDATA_SECTION: { 8484 /* 8485 * The Push mode need to have the SAX callback for 8486 * cdataBlock merge back contiguous callbacks. 8487 */ 8488 int base; 8489 8490 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8491 if (base < 0) { 8492 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8493 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8494 if (ctxt->sax->cdataBlock != NULL) 8495 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8496 XML_PARSER_BIG_BUFFER_SIZE); 8497 } 8498 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8499 ctxt->checkIndex = 0; 8500 } 8501 goto done; 8502 } else { 8503 if ((ctxt->sax != NULL) && (base > 0) && 8504 (!ctxt->disableSAX)) { 8505 if (ctxt->sax->cdataBlock != NULL) 8506 ctxt->sax->cdataBlock(ctxt->userData, 8507 ctxt->input->cur, base); 8508 } 8509 SKIP(base + 3); 8510 ctxt->checkIndex = 0; 8511 ctxt->instate = XML_PARSER_CONTENT; 8512#ifdef DEBUG_PUSH 8513 xmlGenericError(xmlGenericErrorContext, 8514 "PP: entering CONTENT\n"); 8515#endif 8516 } 8517 break; 8518 } 8519 case XML_PARSER_END_TAG: 8520 if (avail < 2) 8521 goto done; 8522 if ((!terminate) && 8523 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8524 goto done; 8525 xmlParseEndTag(ctxt); 8526 if (ctxt->name == NULL) { 8527 ctxt->instate = XML_PARSER_EPILOG; 8528#ifdef DEBUG_PUSH 8529 xmlGenericError(xmlGenericErrorContext, 8530 "PP: entering EPILOG\n"); 8531#endif 8532 } else { 8533 ctxt->instate = XML_PARSER_CONTENT; 8534#ifdef DEBUG_PUSH 8535 
xmlGenericError(xmlGenericErrorContext, 8536 "PP: entering CONTENT\n"); 8537#endif 8538 } 8539 break; 8540 case XML_PARSER_DTD: { 8541 /* 8542 * Sorry but progressive parsing of the internal subset 8543 * is not expected to be supported. We first check that 8544 * the full content of the internal subset is available and 8545 * the parsing is launched only at that point. 8546 * Internal subset ends up with "']' S? '>'" in an unescaped 8547 * section and not in a ']]>' sequence which are conditional 8548 * sections (whoever argued to keep that crap in XML deserve 8549 * a place in hell !). 8550 */ 8551 int base, i; 8552 xmlChar *buf; 8553 xmlChar quote = 0; 8554 8555 base = ctxt->input->cur - ctxt->input->base; 8556 if (base < 0) return(0); 8557 if (ctxt->checkIndex > base) 8558 base = ctxt->checkIndex; 8559 buf = ctxt->input->buf->buffer->content; 8560 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8561 base++) { 8562 if (quote != 0) { 8563 if (buf[base] == quote) 8564 quote = 0; 8565 continue; 8566 } 8567 if (buf[base] == '"') { 8568 quote = '"'; 8569 continue; 8570 } 8571 if (buf[base] == '\'') { 8572 quote = '\''; 8573 continue; 8574 } 8575 if (buf[base] == ']') { 8576 if ((unsigned int) base +1 >= 8577 ctxt->input->buf->buffer->use) 8578 break; 8579 if (buf[base + 1] == ']') { 8580 /* conditional crap, skip both ']' ! 
*/ 8581 base++; 8582 continue; 8583 } 8584 for (i = 0; 8585 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8586 i++) { 8587 if (buf[base + i] == '>') 8588 goto found_end_int_subset; 8589 } 8590 break; 8591 } 8592 } 8593 /* 8594 * We didn't found the end of the Internal subset 8595 */ 8596 if (quote == 0) 8597 ctxt->checkIndex = base; 8598#ifdef DEBUG_PUSH 8599 if (next == 0) 8600 xmlGenericError(xmlGenericErrorContext, 8601 "PP: lookup of int subset end filed\n"); 8602#endif 8603 goto done; 8604 8605found_end_int_subset: 8606 xmlParseInternalSubset(ctxt); 8607 ctxt->inSubset = 2; 8608 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8609 (ctxt->sax->externalSubset != NULL)) 8610 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8611 ctxt->extSubSystem, ctxt->extSubURI); 8612 ctxt->inSubset = 0; 8613 ctxt->instate = XML_PARSER_PROLOG; 8614 ctxt->checkIndex = 0; 8615#ifdef DEBUG_PUSH 8616 xmlGenericError(xmlGenericErrorContext, 8617 "PP: entering PROLOG\n"); 8618#endif 8619 break; 8620 } 8621 case XML_PARSER_COMMENT: 8622 xmlGenericError(xmlGenericErrorContext, 8623 "PP: internal error, state == COMMENT\n"); 8624 ctxt->instate = XML_PARSER_CONTENT; 8625#ifdef DEBUG_PUSH 8626 xmlGenericError(xmlGenericErrorContext, 8627 "PP: entering CONTENT\n"); 8628#endif 8629 break; 8630 case XML_PARSER_PI: 8631 xmlGenericError(xmlGenericErrorContext, 8632 "PP: internal error, state == PI\n"); 8633 ctxt->instate = XML_PARSER_CONTENT; 8634#ifdef DEBUG_PUSH 8635 xmlGenericError(xmlGenericErrorContext, 8636 "PP: entering CONTENT\n"); 8637#endif 8638 break; 8639 case XML_PARSER_ENTITY_DECL: 8640 xmlGenericError(xmlGenericErrorContext, 8641 "PP: internal error, state == ENTITY_DECL\n"); 8642 ctxt->instate = XML_PARSER_DTD; 8643#ifdef DEBUG_PUSH 8644 xmlGenericError(xmlGenericErrorContext, 8645 "PP: entering DTD\n"); 8646#endif 8647 break; 8648 case XML_PARSER_ENTITY_VALUE: 8649 xmlGenericError(xmlGenericErrorContext, 8650 "PP: internal error, state == 
ENTITY_VALUE\n"); 8651 ctxt->instate = XML_PARSER_CONTENT; 8652#ifdef DEBUG_PUSH 8653 xmlGenericError(xmlGenericErrorContext, 8654 "PP: entering DTD\n"); 8655#endif 8656 break; 8657 case XML_PARSER_ATTRIBUTE_VALUE: 8658 xmlGenericError(xmlGenericErrorContext, 8659 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8660 ctxt->instate = XML_PARSER_START_TAG; 8661#ifdef DEBUG_PUSH 8662 xmlGenericError(xmlGenericErrorContext, 8663 "PP: entering START_TAG\n"); 8664#endif 8665 break; 8666 case XML_PARSER_SYSTEM_LITERAL: 8667 xmlGenericError(xmlGenericErrorContext, 8668 "PP: internal error, state == SYSTEM_LITERAL\n"); 8669 ctxt->instate = XML_PARSER_START_TAG; 8670#ifdef DEBUG_PUSH 8671 xmlGenericError(xmlGenericErrorContext, 8672 "PP: entering START_TAG\n"); 8673#endif 8674 break; 8675 case XML_PARSER_PUBLIC_LITERAL: 8676 xmlGenericError(xmlGenericErrorContext, 8677 "PP: internal error, state == PUBLIC_LITERAL\n"); 8678 ctxt->instate = XML_PARSER_START_TAG; 8679#ifdef DEBUG_PUSH 8680 xmlGenericError(xmlGenericErrorContext, 8681 "PP: entering START_TAG\n"); 8682#endif 8683 break; 8684 } 8685 } 8686done: 8687#ifdef DEBUG_PUSH 8688 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8689#endif 8690 return(ret); 8691} 8692 8693/** 8694 * xmlParseChunk: 8695 * @ctxt: an XML parser context 8696 * @chunk: an char array 8697 * @size: the size in byte of the chunk 8698 * @terminate: last chunk indicator 8699 * 8700 * Parse a Chunk of memory 8701 * 8702 * Returns zero if no error, the xmlParserErrors otherwise. 
8703 */ 8704int 8705xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8706 int terminate) { 8707 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8708 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8709 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8710 int cur = ctxt->input->cur - ctxt->input->base; 8711 8712 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8713 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8714 ctxt->input->cur = ctxt->input->base + cur; 8715 ctxt->input->end = 8716 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8717#ifdef DEBUG_PUSH 8718 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8719#endif 8720 8721 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8722 xmlParseTryOrFinish(ctxt, terminate); 8723 } else if (ctxt->instate != XML_PARSER_EOF) { 8724 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8725 xmlParserInputBufferPtr in = ctxt->input->buf; 8726 if ((in->encoder != NULL) && (in->buffer != NULL) && 8727 (in->raw != NULL)) { 8728 int nbchars; 8729 8730 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8731 if (nbchars < 0) { 8732 xmlGenericError(xmlGenericErrorContext, 8733 "xmlParseChunk: encoder error\n"); 8734 return(XML_ERR_INVALID_ENCODING); 8735 } 8736 } 8737 } 8738 } 8739 xmlParseTryOrFinish(ctxt, terminate); 8740 if (terminate) { 8741 /* 8742 * Check for termination 8743 */ 8744 if ((ctxt->instate != XML_PARSER_EOF) && 8745 (ctxt->instate != XML_PARSER_EPILOG)) { 8746 ctxt->errNo = XML_ERR_DOCUMENT_END; 8747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8748 ctxt->sax->error(ctxt->userData, 8749 "Extra content at the end of the document\n"); 8750 ctxt->wellFormed = 0; 8751 ctxt->disableSAX = 1; 8752 } 8753 if (ctxt->instate != XML_PARSER_EOF) { 8754 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8755 (!ctxt->disableSAX)) 8756 
ctxt->sax->endDocument(ctxt->userData); 8757 } 8758 ctxt->instate = XML_PARSER_EOF; 8759 } 8760 return((xmlParserErrors) ctxt->errNo); 8761} 8762 8763/************************************************************************ 8764 * * 8765 * I/O front end functions to the parser * 8766 * * 8767 ************************************************************************/ 8768 8769/** 8770 * xmlStopParser: 8771 * @ctxt: an XML parser context 8772 * 8773 * Blocks further parser processing 8774 */ 8775void 8776xmlStopParser(xmlParserCtxtPtr ctxt) { 8777 ctxt->instate = XML_PARSER_EOF; 8778 if (ctxt->input != NULL) 8779 ctxt->input->cur = BAD_CAST""; 8780} 8781 8782/** 8783 * xmlCreatePushParserCtxt: 8784 * @sax: a SAX handler 8785 * @user_data: The user data returned on SAX callbacks 8786 * @chunk: a pointer to an array of chars 8787 * @size: number of chars in the array 8788 * @filename: an optional file name or URI 8789 * 8790 * Create a parser context for using the XML parser in push mode 8791 * To allow content encoding detection, @size should be >= 4 8792 * The value of @filename is used for fetching external entities 8793 * and error/warning reports. 
8794 * 8795 * Returns the new parser context or NULL 8796 */ 8797xmlParserCtxtPtr 8798xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8799 const char *chunk, int size, const char *filename) { 8800 xmlParserCtxtPtr ctxt; 8801 xmlParserInputPtr inputStream; 8802 xmlParserInputBufferPtr buf; 8803 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8804 8805 /* 8806 * plug some encoding conversion routines 8807 */ 8808 if ((chunk != NULL) && (size >= 4)) 8809 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8810 8811 buf = xmlAllocParserInputBuffer(enc); 8812 if (buf == NULL) return(NULL); 8813 8814 ctxt = xmlNewParserCtxt(); 8815 if (ctxt == NULL) { 8816 xmlFree(buf); 8817 return(NULL); 8818 } 8819 if (sax != NULL) { 8820 if (ctxt->sax != &xmlDefaultSAXHandler) 8821 xmlFree(ctxt->sax); 8822 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8823 if (ctxt->sax == NULL) { 8824 xmlFree(buf); 8825 xmlFree(ctxt); 8826 return(NULL); 8827 } 8828 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8829 if (user_data != NULL) 8830 ctxt->userData = user_data; 8831 } 8832 if (filename == NULL) { 8833 ctxt->directory = NULL; 8834 } else { 8835 ctxt->directory = xmlParserGetDirectory(filename); 8836 } 8837 8838 inputStream = xmlNewInputStream(ctxt); 8839 if (inputStream == NULL) { 8840 xmlFreeParserCtxt(ctxt); 8841 return(NULL); 8842 } 8843 8844 if (filename == NULL) 8845 inputStream->filename = NULL; 8846 else 8847 inputStream->filename = xmlMemStrdup(filename); 8848 inputStream->buf = buf; 8849 inputStream->base = inputStream->buf->buffer->content; 8850 inputStream->cur = inputStream->buf->buffer->content; 8851 inputStream->end = 8852 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 8853 8854 inputPush(ctxt, inputStream); 8855 8856 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8857 (ctxt->input->buf != NULL)) { 8858 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8859 int cur = ctxt->input->cur - 
ctxt->input->base; 8860 8861 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8862 8863 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8864 ctxt->input->cur = ctxt->input->base + cur; 8865 ctxt->input->end = 8866 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8867#ifdef DEBUG_PUSH 8868 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8869#endif 8870 } 8871 8872 if (enc != XML_CHAR_ENCODING_NONE) { 8873 xmlSwitchEncoding(ctxt, enc); 8874 } 8875 8876 return(ctxt); 8877} 8878 8879/** 8880 * xmlCreateIOParserCtxt: 8881 * @sax: a SAX handler 8882 * @user_data: The user data returned on SAX callbacks 8883 * @ioread: an I/O read function 8884 * @ioclose: an I/O close function 8885 * @ioctx: an I/O handler 8886 * @enc: the charset encoding if known 8887 * 8888 * Create a parser context for using the XML parser with an existing 8889 * I/O stream 8890 * 8891 * Returns the new parser context or NULL 8892 */ 8893xmlParserCtxtPtr 8894xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8895 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8896 void *ioctx, xmlCharEncoding enc) { 8897 xmlParserCtxtPtr ctxt; 8898 xmlParserInputPtr inputStream; 8899 xmlParserInputBufferPtr buf; 8900 8901 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8902 if (buf == NULL) return(NULL); 8903 8904 ctxt = xmlNewParserCtxt(); 8905 if (ctxt == NULL) { 8906 xmlFree(buf); 8907 return(NULL); 8908 } 8909 if (sax != NULL) { 8910 if (ctxt->sax != &xmlDefaultSAXHandler) 8911 xmlFree(ctxt->sax); 8912 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8913 if (ctxt->sax == NULL) { 8914 xmlFree(buf); 8915 xmlFree(ctxt); 8916 return(NULL); 8917 } 8918 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8919 if (user_data != NULL) 8920 ctxt->userData = user_data; 8921 } 8922 8923 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8924 if (inputStream == NULL) { 8925 xmlFreeParserCtxt(ctxt); 8926 return(NULL); 
8927 } 8928 inputPush(ctxt, inputStream); 8929 8930 return(ctxt); 8931} 8932 8933/************************************************************************ 8934 * * 8935 * Front ends when parsing a DTD * 8936 * * 8937 ************************************************************************/ 8938 8939/** 8940 * xmlIOParseDTD: 8941 * @sax: the SAX handler block or NULL 8942 * @input: an Input Buffer 8943 * @enc: the charset encoding if known 8944 * 8945 * Load and parse a DTD 8946 * 8947 * Returns the resulting xmlDtdPtr or NULL in case of error. 8948 * @input will be freed at parsing end. 8949 */ 8950 8951xmlDtdPtr 8952xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 8953 xmlCharEncoding enc) { 8954 xmlDtdPtr ret = NULL; 8955 xmlParserCtxtPtr ctxt; 8956 xmlParserInputPtr pinput = NULL; 8957 xmlChar start[4]; 8958 8959 if (input == NULL) 8960 return(NULL); 8961 8962 ctxt = xmlNewParserCtxt(); 8963 if (ctxt == NULL) { 8964 return(NULL); 8965 } 8966 8967 /* 8968 * Set-up the SAX context 8969 */ 8970 if (sax != NULL) { 8971 if (ctxt->sax != NULL) 8972 xmlFree(ctxt->sax); 8973 ctxt->sax = sax; 8974 ctxt->userData = NULL; 8975 } 8976 8977 /* 8978 * generate a parser input from the I/O handler 8979 */ 8980 8981 pinput = xmlNewIOInputStream(ctxt, input, enc); 8982 if (pinput == NULL) { 8983 if (sax != NULL) ctxt->sax = NULL; 8984 xmlFreeParserCtxt(ctxt); 8985 return(NULL); 8986 } 8987 8988 /* 8989 * plug some encoding conversion routines here. 8990 */ 8991 xmlPushInput(ctxt, pinput); 8992 8993 pinput->filename = NULL; 8994 pinput->line = 1; 8995 pinput->col = 1; 8996 pinput->base = ctxt->input->cur; 8997 pinput->cur = ctxt->input->cur; 8998 pinput->free = NULL; 8999 9000 /* 9001 * let's parse that entity knowing it's an external subset. 
9002 */ 9003 ctxt->inSubset = 2; 9004 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9005 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9006 BAD_CAST "none", BAD_CAST "none"); 9007 9008 if (enc == XML_CHAR_ENCODING_NONE) { 9009 /* 9010 * Get the 4 first bytes and decode the charset 9011 * if enc != XML_CHAR_ENCODING_NONE 9012 * plug some encoding conversion routines. 9013 */ 9014 start[0] = RAW; 9015 start[1] = NXT(1); 9016 start[2] = NXT(2); 9017 start[3] = NXT(3); 9018 enc = xmlDetectCharEncoding(start, 4); 9019 if (enc != XML_CHAR_ENCODING_NONE) { 9020 xmlSwitchEncoding(ctxt, enc); 9021 } 9022 } 9023 9024 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 9025 9026 if (ctxt->myDoc != NULL) { 9027 if (ctxt->wellFormed) { 9028 ret = ctxt->myDoc->extSubset; 9029 ctxt->myDoc->extSubset = NULL; 9030 } else { 9031 ret = NULL; 9032 } 9033 xmlFreeDoc(ctxt->myDoc); 9034 ctxt->myDoc = NULL; 9035 } 9036 if (sax != NULL) ctxt->sax = NULL; 9037 xmlFreeParserCtxt(ctxt); 9038 9039 return(ret); 9040} 9041 9042/** 9043 * xmlSAXParseDTD: 9044 * @sax: the SAX handler block 9045 * @ExternalID: a NAME* containing the External ID of the DTD 9046 * @SystemID: a NAME* containing the URL to the DTD 9047 * 9048 * Load and parse an external subset. 9049 * 9050 * Returns the resulting xmlDtdPtr or NULL in case of error. 
9051 */ 9052 9053xmlDtdPtr 9054xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 9055 const xmlChar *SystemID) { 9056 xmlDtdPtr ret = NULL; 9057 xmlParserCtxtPtr ctxt; 9058 xmlParserInputPtr input = NULL; 9059 xmlCharEncoding enc; 9060 9061 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 9062 9063 ctxt = xmlNewParserCtxt(); 9064 if (ctxt == NULL) { 9065 return(NULL); 9066 } 9067 9068 /* 9069 * Set-up the SAX context 9070 */ 9071 if (sax != NULL) { 9072 if (ctxt->sax != NULL) 9073 xmlFree(ctxt->sax); 9074 ctxt->sax = sax; 9075 ctxt->userData = NULL; 9076 } 9077 9078 /* 9079 * Ask the Entity resolver to load the damn thing 9080 */ 9081 9082 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 9083 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 9084 if (input == NULL) { 9085 if (sax != NULL) ctxt->sax = NULL; 9086 xmlFreeParserCtxt(ctxt); 9087 return(NULL); 9088 } 9089 9090 /* 9091 * plug some encoding conversion routines here. 9092 */ 9093 xmlPushInput(ctxt, input); 9094 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 9095 xmlSwitchEncoding(ctxt, enc); 9096 9097 if (input->filename == NULL) 9098 input->filename = (char *) xmlStrdup(SystemID); 9099 input->line = 1; 9100 input->col = 1; 9101 input->base = ctxt->input->cur; 9102 input->cur = ctxt->input->cur; 9103 input->free = NULL; 9104 9105 /* 9106 * let's parse that entity knowing it's an external subset. 
9107 */ 9108 ctxt->inSubset = 2; 9109 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9110 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9111 ExternalID, SystemID); 9112 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 9113 9114 if (ctxt->myDoc != NULL) { 9115 if (ctxt->wellFormed) { 9116 ret = ctxt->myDoc->extSubset; 9117 ctxt->myDoc->extSubset = NULL; 9118 } else { 9119 ret = NULL; 9120 } 9121 xmlFreeDoc(ctxt->myDoc); 9122 ctxt->myDoc = NULL; 9123 } 9124 if (sax != NULL) ctxt->sax = NULL; 9125 xmlFreeParserCtxt(ctxt); 9126 9127 return(ret); 9128} 9129 9130/** 9131 * xmlParseDTD: 9132 * @ExternalID: a NAME* containing the External ID of the DTD 9133 * @SystemID: a NAME* containing the URL to the DTD 9134 * 9135 * Load and parse an external subset. 9136 * 9137 * Returns the resulting xmlDtdPtr or NULL in case of error. 9138 */ 9139 9140xmlDtdPtr 9141xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 9142 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 9143} 9144 9145/************************************************************************ 9146 * * 9147 * Front ends when parsing an Entity * 9148 * * 9149 ************************************************************************/ 9150 9151/** 9152 * xmlParseCtxtExternalEntity: 9153 * @ctx: the existing parsing context 9154 * @URL: the URL for the entity to load 9155 * @ID: the System ID for the entity to load 9156 * @lst: the return value for the set of parsed nodes 9157 * 9158 * Parse an external general entity within an existing parsing context 9159 * An external general parsed entity is well-formed if it matches the 9160 * production labeled extParsedEnt. 9161 * 9162 * [78] extParsedEnt ::= TextDecl? 
content 9163 * 9164 * Returns 0 if the entity is well formed, -1 in case of args problem and 9165 * the parser error code otherwise 9166 */ 9167 9168int 9169xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 9170 const xmlChar *ID, xmlNodePtr *lst) { 9171 xmlParserCtxtPtr ctxt; 9172 xmlDocPtr newDoc; 9173 xmlSAXHandlerPtr oldsax = NULL; 9174 int ret = 0; 9175 xmlChar start[4]; 9176 xmlCharEncoding enc; 9177 9178 if (ctx->depth > 40) { 9179 return(XML_ERR_ENTITY_LOOP); 9180 } 9181 9182 if (lst != NULL) 9183 *lst = NULL; 9184 if ((URL == NULL) && (ID == NULL)) 9185 return(-1); 9186 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 9187 return(-1); 9188 9189 9190 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9191 if (ctxt == NULL) return(-1); 9192 ctxt->userData = ctxt; 9193 oldsax = ctxt->sax; 9194 ctxt->sax = ctx->sax; 9195 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9196 if (newDoc == NULL) { 9197 xmlFreeParserCtxt(ctxt); 9198 return(-1); 9199 } 9200 if (ctx->myDoc != NULL) { 9201 newDoc->intSubset = ctx->myDoc->intSubset; 9202 newDoc->extSubset = ctx->myDoc->extSubset; 9203 } 9204 if (ctx->myDoc->URL != NULL) { 9205 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 9206 } 9207 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9208 if (newDoc->children == NULL) { 9209 ctxt->sax = oldsax; 9210 xmlFreeParserCtxt(ctxt); 9211 newDoc->intSubset = NULL; 9212 newDoc->extSubset = NULL; 9213 xmlFreeDoc(newDoc); 9214 return(-1); 9215 } 9216 nodePush(ctxt, newDoc->children); 9217 if (ctx->myDoc == NULL) { 9218 ctxt->myDoc = newDoc; 9219 } else { 9220 ctxt->myDoc = ctx->myDoc; 9221 newDoc->children->doc = ctx->myDoc; 9222 } 9223 9224 /* 9225 * Get the 4 first bytes and decode the charset 9226 * if enc != XML_CHAR_ENCODING_NONE 9227 * plug some encoding conversion routines. 
9228 */ 9229 GROW 9230 start[0] = RAW; 9231 start[1] = NXT(1); 9232 start[2] = NXT(2); 9233 start[3] = NXT(3); 9234 enc = xmlDetectCharEncoding(start, 4); 9235 if (enc != XML_CHAR_ENCODING_NONE) { 9236 xmlSwitchEncoding(ctxt, enc); 9237 } 9238 9239 /* 9240 * Parse a possible text declaration first 9241 */ 9242 if ((RAW == '<') && (NXT(1) == '?') && 9243 (NXT(2) == 'x') && (NXT(3) == 'm') && 9244 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9245 xmlParseTextDecl(ctxt); 9246 } 9247 9248 /* 9249 * Doing validity checking on chunk doesn't make sense 9250 */ 9251 ctxt->instate = XML_PARSER_CONTENT; 9252 ctxt->validate = ctx->validate; 9253 ctxt->loadsubset = ctx->loadsubset; 9254 ctxt->depth = ctx->depth + 1; 9255 ctxt->replaceEntities = ctx->replaceEntities; 9256 if (ctxt->validate) { 9257 ctxt->vctxt.error = ctx->vctxt.error; 9258 ctxt->vctxt.warning = ctx->vctxt.warning; 9259 } else { 9260 ctxt->vctxt.error = NULL; 9261 ctxt->vctxt.warning = NULL; 9262 } 9263 ctxt->vctxt.nodeTab = NULL; 9264 ctxt->vctxt.nodeNr = 0; 9265 ctxt->vctxt.nodeMax = 0; 9266 ctxt->vctxt.node = NULL; 9267 9268 xmlParseContent(ctxt); 9269 9270 if ((RAW == '<') && (NXT(1) == '/')) { 9271 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9273 ctxt->sax->error(ctxt->userData, 9274 "chunk is not well balanced\n"); 9275 ctxt->wellFormed = 0; 9276 ctxt->disableSAX = 1; 9277 } else if (RAW != 0) { 9278 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9280 ctxt->sax->error(ctxt->userData, 9281 "extra content at the end of well balanced chunk\n"); 9282 ctxt->wellFormed = 0; 9283 ctxt->disableSAX = 1; 9284 } 9285 if (ctxt->node != newDoc->children) { 9286 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9288 ctxt->sax->error(ctxt->userData, 9289 "chunk is not well balanced\n"); 9290 ctxt->wellFormed = 0; 9291 ctxt->disableSAX = 1; 9292 } 9293 9294 if 
(!ctxt->wellFormed) { 9295 if (ctxt->errNo == 0) 9296 ret = 1; 9297 else 9298 ret = ctxt->errNo; 9299 } else { 9300 if (lst != NULL) { 9301 xmlNodePtr cur; 9302 9303 /* 9304 * Return the newly created nodeset after unlinking it from 9305 * they pseudo parent. 9306 */ 9307 cur = newDoc->children->children; 9308 *lst = cur; 9309 while (cur != NULL) { 9310 cur->parent = NULL; 9311 cur = cur->next; 9312 } 9313 newDoc->children->children = NULL; 9314 } 9315 ret = 0; 9316 } 9317 ctxt->sax = oldsax; 9318 xmlFreeParserCtxt(ctxt); 9319 newDoc->intSubset = NULL; 9320 newDoc->extSubset = NULL; 9321 xmlFreeDoc(newDoc); 9322 9323 return(ret); 9324} 9325 9326/** 9327 * xmlParseExternalEntityPrivate: 9328 * @doc: the document the chunk pertains to 9329 * @oldctxt: the previous parser context if available 9330 * @sax: the SAX handler bloc (possibly NULL) 9331 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9332 * @depth: Used for loop detection, use 0 9333 * @URL: the URL for the entity to load 9334 * @ID: the System ID for the entity to load 9335 * @list: the return value for the set of parsed nodes 9336 * 9337 * Private version of xmlParseExternalEntity() 9338 * 9339 * Returns 0 if the entity is well formed, -1 in case of args problem and 9340 * the parser error code otherwise 9341 */ 9342 9343static int 9344xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 9345 xmlSAXHandlerPtr sax, 9346 void *user_data, int depth, const xmlChar *URL, 9347 const xmlChar *ID, xmlNodePtr *list) { 9348 xmlParserCtxtPtr ctxt; 9349 xmlDocPtr newDoc; 9350 xmlSAXHandlerPtr oldsax = NULL; 9351 int ret = 0; 9352 xmlChar start[4]; 9353 xmlCharEncoding enc; 9354 9355 if (depth > 40) { 9356 return(XML_ERR_ENTITY_LOOP); 9357 } 9358 9359 9360 9361 if (list != NULL) 9362 *list = NULL; 9363 if ((URL == NULL) && (ID == NULL)) 9364 return(-1); 9365 if (doc == NULL) /* @@ relax but check for dereferences */ 9366 return(-1); 9367 9368 9369 ctxt = 
xmlCreateEntityParserCtxt(URL, ID, NULL); 9370 if (ctxt == NULL) return(-1); 9371 ctxt->userData = ctxt; 9372 if (oldctxt != NULL) { 9373 ctxt->_private = oldctxt->_private; 9374 ctxt->loadsubset = oldctxt->loadsubset; 9375 ctxt->validate = oldctxt->validate; 9376 ctxt->external = oldctxt->external; 9377 } else { 9378 /* 9379 * Doing validity checking on chunk without context 9380 * doesn't make sense 9381 */ 9382 ctxt->_private = NULL; 9383 ctxt->validate = 0; 9384 ctxt->external = 2; 9385 ctxt->loadsubset = 0; 9386 } 9387 if (sax != NULL) { 9388 oldsax = ctxt->sax; 9389 ctxt->sax = sax; 9390 if (user_data != NULL) 9391 ctxt->userData = user_data; 9392 } 9393 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9394 if (newDoc == NULL) { 9395 xmlFreeParserCtxt(ctxt); 9396 return(-1); 9397 } 9398 if (doc != NULL) { 9399 newDoc->intSubset = doc->intSubset; 9400 newDoc->extSubset = doc->extSubset; 9401 } 9402 if (doc->URL != NULL) { 9403 newDoc->URL = xmlStrdup(doc->URL); 9404 } 9405 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9406 if (newDoc->children == NULL) { 9407 if (sax != NULL) 9408 ctxt->sax = oldsax; 9409 xmlFreeParserCtxt(ctxt); 9410 newDoc->intSubset = NULL; 9411 newDoc->extSubset = NULL; 9412 xmlFreeDoc(newDoc); 9413 return(-1); 9414 } 9415 nodePush(ctxt, newDoc->children); 9416 if (doc == NULL) { 9417 ctxt->myDoc = newDoc; 9418 } else { 9419 ctxt->myDoc = doc; 9420 newDoc->children->doc = doc; 9421 } 9422 9423 /* 9424 * Get the 4 first bytes and decode the charset 9425 * if enc != XML_CHAR_ENCODING_NONE 9426 * plug some encoding conversion routines. 
9427 */ 9428 GROW; 9429 start[0] = RAW; 9430 start[1] = NXT(1); 9431 start[2] = NXT(2); 9432 start[3] = NXT(3); 9433 enc = xmlDetectCharEncoding(start, 4); 9434 if (enc != XML_CHAR_ENCODING_NONE) { 9435 xmlSwitchEncoding(ctxt, enc); 9436 } 9437 9438 /* 9439 * Parse a possible text declaration first 9440 */ 9441 if ((RAW == '<') && (NXT(1) == '?') && 9442 (NXT(2) == 'x') && (NXT(3) == 'm') && 9443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9444 xmlParseTextDecl(ctxt); 9445 } 9446 9447 ctxt->instate = XML_PARSER_CONTENT; 9448 ctxt->depth = depth; 9449 9450 xmlParseContent(ctxt); 9451 9452 if ((RAW == '<') && (NXT(1) == '/')) { 9453 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9455 ctxt->sax->error(ctxt->userData, 9456 "chunk is not well balanced\n"); 9457 ctxt->wellFormed = 0; 9458 ctxt->disableSAX = 1; 9459 } else if (RAW != 0) { 9460 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9462 ctxt->sax->error(ctxt->userData, 9463 "extra content at the end of well balanced chunk\n"); 9464 ctxt->wellFormed = 0; 9465 ctxt->disableSAX = 1; 9466 } 9467 if (ctxt->node != newDoc->children) { 9468 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9470 ctxt->sax->error(ctxt->userData, 9471 "chunk is not well balanced\n"); 9472 ctxt->wellFormed = 0; 9473 ctxt->disableSAX = 1; 9474 } 9475 9476 if (!ctxt->wellFormed) { 9477 if (ctxt->errNo == 0) 9478 ret = 1; 9479 else 9480 ret = ctxt->errNo; 9481 } else { 9482 if (list != NULL) { 9483 xmlNodePtr cur; 9484 9485 /* 9486 * Return the newly created nodeset after unlinking it from 9487 * they pseudo parent. 
9488 */ 9489 cur = newDoc->children->children; 9490 *list = cur; 9491 while (cur != NULL) { 9492 cur->parent = NULL; 9493 cur = cur->next; 9494 } 9495 newDoc->children->children = NULL; 9496 } 9497 ret = 0; 9498 } 9499 if (sax != NULL) 9500 ctxt->sax = oldsax; 9501 xmlFreeParserCtxt(ctxt); 9502 newDoc->intSubset = NULL; 9503 newDoc->extSubset = NULL; 9504 xmlFreeDoc(newDoc); 9505 9506 return(ret); 9507} 9508 9509/** 9510 * xmlParseExternalEntity: 9511 * @doc: the document the chunk pertains to 9512 * @sax: the SAX handler bloc (possibly NULL) 9513 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9514 * @depth: Used for loop detection, use 0 9515 * @URL: the URL for the entity to load 9516 * @ID: the System ID for the entity to load 9517 * @lst: the return value for the set of parsed nodes 9518 * 9519 * Parse an external general entity 9520 * An external general parsed entity is well-formed if it matches the 9521 * production labeled extParsedEnt. 9522 * 9523 * [78] extParsedEnt ::= TextDecl? 
content 9524 * 9525 * Returns 0 if the entity is well formed, -1 in case of args problem and 9526 * the parser error code otherwise 9527 */ 9528 9529int 9530xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 9531 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 9532 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 9533 ID, lst)); 9534} 9535 9536/** 9537 * xmlParseBalancedChunkMemory: 9538 * @doc: the document the chunk pertains to 9539 * @sax: the SAX handler bloc (possibly NULL) 9540 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9541 * @depth: Used for loop detection, use 0 9542 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9543 * @lst: the return value for the set of parsed nodes 9544 * 9545 * Parse a well-balanced chunk of an XML document 9546 * called by the parser 9547 * The allowed sequence for the Well Balanced Chunk is the one defined by 9548 * the content production in the XML grammar: 9549 * 9550 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9551 * 9552 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9553 * the parser error code otherwise 9554 */ 9555 9556int 9557xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9558 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 9559 xmlParserCtxtPtr ctxt; 9560 xmlDocPtr newDoc; 9561 xmlSAXHandlerPtr oldsax = NULL; 9562 int size; 9563 int ret = 0; 9564 9565 if (depth > 40) { 9566 return(XML_ERR_ENTITY_LOOP); 9567 } 9568 9569 9570 if (lst != NULL) 9571 *lst = NULL; 9572 if (string == NULL) 9573 return(-1); 9574 9575 size = xmlStrlen(string); 9576 9577 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9578 if (ctxt == NULL) return(-1); 9579 ctxt->userData = ctxt; 9580 if (sax != NULL) { 9581 oldsax = ctxt->sax; 9582 ctxt->sax = sax; 9583 if (user_data != NULL) 9584 ctxt->userData = user_data; 9585 } 9586 
newDoc = xmlNewDoc(BAD_CAST "1.0"); 9587 if (newDoc == NULL) { 9588 xmlFreeParserCtxt(ctxt); 9589 return(-1); 9590 } 9591 if (doc != NULL) { 9592 newDoc->intSubset = doc->intSubset; 9593 newDoc->extSubset = doc->extSubset; 9594 } 9595 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9596 if (newDoc->children == NULL) { 9597 if (sax != NULL) 9598 ctxt->sax = oldsax; 9599 xmlFreeParserCtxt(ctxt); 9600 newDoc->intSubset = NULL; 9601 newDoc->extSubset = NULL; 9602 xmlFreeDoc(newDoc); 9603 return(-1); 9604 } 9605 nodePush(ctxt, newDoc->children); 9606 if (doc == NULL) { 9607 ctxt->myDoc = newDoc; 9608 } else { 9609 ctxt->myDoc = doc; 9610 newDoc->children->doc = doc; 9611 } 9612 ctxt->instate = XML_PARSER_CONTENT; 9613 ctxt->depth = depth; 9614 9615 /* 9616 * Doing validity checking on chunk doesn't make sense 9617 */ 9618 ctxt->validate = 0; 9619 ctxt->loadsubset = 0; 9620 9621 xmlParseContent(ctxt); 9622 9623 if ((RAW == '<') && (NXT(1) == '/')) { 9624 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9626 ctxt->sax->error(ctxt->userData, 9627 "chunk is not well balanced\n"); 9628 ctxt->wellFormed = 0; 9629 ctxt->disableSAX = 1; 9630 } else if (RAW != 0) { 9631 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9633 ctxt->sax->error(ctxt->userData, 9634 "extra content at the end of well balanced chunk\n"); 9635 ctxt->wellFormed = 0; 9636 ctxt->disableSAX = 1; 9637 } 9638 if (ctxt->node != newDoc->children) { 9639 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9641 ctxt->sax->error(ctxt->userData, 9642 "chunk is not well balanced\n"); 9643 ctxt->wellFormed = 0; 9644 ctxt->disableSAX = 1; 9645 } 9646 9647 if (!ctxt->wellFormed) { 9648 if (ctxt->errNo == 0) 9649 ret = 1; 9650 else 9651 ret = ctxt->errNo; 9652 } else { 9653 if (lst != NULL) { 9654 xmlNodePtr cur; 9655 9656 /* 9657 * Return the 
newly created nodeset after unlinking it from 9658 * they pseudo parent. 9659 */ 9660 cur = newDoc->children->children; 9661 *lst = cur; 9662 while (cur != NULL) { 9663 cur->parent = NULL; 9664 cur = cur->next; 9665 } 9666 newDoc->children->children = NULL; 9667 } 9668 ret = 0; 9669 } 9670 if (sax != NULL) 9671 ctxt->sax = oldsax; 9672 xmlFreeParserCtxt(ctxt); 9673 newDoc->intSubset = NULL; 9674 newDoc->extSubset = NULL; 9675 xmlFreeDoc(newDoc); 9676 9677 return(ret); 9678} 9679 9680/** 9681 * xmlSAXParseEntity: 9682 * @sax: the SAX handler block 9683 * @filename: the filename 9684 * 9685 * parse an XML external entity out of context and build a tree. 9686 * It use the given SAX function block to handle the parsing callback. 9687 * If sax is NULL, fallback to the default DOM tree building routines. 9688 * 9689 * [78] extParsedEnt ::= TextDecl? content 9690 * 9691 * This correspond to a "Well Balanced" chunk 9692 * 9693 * Returns the resulting document tree 9694 */ 9695 9696xmlDocPtr 9697xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9698 xmlDocPtr ret; 9699 xmlParserCtxtPtr ctxt; 9700 char *directory = NULL; 9701 9702 ctxt = xmlCreateFileParserCtxt(filename); 9703 if (ctxt == NULL) { 9704 return(NULL); 9705 } 9706 if (sax != NULL) { 9707 if (ctxt->sax != NULL) 9708 xmlFree(ctxt->sax); 9709 ctxt->sax = sax; 9710 ctxt->userData = NULL; 9711 } 9712 9713 if ((ctxt->directory == NULL) && (directory == NULL)) 9714 directory = xmlParserGetDirectory(filename); 9715 9716 xmlParseExtParsedEnt(ctxt); 9717 9718 if (ctxt->wellFormed) 9719 ret = ctxt->myDoc; 9720 else { 9721 ret = NULL; 9722 xmlFreeDoc(ctxt->myDoc); 9723 ctxt->myDoc = NULL; 9724 } 9725 if (sax != NULL) 9726 ctxt->sax = NULL; 9727 xmlFreeParserCtxt(ctxt); 9728 9729 return(ret); 9730} 9731 9732/** 9733 * xmlParseEntity: 9734 * @filename: the filename 9735 * 9736 * parse an XML external entity out of context and build a tree. 9737 * 9738 * [78] extParsedEnt ::= TextDecl? 
content 9739 * 9740 * This correspond to a "Well Balanced" chunk 9741 * 9742 * Returns the resulting document tree 9743 */ 9744 9745xmlDocPtr 9746xmlParseEntity(const char *filename) { 9747 return(xmlSAXParseEntity(NULL, filename)); 9748} 9749 9750/** 9751 * xmlCreateEntityParserCtxt: 9752 * @URL: the entity URL 9753 * @ID: the entity PUBLIC ID 9754 * @base: a possible base for the target URI 9755 * 9756 * Create a parser context for an external entity 9757 * Automatic support for ZLIB/Compress compressed document is provided 9758 * by default if found at compile-time. 9759 * 9760 * Returns the new parser context or NULL 9761 */ 9762xmlParserCtxtPtr 9763xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9764 const xmlChar *base) { 9765 xmlParserCtxtPtr ctxt; 9766 xmlParserInputPtr inputStream; 9767 char *directory = NULL; 9768 xmlChar *uri; 9769 9770 ctxt = xmlNewParserCtxt(); 9771 if (ctxt == NULL) { 9772 return(NULL); 9773 } 9774 9775 uri = xmlBuildURI(URL, base); 9776 9777 if (uri == NULL) { 9778 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9779 if (inputStream == NULL) { 9780 xmlFreeParserCtxt(ctxt); 9781 return(NULL); 9782 } 9783 9784 inputPush(ctxt, inputStream); 9785 9786 if ((ctxt->directory == NULL) && (directory == NULL)) 9787 directory = xmlParserGetDirectory((char *)URL); 9788 if ((ctxt->directory == NULL) && (directory != NULL)) 9789 ctxt->directory = directory; 9790 } else { 9791 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9792 if (inputStream == NULL) { 9793 xmlFree(uri); 9794 xmlFreeParserCtxt(ctxt); 9795 return(NULL); 9796 } 9797 9798 inputPush(ctxt, inputStream); 9799 9800 if ((ctxt->directory == NULL) && (directory == NULL)) 9801 directory = xmlParserGetDirectory((char *)uri); 9802 if ((ctxt->directory == NULL) && (directory != NULL)) 9803 ctxt->directory = directory; 9804 xmlFree(uri); 9805 } 9806 9807 return(ctxt); 9808} 9809 
9810/************************************************************************ 9811 * * 9812 * Front ends when parsing from a file * 9813 * * 9814 ************************************************************************/ 9815 9816/** 9817 * xmlCreateFileParserCtxt: 9818 * @filename: the filename 9819 * 9820 * Create a parser context for a file content. 9821 * Automatic support for ZLIB/Compress compressed document is provided 9822 * by default if found at compile-time. 9823 * 9824 * Returns the new parser context or NULL 9825 */ 9826xmlParserCtxtPtr 9827xmlCreateFileParserCtxt(const char *filename) 9828{ 9829 xmlParserCtxtPtr ctxt; 9830 xmlParserInputPtr inputStream; 9831 char *directory = NULL; 9832 9833 ctxt = xmlNewParserCtxt(); 9834 if (ctxt == NULL) { 9835 if (xmlDefaultSAXHandler.error != NULL) { 9836 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9837 } 9838 return(NULL); 9839 } 9840 9841 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 9842 if (inputStream == NULL) { 9843 xmlFreeParserCtxt(ctxt); 9844 return(NULL); 9845 } 9846 9847 inputPush(ctxt, inputStream); 9848 if ((ctxt->directory == NULL) && (directory == NULL)) 9849 directory = xmlParserGetDirectory(filename); 9850 if ((ctxt->directory == NULL) && (directory != NULL)) 9851 ctxt->directory = directory; 9852 9853 return(ctxt); 9854} 9855 9856/** 9857 * xmlSAXParseFileWithData: 9858 * @sax: the SAX handler block 9859 * @filename: the filename 9860 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9861 * documents 9862 * @data: the userdata 9863 * 9864 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9865 * compressed document is provided by default if found at compile-time. 9866 * It use the given SAX function block to handle the parsing callback. 9867 * If sax is NULL, fallback to the default DOM tree building routines. 9868 * 9869 * User data (void *) is stored within the parser context, so it is 9870 * available nearly everywhere in libxml. 
9871 * 9872 * Returns the resulting document tree 9873 */ 9874 9875xmlDocPtr 9876xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 9877 int recovery, void *data) { 9878 xmlDocPtr ret; 9879 xmlParserCtxtPtr ctxt; 9880 char *directory = NULL; 9881 9882 xmlInitParser(); 9883 9884 ctxt = xmlCreateFileParserCtxt(filename); 9885 if (ctxt == NULL) { 9886 return(NULL); 9887 } 9888 if (sax != NULL) { 9889 if (ctxt->sax != NULL) 9890 xmlFree(ctxt->sax); 9891 ctxt->sax = sax; 9892 } 9893 if (data!=NULL) { 9894 ctxt->_private=data; 9895 } 9896 9897 if ((ctxt->directory == NULL) && (directory == NULL)) 9898 directory = xmlParserGetDirectory(filename); 9899 if ((ctxt->directory == NULL) && (directory != NULL)) 9900 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9901 9902 xmlParseDocument(ctxt); 9903 9904 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9905 else { 9906 ret = NULL; 9907 xmlFreeDoc(ctxt->myDoc); 9908 ctxt->myDoc = NULL; 9909 } 9910 if (sax != NULL) 9911 ctxt->sax = NULL; 9912 xmlFreeParserCtxt(ctxt); 9913 9914 return(ret); 9915} 9916 9917/** 9918 * xmlSAXParseFile: 9919 * @sax: the SAX handler block 9920 * @filename: the filename 9921 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9922 * documents 9923 * 9924 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9925 * compressed document is provided by default if found at compile-time. 9926 * It use the given SAX function block to handle the parsing callback. 9927 * If sax is NULL, fallback to the default DOM tree building routines. 9928 * 9929 * Returns the resulting document tree 9930 */ 9931 9932xmlDocPtr 9933xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9934 int recovery) { 9935 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 9936} 9937 9938/** 9939 * xmlRecoverDoc: 9940 * @cur: a pointer to an array of xmlChar 9941 * 9942 * parse an XML in-memory document and build a tree. 
9943 * In the case the document is not Well Formed, a tree is built anyway 9944 * 9945 * Returns the resulting document tree 9946 */ 9947 9948xmlDocPtr 9949xmlRecoverDoc(xmlChar *cur) { 9950 return(xmlSAXParseDoc(NULL, cur, 1)); 9951} 9952 9953/** 9954 * xmlParseFile: 9955 * @filename: the filename 9956 * 9957 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9958 * compressed document is provided by default if found at compile-time. 9959 * 9960 * Returns the resulting document tree if the file was wellformed, 9961 * NULL otherwise. 9962 */ 9963 9964xmlDocPtr 9965xmlParseFile(const char *filename) { 9966 return(xmlSAXParseFile(NULL, filename, 0)); 9967} 9968 9969/** 9970 * xmlRecoverFile: 9971 * @filename: the filename 9972 * 9973 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9974 * compressed document is provided by default if found at compile-time. 9975 * In the case the document is not Well Formed, a tree is built anyway 9976 * 9977 * Returns the resulting document tree 9978 */ 9979 9980xmlDocPtr 9981xmlRecoverFile(const char *filename) { 9982 return(xmlSAXParseFile(NULL, filename, 1)); 9983} 9984 9985 9986/** 9987 * xmlSetupParserForBuffer: 9988 * @ctxt: an XML parser context 9989 * @buffer: a xmlChar * buffer 9990 * @filename: a file name 9991 * 9992 * Setup the parser context to parse a new buffer; Clears any prior 9993 * contents from the parser context. 
The buffer parameter must not be 9994 * NULL, but the filename parameter can be 9995 */ 9996void 9997xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9998 const char* filename) 9999{ 10000 xmlParserInputPtr input; 10001 10002 input = xmlNewInputStream(ctxt); 10003 if (input == NULL) { 10004 perror("malloc"); 10005 xmlFree(ctxt); 10006 return; 10007 } 10008 10009 xmlClearParserCtxt(ctxt); 10010 if (filename != NULL) 10011 input->filename = xmlMemStrdup(filename); 10012 input->base = buffer; 10013 input->cur = buffer; 10014 input->end = &buffer[xmlStrlen(buffer)]; 10015 inputPush(ctxt, input); 10016} 10017 10018/** 10019 * xmlSAXUserParseFile: 10020 * @sax: a SAX handler 10021 * @user_data: The user data returned on SAX callbacks 10022 * @filename: a file name 10023 * 10024 * parse an XML file and call the given SAX handler routines. 10025 * Automatic support for ZLIB/Compress compressed document is provided 10026 * 10027 * Returns 0 in case of success or a error number otherwise 10028 */ 10029int 10030xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 10031 const char *filename) { 10032 int ret = 0; 10033 xmlParserCtxtPtr ctxt; 10034 10035 ctxt = xmlCreateFileParserCtxt(filename); 10036 if (ctxt == NULL) return -1; 10037 if (ctxt->sax != &xmlDefaultSAXHandler) 10038 xmlFree(ctxt->sax); 10039 ctxt->sax = sax; 10040 if (user_data != NULL) 10041 ctxt->userData = user_data; 10042 10043 xmlParseDocument(ctxt); 10044 10045 if (ctxt->wellFormed) 10046 ret = 0; 10047 else { 10048 if (ctxt->errNo != 0) 10049 ret = ctxt->errNo; 10050 else 10051 ret = -1; 10052 } 10053 if (sax != NULL) 10054 ctxt->sax = NULL; 10055 xmlFreeParserCtxt(ctxt); 10056 10057 return ret; 10058} 10059 10060/************************************************************************ 10061 * * 10062 * Front ends when parsing from memory * 10063 * * 10064 ************************************************************************/ 10065 10066/** 10067 * xmlCreateMemoryParserCtxt: 
10068 * @buffer: a pointer to a char array 10069 * @size: the size of the array 10070 * 10071 * Create a parser context for an XML in-memory document. 10072 * 10073 * Returns the new parser context or NULL 10074 */ 10075xmlParserCtxtPtr 10076xmlCreateMemoryParserCtxt(const char *buffer, int size) { 10077 xmlParserCtxtPtr ctxt; 10078 xmlParserInputPtr input; 10079 xmlParserInputBufferPtr buf; 10080 10081 if (buffer == NULL) 10082 return(NULL); 10083 if (size <= 0) 10084 return(NULL); 10085 10086 ctxt = xmlNewParserCtxt(); 10087 if (ctxt == NULL) 10088 return(NULL); 10089 10090 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 10091 if (buf == NULL) return(NULL); 10092 10093 input = xmlNewInputStream(ctxt); 10094 if (input == NULL) { 10095 xmlFreeParserCtxt(ctxt); 10096 return(NULL); 10097 } 10098 10099 input->filename = NULL; 10100 input->buf = buf; 10101 input->base = input->buf->buffer->content; 10102 input->cur = input->buf->buffer->content; 10103 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 10104 10105 inputPush(ctxt, input); 10106 return(ctxt); 10107} 10108 10109/** 10110 * xmlSAXParseMemory: 10111 * @sax: the SAX handler block 10112 * @buffer: an pointer to a char array 10113 * @size: the size of the array 10114 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 10115 * documents 10116 * 10117 * parse an XML in-memory block and use the given SAX function block 10118 * to handle the parsing callback. If sax is NULL, fallback to the default 10119 * DOM tree building routines. 
10120 * 10121 * Returns the resulting document tree 10122 */ 10123xmlDocPtr 10124xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 10125 int size, int recovery) { 10126 xmlDocPtr ret; 10127 xmlParserCtxtPtr ctxt; 10128 10129 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10130 if (ctxt == NULL) return(NULL); 10131 if (sax != NULL) { 10132 ctxt->sax = sax; 10133 } 10134 10135 xmlParseDocument(ctxt); 10136 10137 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10138 else { 10139 ret = NULL; 10140 xmlFreeDoc(ctxt->myDoc); 10141 ctxt->myDoc = NULL; 10142 } 10143 if (sax != NULL) 10144 ctxt->sax = NULL; 10145 xmlFreeParserCtxt(ctxt); 10146 10147 return(ret); 10148} 10149 10150/** 10151 * xmlParseMemory: 10152 * @buffer: an pointer to a char array 10153 * @size: the size of the array 10154 * 10155 * parse an XML in-memory block and build a tree. 10156 * 10157 * Returns the resulting document tree 10158 */ 10159 10160xmlDocPtr xmlParseMemory(const char *buffer, int size) { 10161 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 10162} 10163 10164/** 10165 * xmlRecoverMemory: 10166 * @buffer: an pointer to a char array 10167 * @size: the size of the array 10168 * 10169 * parse an XML in-memory block and build a tree. 10170 * In the case the document is not Well Formed, a tree is built anyway 10171 * 10172 * Returns the resulting document tree 10173 */ 10174 10175xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 10176 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 10177} 10178 10179/** 10180 * xmlSAXUserParseMemory: 10181 * @sax: a SAX handler 10182 * @user_data: The user data returned on SAX callbacks 10183 * @buffer: an in-memory XML document input 10184 * @size: the length of the XML document in bytes 10185 * 10186 * A better SAX parsing routine. 10187 * parse an XML in-memory buffer and call the given SAX handler routines. 
10188 * 10189 * Returns 0 in case of success or a error number otherwise 10190 */ 10191int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 10192 const char *buffer, int size) { 10193 int ret = 0; 10194 xmlParserCtxtPtr ctxt; 10195 xmlSAXHandlerPtr oldsax = NULL; 10196 10197 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10198 if (ctxt == NULL) return -1; 10199 if (sax != NULL) { 10200 oldsax = ctxt->sax; 10201 ctxt->sax = sax; 10202 } 10203 if (user_data != NULL) 10204 ctxt->userData = user_data; 10205 10206 xmlParseDocument(ctxt); 10207 10208 if (ctxt->wellFormed) 10209 ret = 0; 10210 else { 10211 if (ctxt->errNo != 0) 10212 ret = ctxt->errNo; 10213 else 10214 ret = -1; 10215 } 10216 if (sax != NULL) { 10217 ctxt->sax = oldsax; 10218 } 10219 xmlFreeParserCtxt(ctxt); 10220 10221 return ret; 10222} 10223 10224/** 10225 * xmlCreateDocParserCtxt: 10226 * @cur: a pointer to an array of xmlChar 10227 * 10228 * Creates a parser context for an XML in-memory document. 10229 * 10230 * Returns the new parser context or NULL 10231 */ 10232xmlParserCtxtPtr 10233xmlCreateDocParserCtxt(xmlChar *cur) { 10234 int len; 10235 10236 if (cur == NULL) 10237 return(NULL); 10238 len = xmlStrlen(cur); 10239 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 10240} 10241 10242/** 10243 * xmlSAXParseDoc: 10244 * @sax: the SAX handler block 10245 * @cur: a pointer to an array of xmlChar 10246 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10247 * documents 10248 * 10249 * parse an XML in-memory document and build a tree. 10250 * It use the given SAX function block to handle the parsing callback. 10251 * If sax is NULL, fallback to the default DOM tree building routines. 
10252 * 10253 * Returns the resulting document tree 10254 */ 10255 10256xmlDocPtr 10257xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 10258 xmlDocPtr ret; 10259 xmlParserCtxtPtr ctxt; 10260 10261 if (cur == NULL) return(NULL); 10262 10263 10264 ctxt = xmlCreateDocParserCtxt(cur); 10265 if (ctxt == NULL) return(NULL); 10266 if (sax != NULL) { 10267 ctxt->sax = sax; 10268 ctxt->userData = NULL; 10269 } 10270 10271 xmlParseDocument(ctxt); 10272 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10273 else { 10274 ret = NULL; 10275 xmlFreeDoc(ctxt->myDoc); 10276 ctxt->myDoc = NULL; 10277 } 10278 if (sax != NULL) 10279 ctxt->sax = NULL; 10280 xmlFreeParserCtxt(ctxt); 10281 10282 return(ret); 10283} 10284 10285/** 10286 * xmlParseDoc: 10287 * @cur: a pointer to an array of xmlChar 10288 * 10289 * parse an XML in-memory document and build a tree. 10290 * 10291 * Returns the resulting document tree 10292 */ 10293 10294xmlDocPtr 10295xmlParseDoc(xmlChar *cur) { 10296 return(xmlSAXParseDoc(NULL, cur, 0)); 10297} 10298 10299/************************************************************************ 10300 * * 10301 * Specific function to keep track of entities references * 10302 * and used by the XSLT debugger * 10303 * * 10304 ************************************************************************/ 10305 10306static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 10307 10308/** 10309 * xmlAddEntityReference: 10310 * @ent : A valid entity 10311 * @firstNode : A valid first node for children of entity 10312 * @lastNode : A valid last node of children entity 10313 * 10314 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 10315 */ 10316static void 10317xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 10318 xmlNodePtr lastNode) 10319{ 10320 if (xmlEntityRefFunc != NULL) { 10321 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 10322 } 10323} 10324 10325 10326/** 10327 * xmlSetEntityReferenceFunc: 10328 * @func : A 
valid function 10329 * 10330 * Set the function to call call back when a xml reference has been made 10331 */ 10332void 10333xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 10334{ 10335 xmlEntityRefFunc = func; 10336} 10337 10338/************************************************************************ 10339 * * 10340 * Miscellaneous * 10341 * * 10342 ************************************************************************/ 10343 10344#ifdef LIBXML_XPATH_ENABLED 10345#include <libxml/xpath.h> 10346#endif 10347 10348extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 10349static int xmlParserInitialized = 0; 10350 10351/** 10352 * xmlInitParser: 10353 * 10354 * Initialization function for the XML parser. 10355 * This is not reentrant. Call once before processing in case of 10356 * use in multithreaded programs. 10357 */ 10358 10359void 10360xmlInitParser(void) { 10361 if (xmlParserInitialized != 0) 10362 return; 10363 10364 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 10365 (xmlGenericError == NULL)) 10366 initGenericErrorDefaultFunc(NULL); 10367 xmlInitThreads(); 10368 xmlInitMemory(); 10369 xmlInitCharEncodingHandlers(); 10370 xmlInitializePredefinedEntities(); 10371 xmlDefaultSAXHandlerInit(); 10372 xmlRegisterDefaultInputCallbacks(); 10373 xmlRegisterDefaultOutputCallbacks(); 10374#ifdef LIBXML_HTML_ENABLED 10375 htmlInitAutoClose(); 10376 htmlDefaultSAXHandlerInit(); 10377#endif 10378#ifdef LIBXML_XPATH_ENABLED 10379 xmlXPathInit(); 10380#endif 10381 xmlParserInitialized = 1; 10382} 10383 10384/** 10385 * xmlCleanupParser: 10386 * 10387 * Cleanup function for the XML parser. It tries to reclaim all 10388 * parsing related global memory allocated for the parser processing. 10389 * It doesn't deallocate any document related memory. Calling this 10390 * function should not prevent reusing the parser. 
10391 */ 10392 10393void 10394xmlCleanupParser(void) { 10395 xmlCleanupCharEncodingHandlers(); 10396 xmlCleanupPredefinedEntities(); 10397#ifdef LIBXML_CATALOG_ENABLED 10398 xmlCatalogCleanup(); 10399#endif 10400 xmlCleanupThreads(); 10401 xmlParserInitialized = 0; 10402} 10403