parser.c revision 44e1dd0027983f7112e5b6a9101156b574ecbc26
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <libxml/xmlmemory.h> 45#include <libxml/threads.h> 46#include <libxml/globals.h> 47#include <libxml/tree.h> 48#include <libxml/parser.h> 49#include <libxml/parserInternals.h> 50#include <libxml/valid.h> 51#include <libxml/entities.h> 52#include <libxml/xmlerror.h> 53#include <libxml/encoding.h> 54#include <libxml/xmlIO.h> 55#include <libxml/uri.h> 56#ifdef LIBXML_CATALOG_ENABLED 57#include <libxml/catalog.h> 58#endif 59 60#ifdef HAVE_CTYPE_H 61#include <ctype.h> 62#endif 63#ifdef HAVE_STDLIB_H 64#include <stdlib.h> 65#endif 66#ifdef HAVE_SYS_STAT_H 67#include <sys/stat.h> 68#endif 69#ifdef HAVE_FCNTL_H 70#include <fcntl.h> 71#endif 72#ifdef HAVE_UNISTD_H 73#include <unistd.h> 74#endif 75#ifdef HAVE_ZLIB_H 76#include <zlib.h> 77#endif 78 79/** 80 * MAX_DEPTH: 81 * 82 * arbitrary depth limit for the XML documents that we allow to 83 * process. This is not a limitation of the parser but a safety 84 * boundary feature. 85 */ 86#define MAX_DEPTH 1024 87 88#define XML_PARSER_BIG_BUFFER_SIZE 300 89#define XML_PARSER_BUFFER_SIZE 100 90 91#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 92 93/* 94 * List of XML prefixed PI allowed by W3C specs 95 */ 96 97static const char *xmlW3CPIs[] = { 98 "xml-stylesheet", 99 NULL 100}; 101 102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 104 const xmlChar **str); 105 106static int 107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 108 xmlSAXHandlerPtr sax, 109 void *user_data, int depth, const xmlChar *URL, 110 const xmlChar *ID, xmlNodePtr *list); 111 112static void 113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 114 xmlNodePtr lastNode); 115 116static int 117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 118 const xmlChar *string, void *user_data, xmlNodePtr *lst); 119/************************************************************************ 120 * * 121 * Parser stacks related functions and macros * 122 * * 123 ************************************************************************/ 124 125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 126 const xmlChar ** str); 127 128/** 129 * inputPush: 130 * @ctxt: an XML parser context 131 * @value: the parser input 132 * 133 * Pushes a new parser input on top of the input stack 134 * 135 * Returns 0 in case of error, the index in the stack otherwise 136 */ 137extern int 138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 139{ 140 if (ctxt->inputNr >= ctxt->inputMax) { 141 ctxt->inputMax *= 2; 142 ctxt->inputTab = 143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 144 ctxt->inputMax * 145 sizeof(ctxt->inputTab[0])); 146 if (ctxt->inputTab == NULL) { 147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); 148 return (0); 149 } 150 } 151 ctxt->inputTab[ctxt->inputNr] = value; 152 ctxt->input = value; 153 return (ctxt->inputNr++); 154} 155/** 156 * inputPop: 157 * @ctxt: an XML parser context 158 * 159 * Pops the top parser input from the input stack 160 * 161 * Returns the input just removed 162 */ 163extern xmlParserInputPtr 164inputPop(xmlParserCtxtPtr ctxt) 165{ 166 xmlParserInputPtr ret; 167 168 if (ctxt->inputNr <= 0) 169 return (0); 170 ctxt->inputNr--; 171 if (ctxt->inputNr > 0) 172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 173 else 174 ctxt->input = NULL; 175 ret = ctxt->inputTab[ctxt->inputNr]; 176 ctxt->inputTab[ctxt->inputNr] = 0; 177 return (ret); 178} 179/** 180 * nodePush: 181 * @ctxt: an XML parser context 182 * @value: the element node 183 * 184 * Pushes a new element node on top of the node stack 185 * 186 * Returns 0 in case of error, the index in the stack otherwise 187 */ 188extern int 189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 190{ 191 if (ctxt->nodeNr >= ctxt->nodeMax) { 192 ctxt->nodeMax *= 2; 193 ctxt->nodeTab = 194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 195 ctxt->nodeMax * 196 sizeof(ctxt->nodeTab[0])); 197 if (ctxt->nodeTab == NULL) { 198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); 199 return (0); 200 } 201 } 202#ifdef MAX_DEPTH 203 if (ctxt->nodeNr > MAX_DEPTH) { 204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 205 ctxt->sax->error(ctxt->userData, 206 "Excessive depth in document: change MAX_DEPTH = %d\n", 207 MAX_DEPTH); 208 ctxt->wellFormed = 0; 209 ctxt->instate = XML_PARSER_EOF; 210 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 211 return(0); 212 } 213#endif 214 ctxt->nodeTab[ctxt->nodeNr] = value; 215 ctxt->node = value; 216 return (ctxt->nodeNr++); 217} 218/** 219 * nodePop: 220 * @ctxt: an XML parser context 221 * 222 * Pops the top element node from the node stack 223 * 224 * Returns the node just removed 225 */ 226extern xmlNodePtr 227nodePop(xmlParserCtxtPtr ctxt) 228{ 229 xmlNodePtr ret; 230 231 if (ctxt->nodeNr <= 0) 232 return (0); 233 ctxt->nodeNr--; 234 if (ctxt->nodeNr > 0) 235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 236 else 237 ctxt->node = NULL; 238 ret = ctxt->nodeTab[ctxt->nodeNr]; 239 ctxt->nodeTab[ctxt->nodeNr] = 0; 240 return (ret); 241} 242/** 243 * namePush: 244 * @ctxt: an XML parser context 245 * @value: the element name 246 * 247 * Pushes a new element name on top of the name stack 248 * 249 * Returns 0 in case of error, the index in the stack otherwise 250 */ 251extern int 252namePush(xmlParserCtxtPtr ctxt, xmlChar * value) 253{ 254 if (ctxt->nameNr >= ctxt->nameMax) { 255 ctxt->nameMax *= 2; 256 ctxt->nameTab = 257 (xmlChar * *)xmlRealloc(ctxt->nameTab, 258 ctxt->nameMax * 259 sizeof(ctxt->nameTab[0])); 260 if (ctxt->nameTab == NULL) { 261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); 262 return (0); 263 } 264 } 265 ctxt->nameTab[ctxt->nameNr] = value; 266 ctxt->name = value; 267 return (ctxt->nameNr++); 268} 269/** 270 * namePop: 271 * @ctxt: an XML parser context 272 * 273 * Pops the top element name from the name stack 274 * 275 * Returns the name just removed 276 */ 277extern xmlChar * 278namePop(xmlParserCtxtPtr ctxt) 279{ 280 xmlChar *ret; 281 282 if (ctxt->nameNr <= 0) 283 return (0); 284 ctxt->nameNr--; 285 if (ctxt->nameNr > 0) 286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 287 else 288 ctxt->name = NULL; 289 ret = ctxt->nameTab[ctxt->nameNr]; 290 ctxt->nameTab[ctxt->nameNr] = 0; 291 return (ret); 292} 293 294static int spacePush(xmlParserCtxtPtr ctxt, int val) { 295 if (ctxt->spaceNr >= ctxt->spaceMax) { 296 ctxt->spaceMax *= 2; 297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 299 if (ctxt->spaceTab == NULL) { 300 xmlGenericError(xmlGenericErrorContext, 301 "realloc failed !\n"); 302 return(0); 303 } 304 } 305 ctxt->spaceTab[ctxt->spaceNr] = val; 306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 307 return(ctxt->spaceNr++); 308} 309 310static int spacePop(xmlParserCtxtPtr ctxt) { 311 int ret; 312 if (ctxt->spaceNr <= 0) return(0); 313 ctxt->spaceNr--; 314 if (ctxt->spaceNr > 0) 315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 316 else 317 ctxt->space = NULL; 318 ret = ctxt->spaceTab[ctxt->spaceNr]; 319 ctxt->spaceTab[ctxt->spaceNr] = -1; 320 return(ret); 321} 322 323/* 324 * Macros for accessing the content. Those should be used only by the parser, 325 * and not exported. 326 * 327 * Dirty macros, i.e. one often need to make assumption on the context to 328 * use them 329 * 330 * CUR_PTR return the current pointer to the xmlChar to be parsed. 331 * To be used with extreme caution since operations consuming 332 * characters may move the input buffer to a different location ! 333 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 334 * This should be used internally by the parser 335 * only to compare to ASCII values otherwise it would break when 336 * running with UTF-8 encoding. 337 * RAW same as CUR but in the input buffer, bypass any token 338 * extraction that may have been done 339 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 340 * to compare on ASCII based substring. 341 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 342 * strings within the parser. 343 * 344 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 345 * 346 * NEXT Skip to the next character, this does the proper decoding 347 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 348 * NEXTL(l) Skip l xmlChar in the input buffer 349 * CUR_CHAR(l) returns the current unicode character (int), set l 350 * to the number of xmlChars used for the encoding [0-5]. 351 * CUR_SCHAR same but operate on a string instead of the context 352 * COPY_BUF copy the current unicode char to the target buffer, increment 353 * the index 354 * GROW, SHRINK handling of input buffers 355 */ 356 357#define RAW (*ctxt->input->cur) 358#define CUR (*ctxt->input->cur) 359#define NXT(val) ctxt->input->cur[(val)] 360#define CUR_PTR ctxt->input->cur 361 362#define SKIP(val) do { \ 363 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 364 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 365 if ((*ctxt->input->cur == 0) && \ 366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 367 xmlPopInput(ctxt); \ 368 } while (0) 369 370#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \ 371 xmlSHRINK (ctxt); 372 373static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 374 xmlParserInputShrink(ctxt->input); 375 if ((*ctxt->input->cur == 0) && 376 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 377 xmlPopInput(ctxt); 378 } 379 380#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \ 381 xmlGROW (ctxt); 382 383static void xmlGROW (xmlParserCtxtPtr ctxt) { 384 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 385 if ((*ctxt->input->cur == 0) && 386 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 387 xmlPopInput(ctxt); 388 } 389 390#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 391 392#define NEXT xmlNextChar(ctxt) 393 394#define NEXT1 { \ 395 ctxt->input->cur++; \ 396 ctxt->nbChars++; \ 397 if (*ctxt->input->cur == 0) \ 398 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 399 } 400 401#define NEXTL(l) do { \ 402 if (*(ctxt->input->cur) == '\n') { \ 403 ctxt->input->line++; ctxt->input->col = 1; \ 404 } else ctxt->input->col++; \ 405 ctxt->input->cur += l; \ 406 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 407 } while (0) 408 409#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 410#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 411 412#define COPY_BUF(l,b,i,v) \ 413 if (l == 1) b[i++] = (xmlChar) v; \ 414 else i += xmlCopyCharMultiByte(&b[i],v) 415 416/** 417 * xmlSkipBlankChars: 418 * @ctxt: the XML parser context 419 * 420 * skip all blanks character found at that point in the input streams. 421 * It pops up finished entities in the process if allowable at that point. 422 * 423 * Returns the number of space chars skipped 424 */ 425 426int 427xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 428 int res = 0; 429 430 /* 431 * It's Okay to use CUR/NEXT here since all the blanks are on 432 * the ASCII range. 433 */ 434 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 435 const xmlChar *cur; 436 /* 437 * if we are in the document content, go really fast 438 */ 439 cur = ctxt->input->cur; 440 while (IS_BLANK(*cur)) { 441 if (*cur == '\n') { 442 ctxt->input->line++; ctxt->input->col = 1; 443 } 444 cur++; 445 res++; 446 if (*cur == 0) { 447 ctxt->input->cur = cur; 448 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 449 cur = ctxt->input->cur; 450 } 451 } 452 ctxt->input->cur = cur; 453 } else { 454 int cur; 455 do { 456 cur = CUR; 457 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 458 NEXT; 459 cur = CUR; 460 res++; 461 } 462 while ((cur == 0) && (ctxt->inputNr > 1) && 463 (ctxt->instate != XML_PARSER_COMMENT)) { 464 xmlPopInput(ctxt); 465 cur = CUR; 466 } 467 /* 468 * Need to handle support of entities branching here 469 */ 470 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 471 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 472 } 473 return(res); 474} 475 476/************************************************************************ 477 * * 478 * Commodity functions to handle entities * 479 * * 480 ************************************************************************/ 481 482/** 483 * xmlPopInput: 484 * @ctxt: an XML parser context 485 * 486 * xmlPopInput: the current input pointed by ctxt->input came to an end 487 * pop it and return the next char. 488 * 489 * Returns the current xmlChar in the parser context 490 */ 491xmlChar 492xmlPopInput(xmlParserCtxtPtr ctxt) { 493 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 494 if (xmlParserDebugEntities) 495 xmlGenericError(xmlGenericErrorContext, 496 "Popping input %d\n", ctxt->inputNr); 497 xmlFreeInputStream(inputPop(ctxt)); 498 if ((*ctxt->input->cur == 0) && 499 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 500 return(xmlPopInput(ctxt)); 501 return(CUR); 502} 503 504/** 505 * xmlPushInput: 506 * @ctxt: an XML parser context 507 * @input: an XML parser input fragment (entity, XML fragment ...). 508 * 509 * xmlPushInput: switch to a new input stream which is stacked on top 510 * of the previous one(s). 511 */ 512void 513xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 514 if (input == NULL) return; 515 516 if (xmlParserDebugEntities) { 517 if ((ctxt->input != NULL) && (ctxt->input->filename)) 518 xmlGenericError(xmlGenericErrorContext, 519 "%s(%d): ", ctxt->input->filename, 520 ctxt->input->line); 521 xmlGenericError(xmlGenericErrorContext, 522 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 523 } 524 inputPush(ctxt, input); 525 GROW; 526} 527 528/** 529 * xmlParseCharRef: 530 * @ctxt: an XML parser context 531 * 532 * parse Reference declarations 533 * 534 * [66] CharRef ::= '&#' [0-9]+ ';' | 535 * '&#x' [0-9a-fA-F]+ ';' 536 * 537 * [ WFC: Legal Character ] 538 * Characters referred to using character references must match the 539 * production for Char. 540 * 541 * Returns the value parsed (as an int), 0 in case of error 542 */ 543int 544xmlParseCharRef(xmlParserCtxtPtr ctxt) { 545 unsigned int val = 0; 546 int count = 0; 547 548 /* 549 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 550 */ 551 if ((RAW == '&') && (NXT(1) == '#') && 552 (NXT(2) == 'x')) { 553 SKIP(3); 554 GROW; 555 while (RAW != ';') { /* loop blocked by count */ 556 if (count++ > 20) { 557 count = 0; 558 GROW; 559 } 560 if ((RAW >= '0') && (RAW <= '9')) 561 val = val * 16 + (CUR - '0'); 562 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 563 val = val * 16 + (CUR - 'a') + 10; 564 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 565 val = val * 16 + (CUR - 'A') + 10; 566 else { 567 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 569 ctxt->sax->error(ctxt->userData, 570 "xmlParseCharRef: invalid hexadecimal value\n"); 571 ctxt->wellFormed = 0; 572 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 573 val = 0; 574 break; 575 } 576 NEXT; 577 count++; 578 } 579 if (RAW == ';') { 580 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 581 ctxt->nbChars ++; 582 ctxt->input->cur++; 583 } 584 } else if ((RAW == '&') && (NXT(1) == '#')) { 585 SKIP(2); 586 GROW; 587 while (RAW != ';') { /* loop blocked by count */ 588 if (count++ > 20) { 589 count = 0; 590 GROW; 591 } 592 if ((RAW >= '0') && (RAW <= '9')) 593 val = val * 10 + (CUR - '0'); 594 else { 595 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 597 ctxt->sax->error(ctxt->userData, 598 "xmlParseCharRef: invalid decimal value\n"); 599 ctxt->wellFormed = 0; 600 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 601 val = 0; 602 break; 603 } 604 NEXT; 605 count++; 606 } 607 if (RAW == ';') { 608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 609 ctxt->nbChars ++; 610 ctxt->input->cur++; 611 } 612 } else { 613 ctxt->errNo = XML_ERR_INVALID_CHARREF; 614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 615 ctxt->sax->error(ctxt->userData, 616 "xmlParseCharRef: invalid value\n"); 617 ctxt->wellFormed = 0; 618 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 619 } 620 621 /* 622 * [ WFC: Legal Character ] 623 * Characters referred to using character references must match the 624 * production for Char. 625 */ 626 if (IS_CHAR(val)) { 627 return(val); 628 } else { 629 ctxt->errNo = XML_ERR_INVALID_CHAR; 630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 631 ctxt->sax->error(ctxt->userData, 632 "xmlParseCharRef: invalid xmlChar value %d\n", 633 val); 634 ctxt->wellFormed = 0; 635 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 636 } 637 return(0); 638} 639 640/** 641 * xmlParseStringCharRef: 642 * @ctxt: an XML parser context 643 * @str: a pointer to an index in the string 644 * 645 * parse Reference declarations, variant parsing from a string rather 646 * than an an input flow. 647 * 648 * [66] CharRef ::= '&#' [0-9]+ ';' | 649 * '&#x' [0-9a-fA-F]+ ';' 650 * 651 * [ WFC: Legal Character ] 652 * Characters referred to using character references must match the 653 * production for Char. 654 * 655 * Returns the value parsed (as an int), 0 in case of error, str will be 656 * updated to the current value of the index 657 */ 658static int 659xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 660 const xmlChar *ptr; 661 xmlChar cur; 662 int val = 0; 663 664 if ((str == NULL) || (*str == NULL)) return(0); 665 ptr = *str; 666 cur = *ptr; 667 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 668 ptr += 3; 669 cur = *ptr; 670 while (cur != ';') { /* Non input consuming loop */ 671 if ((cur >= '0') && (cur <= '9')) 672 val = val * 16 + (cur - '0'); 673 else if ((cur >= 'a') && (cur <= 'f')) 674 val = val * 16 + (cur - 'a') + 10; 675 else if ((cur >= 'A') && (cur <= 'F')) 676 val = val * 16 + (cur - 'A') + 10; 677 else { 678 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 680 ctxt->sax->error(ctxt->userData, 681 "xmlParseStringCharRef: invalid hexadecimal value\n"); 682 ctxt->wellFormed = 0; 683 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 684 val = 0; 685 break; 686 } 687 ptr++; 688 cur = *ptr; 689 } 690 if (cur == ';') 691 ptr++; 692 } else if ((cur == '&') && (ptr[1] == '#')){ 693 ptr += 2; 694 cur = *ptr; 695 while (cur != ';') { /* Non input consuming loops */ 696 if ((cur >= '0') && (cur <= '9')) 697 val = val * 10 + (cur - '0'); 698 else { 699 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 701 ctxt->sax->error(ctxt->userData, 702 "xmlParseStringCharRef: invalid decimal value\n"); 703 ctxt->wellFormed = 0; 704 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 705 val = 0; 706 break; 707 } 708 ptr++; 709 cur = *ptr; 710 } 711 if (cur == ';') 712 ptr++; 713 } else { 714 ctxt->errNo = XML_ERR_INVALID_CHARREF; 715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 716 ctxt->sax->error(ctxt->userData, 717 "xmlParseStringCharRef: invalid value\n"); 718 ctxt->wellFormed = 0; 719 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 720 return(0); 721 } 722 *str = ptr; 723 724 /* 725 * [ WFC: Legal Character ] 726 * Characters referred to using character references must match the 727 * production for Char. 728 */ 729 if (IS_CHAR(val)) { 730 return(val); 731 } else { 732 ctxt->errNo = XML_ERR_INVALID_CHAR; 733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 734 ctxt->sax->error(ctxt->userData, 735 "xmlParseStringCharRef: invalid xmlChar value %d\n", val); 736 ctxt->wellFormed = 0; 737 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 738 } 739 return(0); 740} 741 742/** 743 * xmlNewBlanksWrapperInputStream: 744 * @ctxt: an XML parser context 745 * @entity: an Entity pointer 746 * 747 * Create a new input stream for wrapping 748 * blanks around a PEReference 749 * 750 * Returns the new input stream or NULL 751 */ 752 753static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 754 755static xmlParserInputPtr 756xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 757 xmlParserInputPtr input; 758 xmlChar *buffer; 759 size_t length; 760 if (entity == NULL) { 761 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 763 ctxt->sax->error(ctxt->userData, 764 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n"); 765 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 766 return(NULL); 767 } 768 if (xmlParserDebugEntities) 769 xmlGenericError(xmlGenericErrorContext, 770 "new blanks wrapper for entity: %s\n", entity->name); 771 input = xmlNewInputStream(ctxt); 772 if (input == NULL) { 773 return(NULL); 774 } 775 length = xmlStrlen(entity->name) + 5; 776 buffer = xmlMalloc(length); 777 if (buffer == NULL) { 778 return(NULL); 779 } 780 buffer [0] = ' '; 781 buffer [1] = '%'; 782 buffer [length-3] = ';'; 783 buffer [length-2] = ' '; 784 buffer [length-1] = 0; 785 memcpy(buffer + 2, entity->name, length - 5); 786 input->free = deallocblankswrapper; 787 input->base = buffer; 788 input->cur = buffer; 789 input->length = length; 790 input->end = &buffer[length]; 791 return(input); 792} 793 794/** 795 * xmlParserHandlePEReference: 796 * @ctxt: the parser context 797 * 798 * [69] PEReference ::= '%' Name ';' 799 * 800 * [ WFC: No Recursion ] 801 * A parsed entity must not contain a recursive 802 * reference to itself, either directly or indirectly. 803 * 804 * [ WFC: Entity Declared ] 805 * In a document without any DTD, a document with only an internal DTD 806 * subset which contains no parameter entity references, or a document 807 * with "standalone='yes'", ... ... The declaration of a parameter 808 * entity must precede any reference to it... 809 * 810 * [ VC: Entity Declared ] 811 * In a document with an external subset or external parameter entities 812 * with "standalone='no'", ... ... The declaration of a parameter entity 813 * must precede any reference to it... 814 * 815 * [ WFC: In DTD ] 816 * Parameter-entity references may only appear in the DTD. 817 * NOTE: misleading but this is handled. 818 * 819 * A PEReference may have been detected in the current input stream 820 * the handling is done accordingly to 821 * http://www.w3.org/TR/REC-xml#entproc 822 * i.e. 823 * - Included in literal in entity values 824 * - Included as Parameter Entity reference within DTDs 825 */ 826void 827xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 828 xmlChar *name; 829 xmlEntityPtr entity = NULL; 830 xmlParserInputPtr input; 831 832 if (RAW != '%') return; 833 switch(ctxt->instate) { 834 case XML_PARSER_CDATA_SECTION: 835 return; 836 case XML_PARSER_COMMENT: 837 return; 838 case XML_PARSER_START_TAG: 839 return; 840 case XML_PARSER_END_TAG: 841 return; 842 case XML_PARSER_EOF: 843 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 845 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 846 ctxt->wellFormed = 0; 847 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 848 return; 849 case XML_PARSER_PROLOG: 850 case XML_PARSER_START: 851 case XML_PARSER_MISC: 852 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 854 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 855 ctxt->wellFormed = 0; 856 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 857 return; 858 case XML_PARSER_ENTITY_DECL: 859 case XML_PARSER_CONTENT: 860 case XML_PARSER_ATTRIBUTE_VALUE: 861 case XML_PARSER_PI: 862 case XML_PARSER_SYSTEM_LITERAL: 863 case XML_PARSER_PUBLIC_LITERAL: 864 /* we just ignore it there */ 865 return; 866 case XML_PARSER_EPILOG: 867 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 869 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 870 ctxt->wellFormed = 0; 871 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 872 return; 873 case XML_PARSER_ENTITY_VALUE: 874 /* 875 * NOTE: in the case of entity values, we don't do the 876 * substitution here since we need the literal 877 * entity value to be able to save the internal 878 * subset of the document. 879 * This will be handled by xmlStringDecodeEntities 880 */ 881 return; 882 case XML_PARSER_DTD: 883 /* 884 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 885 * In the internal DTD subset, parameter-entity references 886 * can occur only where markup declarations can occur, not 887 * within markup declarations. 888 * In that case this is handled in xmlParseMarkupDecl 889 */ 890 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 891 return; 892 if (IS_BLANK(NXT(1)) || NXT(1) == 0) 893 return; 894 break; 895 case XML_PARSER_IGNORE: 896 return; 897 } 898 899 NEXT; 900 name = xmlParseName(ctxt); 901 if (xmlParserDebugEntities) 902 xmlGenericError(xmlGenericErrorContext, 903 "PEReference: %s\n", name); 904 if (name == NULL) { 905 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 907 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n"); 908 ctxt->wellFormed = 0; 909 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 910 } else { 911 if (RAW == ';') { 912 NEXT; 913 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 914 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 915 if (entity == NULL) { 916 917 /* 918 * [ WFC: Entity Declared ] 919 * In a document without any DTD, a document with only an 920 * internal DTD subset which contains no parameter entity 921 * references, or a document with "standalone='yes'", ... 922 * ... The declaration of a parameter entity must precede 923 * any reference to it... 924 */ 925 if ((ctxt->standalone == 1) || 926 ((ctxt->hasExternalSubset == 0) && 927 (ctxt->hasPErefs == 0))) { 928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 929 ctxt->sax->error(ctxt->userData, 930 "PEReference: %%%s; not found\n", name); 931 ctxt->wellFormed = 0; 932 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 933 } else { 934 /* 935 * [ VC: Entity Declared ] 936 * In a document with an external subset or external 937 * parameter entities with "standalone='no'", ... 938 * ... The declaration of a parameter entity must precede 939 * any reference to it... 940 */ 941 if ((!ctxt->disableSAX) && 942 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 943 ctxt->vctxt.error(ctxt->vctxt.userData, 944 "PEReference: %%%s; not found\n", name); 945 } else if ((!ctxt->disableSAX) && 946 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 947 ctxt->sax->warning(ctxt->userData, 948 "PEReference: %%%s; not found\n", name); 949 ctxt->valid = 0; 950 } 951 } else if (ctxt->input->free != deallocblankswrapper) { 952 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 953 xmlPushInput(ctxt, input); 954 } else { 955 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 956 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 957 xmlChar start[4]; 958 xmlCharEncoding enc; 959 960 /* 961 * handle the extra spaces added before and after 962 * c.f. http://www.w3.org/TR/REC-xml#as-PE 963 * this is done independently. 964 */ 965 input = xmlNewEntityInputStream(ctxt, entity); 966 xmlPushInput(ctxt, input); 967 968 /* 969 * Get the 4 first bytes and decode the charset 970 * if enc != XML_CHAR_ENCODING_NONE 971 * plug some encoding conversion routines. 972 */ 973 GROW 974 if (entity->length >= 4) { 975 start[0] = RAW; 976 start[1] = NXT(1); 977 start[2] = NXT(2); 978 start[3] = NXT(3); 979 enc = xmlDetectCharEncoding(start, 4); 980 if (enc != XML_CHAR_ENCODING_NONE) { 981 xmlSwitchEncoding(ctxt, enc); 982 } 983 } 984 985 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 986 (RAW == '<') && (NXT(1) == '?') && 987 (NXT(2) == 'x') && (NXT(3) == 'm') && 988 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 989 xmlParseTextDecl(ctxt); 990 } 991 } else { 992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 993 ctxt->sax->error(ctxt->userData, 994 "xmlParserHandlePEReference: %s is not a parameter entity\n", 995 name); 996 ctxt->wellFormed = 0; 997 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 998 } 999 } 1000 } else { 1001 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 1002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1003 ctxt->sax->error(ctxt->userData, 1004 "xmlParserHandlePEReference: expecting ';'\n"); 1005 ctxt->wellFormed = 0; 1006 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 1007 } 1008 xmlFree(name); 1009 } 1010} 1011 1012/* 1013 * Macro used to grow the current buffer. 1014 */ 1015#define growBuffer(buffer) { \ 1016 buffer##_size *= 2; \ 1017 buffer = (xmlChar *) \ 1018 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1019 if (buffer == NULL) { \ 1020 xmlGenericError(xmlGenericErrorContext, "realloc failed"); \ 1021 return(NULL); \ 1022 } \ 1023} 1024 1025/** 1026 * xmlStringDecodeEntities: 1027 * @ctxt: the parser context 1028 * @str: the input string 1029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1030 * @end: an end marker xmlChar, 0 if none 1031 * @end2: an end marker xmlChar, 0 if none 1032 * @end3: an end marker xmlChar, 0 if none 1033 * 1034 * Takes a entity string content and process to do the adequate substitutions. 1035 * 1036 * [67] Reference ::= EntityRef | CharRef 1037 * 1038 * [69] PEReference ::= '%' Name ';' 1039 * 1040 * Returns A newly allocated string with the substitution done. The caller 1041 * must deallocate it ! 1042 */ 1043xmlChar * 1044xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 1045 xmlChar end, xmlChar end2, xmlChar end3) { 1046 xmlChar *buffer = NULL; 1047 int buffer_size = 0; 1048 1049 xmlChar *current = NULL; 1050 xmlEntityPtr ent; 1051 int c,l; 1052 int nbchars = 0; 1053 1054 if (str == NULL) 1055 return(NULL); 1056 1057 if (ctxt->depth > 40) { 1058 ctxt->errNo = XML_ERR_ENTITY_LOOP; 1059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1060 ctxt->sax->error(ctxt->userData, 1061 "Detected entity reference loop\n"); 1062 ctxt->wellFormed = 0; 1063 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 1064 return(NULL); 1065 } 1066 1067 /* 1068 * allocate a translation buffer. 1069 */ 1070 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1071 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 1072 if (buffer == NULL) { 1073 xmlGenericError(xmlGenericErrorContext, 1074 "xmlStringDecodeEntities: malloc failed"); 1075 return(NULL); 1076 } 1077 1078 /* 1079 * OK loop until we reach one of the ending char or a size limit. 1080 * we are operating on already parsed values. 1081 */ 1082 c = CUR_SCHAR(str, l); 1083 while ((c != 0) && (c != end) && /* non input consuming loop */ 1084 (c != end2) && (c != end3)) { 1085 1086 if (c == 0) break; 1087 if ((c == '&') && (str[1] == '#')) { 1088 int val = xmlParseStringCharRef(ctxt, &str); 1089 if (val != 0) { 1090 COPY_BUF(0,buffer,nbchars,val); 1091 } 1092 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1093 if (xmlParserDebugEntities) 1094 xmlGenericError(xmlGenericErrorContext, 1095 "String decoding Entity Reference: %.30s\n", 1096 str); 1097 ent = xmlParseStringEntityRef(ctxt, &str); 1098 if ((ent != NULL) && 1099 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1100 if (ent->content != NULL) { 1101 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1102 } else { 1103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1104 ctxt->sax->error(ctxt->userData, 1105 "internal error entity has no content\n"); 1106 } 1107 } else if ((ent != NULL) && (ent->content != NULL)) { 1108 xmlChar *rep; 1109 1110 ctxt->depth++; 1111 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1112 0, 0, 0); 1113 ctxt->depth--; 1114 if (rep != NULL) { 1115 current = rep; 1116 while (*current != 0) { /* non input consuming loop */ 1117 buffer[nbchars++] = *current++; 1118 if (nbchars > 1119 buffer_size - XML_PARSER_BUFFER_SIZE) { 1120 growBuffer(buffer); 1121 } 1122 } 1123 xmlFree(rep); 1124 } 1125 } else if (ent != NULL) { 1126 int i = xmlStrlen(ent->name); 1127 const xmlChar *cur = ent->name; 1128 1129 buffer[nbchars++] = '&'; 1130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1131 growBuffer(buffer); 1132 } 1133 for (;i > 0;i--) 1134 buffer[nbchars++] = *cur++; 1135 buffer[nbchars++] = ';'; 1136 } 1137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1138 if (xmlParserDebugEntities) 1139 xmlGenericError(xmlGenericErrorContext, 1140 "String decoding PE Reference: %.30s\n", str); 1141 ent = xmlParseStringPEReference(ctxt, &str); 1142 if (ent != NULL) { 1143 xmlChar *rep; 1144 1145 ctxt->depth++; 1146 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1147 0, 0, 0); 1148 ctxt->depth--; 1149 if (rep != NULL) { 1150 current = rep; 1151 while (*current != 0) { /* non input consuming loop */ 1152 buffer[nbchars++] = *current++; 1153 if (nbchars > 1154 buffer_size - XML_PARSER_BUFFER_SIZE) { 1155 growBuffer(buffer); 1156 } 1157 } 1158 xmlFree(rep); 1159 } 1160 } 1161 } else { 1162 COPY_BUF(l,buffer,nbchars,c); 1163 str += l; 1164 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1165 growBuffer(buffer); 1166 } 1167 } 1168 c = CUR_SCHAR(str, l); 1169 } 1170 buffer[nbchars++] = 0; 1171 return(buffer); 1172} 1173 1174 1175/************************************************************************ 1176 * * 1177 * Commodity functions to handle xmlChars * 1178 * * 1179 ************************************************************************/ 1180 1181/** 1182 * xmlStrndup: 1183 * @cur: the input xmlChar * 1184 * @len: the len of @cur 1185 * 1186 * a strndup for array of xmlChar's 1187 * 1188 * Returns a new xmlChar * or NULL 1189 */ 1190xmlChar * 1191xmlStrndup(const xmlChar *cur, int len) { 1192 xmlChar *ret; 1193 1194 if ((cur == NULL) || (len < 0)) return(NULL); 1195 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1196 if (ret == NULL) { 1197 xmlGenericError(xmlGenericErrorContext, 1198 "malloc of %ld byte failed\n", 1199 (len + 1) * (long)sizeof(xmlChar)); 1200 return(NULL); 1201 } 1202 memcpy(ret, cur, len * sizeof(xmlChar)); 1203 ret[len] = 0; 1204 return(ret); 1205} 1206 1207/** 1208 * xmlStrdup: 1209 * @cur: the input xmlChar * 1210 * 1211 * a strdup for array of xmlChar's. Since they are supposed to be 1212 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1213 * a termination mark of '0'. 1214 * 1215 * Returns a new xmlChar * or NULL 1216 */ 1217xmlChar * 1218xmlStrdup(const xmlChar *cur) { 1219 const xmlChar *p = cur; 1220 1221 if (cur == NULL) return(NULL); 1222 while (*p != 0) p++; /* non input consuming */ 1223 return(xmlStrndup(cur, p - cur)); 1224} 1225 1226/** 1227 * xmlCharStrndup: 1228 * @cur: the input char * 1229 * @len: the len of @cur 1230 * 1231 * a strndup for char's to xmlChar's 1232 * 1233 * Returns a new xmlChar * or NULL 1234 */ 1235 1236xmlChar * 1237xmlCharStrndup(const char *cur, int len) { 1238 int i; 1239 xmlChar *ret; 1240 1241 if ((cur == NULL) || (len < 0)) return(NULL); 1242 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1243 if (ret == NULL) { 1244 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1245 (len + 1) * (long)sizeof(xmlChar)); 1246 return(NULL); 1247 } 1248 for (i = 0;i < len;i++) 1249 ret[i] = (xmlChar) cur[i]; 1250 ret[len] = 0; 1251 return(ret); 1252} 1253 1254/** 1255 * xmlCharStrdup: 1256 * @cur: the input char * 1257 * 1258 * a strdup for char's to xmlChar's 1259 * 1260 * Returns a new xmlChar * or NULL 1261 */ 1262 1263xmlChar * 1264xmlCharStrdup(const char *cur) { 1265 const char *p = cur; 1266 1267 if (cur == NULL) return(NULL); 1268 while (*p != '\0') p++; /* non input consuming */ 1269 return(xmlCharStrndup(cur, p - cur)); 1270} 1271 1272/** 1273 * xmlStrcmp: 1274 * @str1: the first xmlChar * 1275 * @str2: the second xmlChar * 1276 * 1277 * a strcmp for xmlChar's 1278 * 1279 * Returns the integer result of the comparison 1280 */ 1281 1282int 1283xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1284 register int tmp; 1285 1286 if (str1 == str2) return(0); 1287 if (str1 == NULL) return(-1); 1288 if (str2 == NULL) return(1); 1289 do { 1290 tmp = *str1++ - *str2; 1291 if (tmp != 0) return(tmp); 1292 } while (*str2++ != 0); 1293 return 0; 1294} 1295 1296/** 1297 * xmlStrEqual: 1298 * @str1: the first xmlChar * 1299 * @str2: the second xmlChar * 1300 * 1301 * Check if both string are equal of have same content 1302 * Should be a bit more readable and faster than xmlStrEqual() 1303 * 1304 * Returns 1 if they are equal, 0 if they are different 1305 */ 1306 1307int 1308xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1309 if (str1 == str2) return(1); 1310 if (str1 == NULL) return(0); 1311 if (str2 == NULL) return(0); 1312 do { 1313 if (*str1++ != *str2) return(0); 1314 } while (*str2++); 1315 return(1); 1316} 1317 1318/** 1319 * xmlStrncmp: 1320 * @str1: the first xmlChar * 1321 * @str2: the second xmlChar * 1322 * @len: the max comparison length 1323 * 1324 * a strncmp for xmlChar's 1325 * 1326 * Returns the integer result of the comparison 1327 */ 1328 1329int 1330xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1331 register int tmp; 1332 1333 if (len <= 0) return(0); 1334 if (str1 == str2) return(0); 1335 if (str1 == NULL) return(-1); 1336 if (str2 == NULL) return(1); 1337 do { 1338 tmp = *str1++ - *str2; 1339 if (tmp != 0 || --len == 0) return(tmp); 1340 } while (*str2++ != 0); 1341 return 0; 1342} 1343 1344static const xmlChar casemap[256] = { 1345 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1346 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1347 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1348 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1349 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1350 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1351 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1352 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1353 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1354 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1355 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1356 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1357 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1360 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1361 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1362 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1363 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1364 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1365 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1366 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1367 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1368 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1369 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1370 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1371 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1372 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1373 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1374 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1375 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1376 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1377}; 1378 1379/** 1380 * xmlStrcasecmp: 1381 * @str1: the first xmlChar * 1382 * @str2: the second xmlChar * 1383 * 1384 * a strcasecmp for xmlChar's 1385 * 1386 * Returns the integer result of the comparison 1387 */ 1388 1389int 1390xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1391 register int tmp; 1392 1393 if (str1 == str2) return(0); 1394 if (str1 == NULL) return(-1); 1395 if (str2 == NULL) return(1); 1396 do { 1397 tmp = casemap[*str1++] - casemap[*str2]; 1398 if (tmp != 0) return(tmp); 1399 } while (*str2++ != 0); 1400 return 0; 1401} 1402 1403/** 1404 * xmlStrncasecmp: 1405 * @str1: the first xmlChar * 1406 * @str2: the second xmlChar * 1407 * @len: the max comparison length 1408 * 1409 * a strncasecmp for xmlChar's 1410 * 1411 * Returns the integer result of the comparison 1412 */ 1413 1414int 1415xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1416 register int tmp; 1417 1418 if (len <= 0) return(0); 1419 if (str1 == str2) return(0); 1420 if (str1 == NULL) return(-1); 1421 if (str2 == NULL) return(1); 1422 do { 1423 tmp = casemap[*str1++] - casemap[*str2]; 1424 if (tmp != 0 || --len == 0) return(tmp); 1425 } while (*str2++ != 0); 1426 return 0; 1427} 1428 1429/** 1430 * xmlStrchr: 1431 * @str: the xmlChar * array 1432 * @val: the xmlChar to search 1433 * 1434 * a strchr for xmlChar's 1435 * 1436 * Returns the xmlChar * for the first occurrence or NULL. 1437 */ 1438 1439const xmlChar * 1440xmlStrchr(const xmlChar *str, xmlChar val) { 1441 if (str == NULL) return(NULL); 1442 while (*str != 0) { /* non input consuming */ 1443 if (*str == val) return((xmlChar *) str); 1444 str++; 1445 } 1446 return(NULL); 1447} 1448 1449/** 1450 * xmlStrstr: 1451 * @str: the xmlChar * array (haystack) 1452 * @val: the xmlChar to search (needle) 1453 * 1454 * a strstr for xmlChar's 1455 * 1456 * Returns the xmlChar * for the first occurrence or NULL. 1457 */ 1458 1459const xmlChar * 1460xmlStrstr(const xmlChar *str, const xmlChar *val) { 1461 int n; 1462 1463 if (str == NULL) return(NULL); 1464 if (val == NULL) return(NULL); 1465 n = xmlStrlen(val); 1466 1467 if (n == 0) return(str); 1468 while (*str != 0) { /* non input consuming */ 1469 if (*str == *val) { 1470 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1471 } 1472 str++; 1473 } 1474 return(NULL); 1475} 1476 1477/** 1478 * xmlStrcasestr: 1479 * @str: the xmlChar * array (haystack) 1480 * @val: the xmlChar to search (needle) 1481 * 1482 * a case-ignoring strstr for xmlChar's 1483 * 1484 * Returns the xmlChar * for the first occurrence or NULL. 1485 */ 1486 1487const xmlChar * 1488xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1489 int n; 1490 1491 if (str == NULL) return(NULL); 1492 if (val == NULL) return(NULL); 1493 n = xmlStrlen(val); 1494 1495 if (n == 0) return(str); 1496 while (*str != 0) { /* non input consuming */ 1497 if (casemap[*str] == casemap[*val]) 1498 if (!xmlStrncasecmp(str, val, n)) return(str); 1499 str++; 1500 } 1501 return(NULL); 1502} 1503 1504/** 1505 * xmlStrsub: 1506 * @str: the xmlChar * array (haystack) 1507 * @start: the index of the first char (zero based) 1508 * @len: the length of the substring 1509 * 1510 * Extract a substring of a given string 1511 * 1512 * Returns the xmlChar * for the first occurrence or NULL. 1513 */ 1514 1515xmlChar * 1516xmlStrsub(const xmlChar *str, int start, int len) { 1517 int i; 1518 1519 if (str == NULL) return(NULL); 1520 if (start < 0) return(NULL); 1521 if (len < 0) return(NULL); 1522 1523 for (i = 0;i < start;i++) { 1524 if (*str == 0) return(NULL); 1525 str++; 1526 } 1527 if (*str == 0) return(NULL); 1528 return(xmlStrndup(str, len)); 1529} 1530 1531/** 1532 * xmlStrlen: 1533 * @str: the xmlChar * array 1534 * 1535 * length of a xmlChar's string 1536 * 1537 * Returns the number of xmlChar contained in the ARRAY. 1538 */ 1539 1540int 1541xmlStrlen(const xmlChar *str) { 1542 int len = 0; 1543 1544 if (str == NULL) return(0); 1545 while (*str != 0) { /* non input consuming */ 1546 str++; 1547 len++; 1548 } 1549 return(len); 1550} 1551 1552/** 1553 * xmlStrncat: 1554 * @cur: the original xmlChar * array 1555 * @add: the xmlChar * array added 1556 * @len: the length of @add 1557 * 1558 * a strncat for array of xmlChar's, it will extend @cur with the len 1559 * first bytes of @add. 1560 * 1561 * Returns a new xmlChar *, the original @cur is reallocated if needed 1562 * and should not be freed 1563 */ 1564 1565xmlChar * 1566xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1567 int size; 1568 xmlChar *ret; 1569 1570 if ((add == NULL) || (len == 0)) 1571 return(cur); 1572 if (cur == NULL) 1573 return(xmlStrndup(add, len)); 1574 1575 size = xmlStrlen(cur); 1576 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1577 if (ret == NULL) { 1578 xmlGenericError(xmlGenericErrorContext, 1579 "xmlStrncat: realloc of %ld byte failed\n", 1580 (size + len + 1) * (long)sizeof(xmlChar)); 1581 return(cur); 1582 } 1583 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1584 ret[size + len] = 0; 1585 return(ret); 1586} 1587 1588/** 1589 * xmlStrcat: 1590 * @cur: the original xmlChar * array 1591 * @add: the xmlChar * array added 1592 * 1593 * a strcat for array of xmlChar's. Since they are supposed to be 1594 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1595 * a termination mark of '0'. 1596 * 1597 * Returns a new xmlChar * containing the concatenated string. 1598 */ 1599xmlChar * 1600xmlStrcat(xmlChar *cur, const xmlChar *add) { 1601 const xmlChar *p = add; 1602 1603 if (add == NULL) return(cur); 1604 if (cur == NULL) 1605 return(xmlStrdup(add)); 1606 1607 while (*p != 0) p++; /* non input consuming */ 1608 return(xmlStrncat(cur, add, p - add)); 1609} 1610 1611/************************************************************************ 1612 * * 1613 * Commodity functions, cleanup needed ? * 1614 * * 1615 ************************************************************************/ 1616 1617/** 1618 * areBlanks: 1619 * @ctxt: an XML parser context 1620 * @str: a xmlChar * 1621 * @len: the size of @str 1622 * 1623 * Is this a sequence of blank chars that one can ignore ? 1624 * 1625 * Returns 1 if ignorable 0 otherwise. 1626 */ 1627 1628static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1629 int i, ret; 1630 xmlNodePtr lastChild; 1631 1632 /* 1633 * Don't spend time trying to differentiate them, the same callback is 1634 * used ! 1635 */ 1636 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 1637 return(0); 1638 1639 /* 1640 * Check for xml:space value. 1641 */ 1642 if (*(ctxt->space) == 1) 1643 return(0); 1644 1645 /* 1646 * Check that the string is made of blanks 1647 */ 1648 for (i = 0;i < len;i++) 1649 if (!(IS_BLANK(str[i]))) return(0); 1650 1651 /* 1652 * Look if the element is mixed content in the DTD if available 1653 */ 1654 if (ctxt->node == NULL) return(0); 1655 if (ctxt->myDoc != NULL) { 1656 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1657 if (ret == 0) return(1); 1658 if (ret == 1) return(0); 1659 } 1660 1661 /* 1662 * Otherwise, heuristic :-\ 1663 */ 1664 if (RAW != '<') return(0); 1665 if ((ctxt->node->children == NULL) && 1666 (RAW == '<') && (NXT(1) == '/')) return(0); 1667 1668 lastChild = xmlGetLastChild(ctxt->node); 1669 if (lastChild == NULL) { 1670 if ((ctxt->node->type != XML_ELEMENT_NODE) && 1671 (ctxt->node->content != NULL)) return(0); 1672 } else if (xmlNodeIsText(lastChild)) 1673 return(0); 1674 else if ((ctxt->node->children != NULL) && 1675 (xmlNodeIsText(ctxt->node->children))) 1676 return(0); 1677 return(1); 1678} 1679 1680/************************************************************************ 1681 * * 1682 * Extra stuff for namespace support * 1683 * Relates to http://www.w3.org/TR/WD-xml-names * 1684 * * 1685 ************************************************************************/ 1686 1687/** 1688 * xmlSplitQName: 1689 * @ctxt: an XML parser context 1690 * @name: an XML parser context 1691 * @prefix: a xmlChar ** 1692 * 1693 * parse an UTF8 encoded XML qualified name string 1694 * 1695 * [NS 5] QName ::= (Prefix ':')? LocalPart 1696 * 1697 * [NS 6] Prefix ::= NCName 1698 * 1699 * [NS 7] LocalPart ::= NCName 1700 * 1701 * Returns the local part, and prefix is updated 1702 * to get the Prefix if any. 1703 */ 1704 1705xmlChar * 1706xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1707 xmlChar buf[XML_MAX_NAMELEN + 5]; 1708 xmlChar *buffer = NULL; 1709 int len = 0; 1710 int max = XML_MAX_NAMELEN; 1711 xmlChar *ret = NULL; 1712 const xmlChar *cur = name; 1713 int c; 1714 1715 *prefix = NULL; 1716 1717#ifndef XML_XML_NAMESPACE 1718 /* xml: prefix is not really a namespace */ 1719 if ((cur[0] == 'x') && (cur[1] == 'm') && 1720 (cur[2] == 'l') && (cur[3] == ':')) 1721 return(xmlStrdup(name)); 1722#endif 1723 1724 /* nasty but valid */ 1725 if (cur[0] == ':') 1726 return(xmlStrdup(name)); 1727 1728 c = *cur++; 1729 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1730 buf[len++] = c; 1731 c = *cur++; 1732 } 1733 if (len >= max) { 1734 /* 1735 * Okay someone managed to make a huge name, so he's ready to pay 1736 * for the processing speed. 1737 */ 1738 max = len * 2; 1739 1740 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1741 if (buffer == NULL) { 1742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1743 ctxt->sax->error(ctxt->userData, 1744 "xmlSplitQName: out of memory\n"); 1745 return(NULL); 1746 } 1747 memcpy(buffer, buf, len); 1748 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1749 if (len + 10 > max) { 1750 max *= 2; 1751 buffer = (xmlChar *) xmlRealloc(buffer, 1752 max * sizeof(xmlChar)); 1753 if (buffer == NULL) { 1754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1755 ctxt->sax->error(ctxt->userData, 1756 "xmlSplitQName: out of memory\n"); 1757 return(NULL); 1758 } 1759 } 1760 buffer[len++] = c; 1761 c = *cur++; 1762 } 1763 buffer[len] = 0; 1764 } 1765 1766 if (buffer == NULL) 1767 ret = xmlStrndup(buf, len); 1768 else { 1769 ret = buffer; 1770 buffer = NULL; 1771 max = XML_MAX_NAMELEN; 1772 } 1773 1774 1775 if (c == ':') { 1776 c = *cur; 1777 if (c == 0) return(ret); 1778 *prefix = ret; 1779 len = 0; 1780 1781 /* 1782 * Check that the first character is proper to start 1783 * a new name 1784 */ 1785 if (!(((c >= 0x61) && (c <= 0x7A)) || 1786 ((c >= 0x41) && (c <= 0x5A)) || 1787 (c == '_') || (c == ':'))) { 1788 int l; 1789 int first = CUR_SCHAR(cur, l); 1790 1791 if (!IS_LETTER(first) && (first != '_')) { 1792 if ((ctxt != NULL) && (ctxt->sax != NULL) && 1793 (ctxt->sax->error != NULL)) 1794 ctxt->sax->error(ctxt->userData, 1795 "Name %s is not XML Namespace compliant\n", 1796 name); 1797 } 1798 } 1799 cur++; 1800 1801 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 1802 buf[len++] = c; 1803 c = *cur++; 1804 } 1805 if (len >= max) { 1806 /* 1807 * Okay someone managed to make a huge name, so he's ready to pay 1808 * for the processing speed. 1809 */ 1810 max = len * 2; 1811 1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1813 if (buffer == NULL) { 1814 if ((ctxt != NULL) && (ctxt->sax != NULL) && 1815 (ctxt->sax->error != NULL)) 1816 ctxt->sax->error(ctxt->userData, 1817 "xmlSplitQName: out of memory\n"); 1818 return(NULL); 1819 } 1820 memcpy(buffer, buf, len); 1821 while (c != 0) { /* tested bigname2.xml */ 1822 if (len + 10 > max) { 1823 max *= 2; 1824 buffer = (xmlChar *) xmlRealloc(buffer, 1825 max * sizeof(xmlChar)); 1826 if (buffer == NULL) { 1827 if ((ctxt != NULL) && (ctxt->sax != NULL) && 1828 (ctxt->sax->error != NULL)) 1829 ctxt->sax->error(ctxt->userData, 1830 "xmlSplitQName: out of memory\n"); 1831 return(NULL); 1832 } 1833 } 1834 buffer[len++] = c; 1835 c = *cur++; 1836 } 1837 buffer[len] = 0; 1838 } 1839 1840 if (buffer == NULL) 1841 ret = xmlStrndup(buf, len); 1842 else { 1843 ret = buffer; 1844 } 1845 } 1846 1847 return(ret); 1848} 1849 1850/************************************************************************ 1851 * * 1852 * The parser itself * 1853 * Relates to http://www.w3.org/TR/REC-xml * 1854 * * 1855 ************************************************************************/ 1856 1857static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1858/** 1859 * xmlParseName: 1860 * @ctxt: an XML parser context 1861 * 1862 * parse an XML name. 1863 * 1864 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1865 * CombiningChar | Extender 1866 * 1867 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1868 * 1869 * [6] Names ::= Name (S Name)* 1870 * 1871 * Returns the Name parsed or NULL 1872 */ 1873 1874xmlChar * 1875xmlParseName(xmlParserCtxtPtr ctxt) { 1876 const xmlChar *in; 1877 xmlChar *ret; 1878 int count = 0; 1879 1880 GROW; 1881 1882 /* 1883 * Accelerator for simple ASCII names 1884 */ 1885 in = ctxt->input->cur; 1886 if (((*in >= 0x61) && (*in <= 0x7A)) || 1887 ((*in >= 0x41) && (*in <= 0x5A)) || 1888 (*in == '_') || (*in == ':')) { 1889 in++; 1890 while (((*in >= 0x61) && (*in <= 0x7A)) || 1891 ((*in >= 0x41) && (*in <= 0x5A)) || 1892 ((*in >= 0x30) && (*in <= 0x39)) || 1893 (*in == '_') || (*in == '-') || 1894 (*in == ':') || (*in == '.')) 1895 in++; 1896 if ((*in > 0) && (*in < 0x80)) { 1897 count = in - ctxt->input->cur; 1898 ret = xmlStrndup(ctxt->input->cur, count); 1899 ctxt->input->cur = in; 1900 return(ret); 1901 } 1902 } 1903 return(xmlParseNameComplex(ctxt)); 1904} 1905 1906/** 1907 * xmlParseNameAndCompare: 1908 * @ctxt: an XML parser context 1909 * 1910 * parse an XML name and compares for match 1911 * (specialized for endtag parsing) 1912 * 1913 * 1914 * Returns NULL for an illegal name, (xmlChar*) 1 for success 1915 * and the name for mismatch 1916 */ 1917 1918static xmlChar * 1919xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 1920 const xmlChar *cmp = other; 1921 const xmlChar *in; 1922 xmlChar *ret; 1923 1924 GROW; 1925 1926 in = ctxt->input->cur; 1927 while (*in != 0 && *in == *cmp) { 1928 ++in; 1929 ++cmp; 1930 } 1931 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { 1932 /* success */ 1933 ctxt->input->cur = in; 1934 return (xmlChar*) 1; 1935 } 1936 /* failure (or end of input buffer), check with full function */ 1937 ret = xmlParseName (ctxt); 1938 if (ret != 0 && xmlStrEqual (ret, other)) { 1939 xmlFree (ret); 1940 return (xmlChar*) 1; 1941 } 1942 return ret; 1943} 1944 1945static xmlChar * 1946xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1947 xmlChar buf[XML_MAX_NAMELEN + 5]; 1948 int len = 0, l; 1949 int c; 1950 int count = 0; 1951 1952 /* 1953 * Handler for more complex cases 1954 */ 1955 GROW; 1956 c = CUR_CHAR(l); 1957 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1958 (!IS_LETTER(c) && (c != '_') && 1959 (c != ':'))) { 1960 return(NULL); 1961 } 1962 1963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1964 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1965 (c == '.') || (c == '-') || 1966 (c == '_') || (c == ':') || 1967 (IS_COMBINING(c)) || 1968 (IS_EXTENDER(c)))) { 1969 if (count++ > 100) { 1970 count = 0; 1971 GROW; 1972 } 1973 COPY_BUF(l,buf,len,c); 1974 NEXTL(l); 1975 c = CUR_CHAR(l); 1976 if (len >= XML_MAX_NAMELEN) { 1977 /* 1978 * Okay someone managed to make a huge name, so he's ready to pay 1979 * for the processing speed. 1980 */ 1981 xmlChar *buffer; 1982 int max = len * 2; 1983 1984 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1985 if (buffer == NULL) { 1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1987 ctxt->sax->error(ctxt->userData, 1988 "xmlParseNameComplex: out of memory\n"); 1989 return(NULL); 1990 } 1991 memcpy(buffer, buf, len); 1992 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1993 (c == '.') || (c == '-') || 1994 (c == '_') || (c == ':') || 1995 (IS_COMBINING(c)) || 1996 (IS_EXTENDER(c))) { 1997 if (count++ > 100) { 1998 count = 0; 1999 GROW; 2000 } 2001 if (len + 10 > max) { 2002 max *= 2; 2003 buffer = (xmlChar *) xmlRealloc(buffer, 2004 max * sizeof(xmlChar)); 2005 if (buffer == NULL) { 2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2007 ctxt->sax->error(ctxt->userData, 2008 "xmlParseNameComplex: out of memory\n"); 2009 return(NULL); 2010 } 2011 } 2012 COPY_BUF(l,buffer,len,c); 2013 NEXTL(l); 2014 c = CUR_CHAR(l); 2015 } 2016 buffer[len] = 0; 2017 return(buffer); 2018 } 2019 } 2020 return(xmlStrndup(buf, len)); 2021} 2022 2023/** 2024 * xmlParseStringName: 2025 * @ctxt: an XML parser context 2026 * @str: a pointer to the string pointer (IN/OUT) 2027 * 2028 * parse an XML name. 2029 * 2030 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2031 * CombiningChar | Extender 2032 * 2033 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2034 * 2035 * [6] Names ::= Name (S Name)* 2036 * 2037 * Returns the Name parsed or NULL. The @str pointer 2038 * is updated to the current location in the string. 2039 */ 2040 2041static xmlChar * 2042xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2043 xmlChar buf[XML_MAX_NAMELEN + 5]; 2044 const xmlChar *cur = *str; 2045 int len = 0, l; 2046 int c; 2047 2048 c = CUR_SCHAR(cur, l); 2049 if (!IS_LETTER(c) && (c != '_') && 2050 (c != ':')) { 2051 return(NULL); 2052 } 2053 2054 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2055 (c == '.') || (c == '-') || 2056 (c == '_') || (c == ':') || 2057 (IS_COMBINING(c)) || 2058 (IS_EXTENDER(c))) { 2059 COPY_BUF(l,buf,len,c); 2060 cur += l; 2061 c = CUR_SCHAR(cur, l); 2062 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2063 /* 2064 * Okay someone managed to make a huge name, so he's ready to pay 2065 * for the processing speed. 2066 */ 2067 xmlChar *buffer; 2068 int max = len * 2; 2069 2070 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 2071 if (buffer == NULL) { 2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2073 ctxt->sax->error(ctxt->userData, 2074 "xmlParseStringName: out of memory\n"); 2075 return(NULL); 2076 } 2077 memcpy(buffer, buf, len); 2078 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2079 (c == '.') || (c == '-') || 2080 (c == '_') || (c == ':') || 2081 (IS_COMBINING(c)) || 2082 (IS_EXTENDER(c))) { 2083 if (len + 10 > max) { 2084 max *= 2; 2085 buffer = (xmlChar *) xmlRealloc(buffer, 2086 max * sizeof(xmlChar)); 2087 if (buffer == NULL) { 2088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2089 ctxt->sax->error(ctxt->userData, 2090 "xmlParseStringName: out of memory\n"); 2091 return(NULL); 2092 } 2093 } 2094 COPY_BUF(l,buffer,len,c); 2095 cur += l; 2096 c = CUR_SCHAR(cur, l); 2097 } 2098 buffer[len] = 0; 2099 *str = cur; 2100 return(buffer); 2101 } 2102 } 2103 *str = cur; 2104 return(xmlStrndup(buf, len)); 2105} 2106 2107/** 2108 * xmlParseNmtoken: 2109 * @ctxt: an XML parser context 2110 * 2111 * parse an XML Nmtoken. 2112 * 2113 * [7] Nmtoken ::= (NameChar)+ 2114 * 2115 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2116 * 2117 * Returns the Nmtoken parsed or NULL 2118 */ 2119 2120xmlChar * 2121xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2122 xmlChar buf[XML_MAX_NAMELEN + 5]; 2123 int len = 0, l; 2124 int c; 2125 int count = 0; 2126 2127 GROW; 2128 c = CUR_CHAR(l); 2129 2130 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2131 (c == '.') || (c == '-') || 2132 (c == '_') || (c == ':') || 2133 (IS_COMBINING(c)) || 2134 (IS_EXTENDER(c))) { 2135 if (count++ > 100) { 2136 count = 0; 2137 GROW; 2138 } 2139 COPY_BUF(l,buf,len,c); 2140 NEXTL(l); 2141 c = CUR_CHAR(l); 2142 if (len >= XML_MAX_NAMELEN) { 2143 /* 2144 * Okay someone managed to make a huge token, so he's ready to pay 2145 * for the processing speed. 2146 */ 2147 xmlChar *buffer; 2148 int max = len * 2; 2149 2150 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 2151 if (buffer == NULL) { 2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2153 ctxt->sax->error(ctxt->userData, 2154 "xmlParseNmtoken: out of memory\n"); 2155 return(NULL); 2156 } 2157 memcpy(buffer, buf, len); 2158 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2159 (c == '.') || (c == '-') || 2160 (c == '_') || (c == ':') || 2161 (IS_COMBINING(c)) || 2162 (IS_EXTENDER(c))) { 2163 if (count++ > 100) { 2164 count = 0; 2165 GROW; 2166 } 2167 if (len + 10 > max) { 2168 max *= 2; 2169 buffer = (xmlChar *) xmlRealloc(buffer, 2170 max * sizeof(xmlChar)); 2171 if (buffer == NULL) { 2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2173 ctxt->sax->error(ctxt->userData, 2174 "xmlParseNmtoken: out of memory\n"); 2175 return(NULL); 2176 } 2177 } 2178 COPY_BUF(l,buffer,len,c); 2179 NEXTL(l); 2180 c = CUR_CHAR(l); 2181 } 2182 buffer[len] = 0; 2183 return(buffer); 2184 } 2185 } 2186 if (len == 0) 2187 return(NULL); 2188 return(xmlStrndup(buf, len)); 2189} 2190 2191/** 2192 * xmlParseEntityValue: 2193 * @ctxt: an XML parser context 2194 * @orig: if non-NULL store a copy of the original entity value 2195 * 2196 * parse a value for ENTITY declarations 2197 * 2198 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2199 * "'" ([^%&'] | PEReference | Reference)* "'" 2200 * 2201 * Returns the EntityValue parsed with reference substituted or NULL 2202 */ 2203 2204xmlChar * 2205xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2206 xmlChar *buf = NULL; 2207 int len = 0; 2208 int size = XML_PARSER_BUFFER_SIZE; 2209 int c, l; 2210 xmlChar stop; 2211 xmlChar *ret = NULL; 2212 const xmlChar *cur = NULL; 2213 xmlParserInputPtr input; 2214 2215 if (RAW == '"') stop = '"'; 2216 else if (RAW == '\'') stop = '\''; 2217 else { 2218 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2220 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2221 ctxt->wellFormed = 0; 2222 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2223 return(NULL); 2224 } 2225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2226 if (buf == NULL) { 2227 xmlGenericError(xmlGenericErrorContext, 2228 "malloc of %d byte failed\n", size); 2229 return(NULL); 2230 } 2231 2232 /* 2233 * The content of the entity definition is copied in a buffer. 2234 */ 2235 2236 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2237 input = ctxt->input; 2238 GROW; 2239 NEXT; 2240 c = CUR_CHAR(l); 2241 /* 2242 * NOTE: 4.4.5 Included in Literal 2243 * When a parameter entity reference appears in a literal entity 2244 * value, ... a single or double quote character in the replacement 2245 * text is always treated as a normal data character and will not 2246 * terminate the literal. 2247 * In practice it means we stop the loop only when back at parsing 2248 * the initial entity and the quote is found 2249 */ 2250 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2251 (ctxt->input != input))) { 2252 if (len + 5 >= size) { 2253 size *= 2; 2254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2255 if (buf == NULL) { 2256 xmlGenericError(xmlGenericErrorContext, 2257 "realloc of %d byte failed\n", size); 2258 return(NULL); 2259 } 2260 } 2261 COPY_BUF(l,buf,len,c); 2262 NEXTL(l); 2263 /* 2264 * Pop-up of finished entities. 2265 */ 2266 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2267 xmlPopInput(ctxt); 2268 2269 GROW; 2270 c = CUR_CHAR(l); 2271 if (c == 0) { 2272 GROW; 2273 c = CUR_CHAR(l); 2274 } 2275 } 2276 buf[len] = 0; 2277 2278 /* 2279 * Raise problem w.r.t. '&' and '%' being used in non-entities 2280 * reference constructs. Note Charref will be handled in 2281 * xmlStringDecodeEntities() 2282 */ 2283 cur = buf; 2284 while (*cur != 0) { /* non input consuming */ 2285 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2286 xmlChar *name; 2287 xmlChar tmp = *cur; 2288 2289 cur++; 2290 name = xmlParseStringName(ctxt, &cur); 2291 if ((name == NULL) || (*cur != ';')) { 2292 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 2293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2294 ctxt->sax->error(ctxt->userData, 2295 "EntityValue: '%c' forbidden except for entities references\n", 2296 tmp); 2297 ctxt->wellFormed = 0; 2298 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2299 } 2300 if ((tmp == '%') && (ctxt->inSubset == 1) && 2301 (ctxt->inputNr == 1)) { 2302 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2304 ctxt->sax->error(ctxt->userData, 2305 "EntityValue: PEReferences forbidden in internal subset\n", 2306 tmp); 2307 ctxt->wellFormed = 0; 2308 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2309 } 2310 if (name != NULL) 2311 xmlFree(name); 2312 } 2313 cur++; 2314 } 2315 2316 /* 2317 * Then PEReference entities are substituted. 2318 */ 2319 if (c != stop) { 2320 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2322 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2323 ctxt->wellFormed = 0; 2324 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2325 xmlFree(buf); 2326 } else { 2327 NEXT; 2328 /* 2329 * NOTE: 4.4.7 Bypassed 2330 * When a general entity reference appears in the EntityValue in 2331 * an entity declaration, it is bypassed and left as is. 2332 * so XML_SUBSTITUTE_REF is not set here. 2333 */ 2334 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2335 0, 0, 0); 2336 if (orig != NULL) 2337 *orig = buf; 2338 else 2339 xmlFree(buf); 2340 } 2341 2342 return(ret); 2343} 2344 2345/** 2346 * xmlParseAttValue: 2347 * @ctxt: an XML parser context 2348 * 2349 * parse a value for an attribute 2350 * Note: the parser won't do substitution of entities here, this 2351 * will be handled later in xmlStringGetNodeList 2352 * 2353 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2354 * "'" ([^<&'] | Reference)* "'" 2355 * 2356 * 3.3.3 Attribute-Value Normalization: 2357 * Before the value of an attribute is passed to the application or 2358 * checked for validity, the XML processor must normalize it as follows: 2359 * - a character reference is processed by appending the referenced 2360 * character to the attribute value 2361 * - an entity reference is processed by recursively processing the 2362 * replacement text of the entity 2363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2364 * appending #x20 to the normalized value, except that only a single 2365 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2366 * parsed entity or the literal entity value of an internal parsed entity 2367 * - other characters are processed by appending them to the normalized value 2368 * If the declared value is not CDATA, then the XML processor must further 2369 * process the normalized attribute value by discarding any leading and 2370 * trailing space (#x20) characters, and by replacing sequences of space 2371 * (#x20) characters by a single space (#x20) character. 2372 * All attributes for which no declaration has been read should be treated 2373 * by a non-validating parser as if declared CDATA. 2374 * 2375 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2376 */ 2377 2378xmlChar * 2379xmlParseAttValueComplex(xmlParserCtxtPtr ctxt); 2380 2381xmlChar * 2382xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2383 xmlChar limit = 0; 2384 const xmlChar *in = NULL; 2385 xmlChar *ret = NULL; 2386 SHRINK; 2387 GROW; 2388 in = (xmlChar *) CUR_PTR; 2389 if (*in != '"' && *in != '\'') { 2390 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2392 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2393 ctxt->wellFormed = 0; 2394 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2395 return(NULL); 2396 } 2397 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2398 limit = *in; 2399 ++in; 2400 2401 while (*in != limit && *in >= 0x20 && *in <= 0x7f && 2402 *in != '&' && *in != '<' 2403 ) { 2404 ++in; 2405 } 2406 if (*in != limit) { 2407 return xmlParseAttValueComplex(ctxt); 2408 } 2409 ++in; 2410 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2); 2411 CUR_PTR = in; 2412 return ret; 2413} 2414 2415/** 2416 * xmlParseAttValueComplex: 2417 * @ctxt: an XML parser context 2418 * 2419 * parse a value for an attribute, this is the fallback function 2420 * of xmlParseAttValue() when the attribute parsing requires handling 2421 * of non-ASCII characters. 2422 * 2423 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2424 */ 2425xmlChar * 2426xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) { 2427 xmlChar limit = 0; 2428 xmlChar *buf = NULL; 2429 int len = 0; 2430 int buf_size = 0; 2431 int c, l; 2432 xmlChar *current = NULL; 2433 xmlEntityPtr ent; 2434 2435 2436 SHRINK; 2437 if (NXT(0) == '"') { 2438 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2439 limit = '"'; 2440 NEXT; 2441 } else if (NXT(0) == '\'') { 2442 limit = '\''; 2443 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2444 NEXT; 2445 } else { 2446 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2448 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2449 ctxt->wellFormed = 0; 2450 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2451 return(NULL); 2452 } 2453 2454 /* 2455 * allocate a translation buffer. 2456 */ 2457 buf_size = XML_PARSER_BUFFER_SIZE; 2458 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2459 if (buf == NULL) { 2460 xmlGenericError(xmlGenericErrorContext, 2461 "xmlParseAttValue: malloc failed"); 2462 return(NULL); 2463 } 2464 2465 /* 2466 * OK loop until we reach one of the ending char or a size limit. 2467 */ 2468 c = CUR_CHAR(l); 2469 while ((NXT(0) != limit) && /* checked */ 2470 (c != '<')) { 2471 if (c == 0) break; 2472 if (c == '&') { 2473 if (NXT(1) == '#') { 2474 int val = xmlParseCharRef(ctxt); 2475 if (val == '&') { 2476 if (ctxt->replaceEntities) { 2477 if (len > buf_size - 10) { 2478 growBuffer(buf); 2479 } 2480 buf[len++] = '&'; 2481 } else { 2482 /* 2483 * The reparsing will be done in xmlStringGetNodeList() 2484 * called by the attribute() function in SAX.c 2485 */ 2486 static xmlChar buffer[6] = "&"; 2487 2488 if (len > buf_size - 10) { 2489 growBuffer(buf); 2490 } 2491 current = &buffer[0]; 2492 while (*current != 0) { /* non input consuming */ 2493 buf[len++] = *current++; 2494 } 2495 } 2496 } else { 2497 if (len > buf_size - 10) { 2498 growBuffer(buf); 2499 } 2500 len += xmlCopyChar(0, &buf[len], val); 2501 } 2502 } else { 2503 ent = xmlParseEntityRef(ctxt); 2504 if ((ent != NULL) && 2505 (ctxt->replaceEntities != 0)) { 2506 xmlChar *rep; 2507 2508 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2509 rep = xmlStringDecodeEntities(ctxt, ent->content, 2510 XML_SUBSTITUTE_REF, 0, 0, 0); 2511 if (rep != NULL) { 2512 current = rep; 2513 while (*current != 0) { /* non input consuming */ 2514 buf[len++] = *current++; 2515 if (len > buf_size - 10) { 2516 growBuffer(buf); 2517 } 2518 } 2519 xmlFree(rep); 2520 } 2521 } else { 2522 if (len > buf_size - 10) { 2523 growBuffer(buf); 2524 } 2525 if (ent->content != NULL) 2526 buf[len++] = ent->content[0]; 2527 } 2528 } else if (ent != NULL) { 2529 int i = xmlStrlen(ent->name); 2530 const xmlChar *cur = ent->name; 2531 2532 /* 2533 * This may look absurd but is needed to detect 2534 * entities problems 2535 */ 2536 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2537 (ent->content != NULL)) { 2538 xmlChar *rep; 2539 rep = xmlStringDecodeEntities(ctxt, ent->content, 2540 XML_SUBSTITUTE_REF, 0, 0, 0); 2541 if (rep != NULL) 2542 xmlFree(rep); 2543 } 2544 2545 /* 2546 * Just output the reference 2547 */ 2548 buf[len++] = '&'; 2549 if (len > buf_size - i - 10) { 2550 growBuffer(buf); 2551 } 2552 for (;i > 0;i--) 2553 buf[len++] = *cur++; 2554 buf[len++] = ';'; 2555 } 2556 } 2557 } else { 2558 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2559 COPY_BUF(l,buf,len,0x20); 2560 if (len > buf_size - 10) { 2561 growBuffer(buf); 2562 } 2563 } else { 2564 COPY_BUF(l,buf,len,c); 2565 if (len > buf_size - 10) { 2566 growBuffer(buf); 2567 } 2568 } 2569 NEXTL(l); 2570 } 2571 GROW; 2572 c = CUR_CHAR(l); 2573 } 2574 buf[len++] = 0; 2575 if (RAW == '<') { 2576 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2578 ctxt->sax->error(ctxt->userData, 2579 "Unescaped '<' not allowed in attributes values\n"); 2580 ctxt->wellFormed = 0; 2581 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2582 } else if (RAW != limit) { 2583 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2585 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2586 ctxt->wellFormed = 0; 2587 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2588 } else 2589 NEXT; 2590 return(buf); 2591} 2592 2593/** 2594 * xmlParseSystemLiteral: 2595 * @ctxt: an XML parser context 2596 * 2597 * parse an XML Literal 2598 * 2599 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2600 * 2601 * Returns the SystemLiteral parsed or NULL 2602 */ 2603 2604xmlChar * 2605xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2606 xmlChar *buf = NULL; 2607 int len = 0; 2608 int size = XML_PARSER_BUFFER_SIZE; 2609 int cur, l; 2610 xmlChar stop; 2611 int state = ctxt->instate; 2612 int count = 0; 2613 2614 SHRINK; 2615 if (RAW == '"') { 2616 NEXT; 2617 stop = '"'; 2618 } else if (RAW == '\'') { 2619 NEXT; 2620 stop = '\''; 2621 } else { 2622 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2624 ctxt->sax->error(ctxt->userData, 2625 "SystemLiteral \" or ' expected\n"); 2626 ctxt->wellFormed = 0; 2627 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2628 return(NULL); 2629 } 2630 2631 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2632 if (buf == NULL) { 2633 xmlGenericError(xmlGenericErrorContext, 2634 "malloc of %d byte failed\n", size); 2635 return(NULL); 2636 } 2637 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2638 cur = CUR_CHAR(l); 2639 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2640 if (len + 5 >= size) { 2641 size *= 2; 2642 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2643 if (buf == NULL) { 2644 xmlGenericError(xmlGenericErrorContext, 2645 "realloc of %d byte failed\n", size); 2646 ctxt->instate = (xmlParserInputState) state; 2647 return(NULL); 2648 } 2649 } 2650 count++; 2651 if (count > 50) { 2652 GROW; 2653 count = 0; 2654 } 2655 COPY_BUF(l,buf,len,cur); 2656 NEXTL(l); 2657 cur = CUR_CHAR(l); 2658 if (cur == 0) { 2659 GROW; 2660 SHRINK; 2661 cur = CUR_CHAR(l); 2662 } 2663 } 2664 buf[len] = 0; 2665 ctxt->instate = (xmlParserInputState) state; 2666 if (!IS_CHAR(cur)) { 2667 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2669 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2670 ctxt->wellFormed = 0; 2671 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2672 } else { 2673 NEXT; 2674 } 2675 return(buf); 2676} 2677 2678/** 2679 * xmlParsePubidLiteral: 2680 * @ctxt: an XML parser context 2681 * 2682 * parse an XML public literal 2683 * 2684 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2685 * 2686 * Returns the PubidLiteral parsed or NULL. 2687 */ 2688 2689xmlChar * 2690xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2691 xmlChar *buf = NULL; 2692 int len = 0; 2693 int size = XML_PARSER_BUFFER_SIZE; 2694 xmlChar cur; 2695 xmlChar stop; 2696 int count = 0; 2697 xmlParserInputState oldstate = ctxt->instate; 2698 2699 SHRINK; 2700 if (RAW == '"') { 2701 NEXT; 2702 stop = '"'; 2703 } else if (RAW == '\'') { 2704 NEXT; 2705 stop = '\''; 2706 } else { 2707 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2709 ctxt->sax->error(ctxt->userData, 2710 "SystemLiteral \" or ' expected\n"); 2711 ctxt->wellFormed = 0; 2712 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2713 return(NULL); 2714 } 2715 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2716 if (buf == NULL) { 2717 xmlGenericError(xmlGenericErrorContext, 2718 "malloc of %d byte failed\n", size); 2719 return(NULL); 2720 } 2721 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 2722 cur = CUR; 2723 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2724 if (len + 1 >= size) { 2725 size *= 2; 2726 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2727 if (buf == NULL) { 2728 xmlGenericError(xmlGenericErrorContext, 2729 "realloc of %d byte failed\n", size); 2730 return(NULL); 2731 } 2732 } 2733 buf[len++] = cur; 2734 count++; 2735 if (count > 50) { 2736 GROW; 2737 count = 0; 2738 } 2739 NEXT; 2740 cur = CUR; 2741 if (cur == 0) { 2742 GROW; 2743 SHRINK; 2744 cur = CUR; 2745 } 2746 } 2747 buf[len] = 0; 2748 if (cur != stop) { 2749 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2751 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2752 ctxt->wellFormed = 0; 2753 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2754 } else { 2755 NEXT; 2756 } 2757 ctxt->instate = oldstate; 2758 return(buf); 2759} 2760 2761void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2762/** 2763 * xmlParseCharData: 2764 * @ctxt: an XML parser context 2765 * @cdata: int indicating whether we are within a CDATA section 2766 * 2767 * parse a CharData section. 2768 * if we are within a CDATA section ']]>' marks an end of section. 2769 * 2770 * The right angle bracket (>) may be represented using the string ">", 2771 * and must, for compatibility, be escaped using ">" or a character 2772 * reference when it appears in the string "]]>" in content, when that 2773 * string is not marking the end of a CDATA section. 2774 * 2775 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2776 */ 2777 2778void 2779xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2780 const xmlChar *in; 2781 int nbchar = 0; 2782 int line = ctxt->input->line; 2783 int col = ctxt->input->col; 2784 2785 SHRINK; 2786 GROW; 2787 /* 2788 * Accelerated common case where input don't need to be 2789 * modified before passing it to the handler. 2790 */ 2791 if (!cdata) { 2792 in = ctxt->input->cur; 2793 do { 2794get_more: 2795 while (((*in >= 0x20) && (*in != '<') && (*in != ']') && 2796 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2797 in++; 2798 if (*in == 0xA) { 2799 ctxt->input->line++; 2800 in++; 2801 while (*in == 0xA) { 2802 ctxt->input->line++; 2803 in++; 2804 } 2805 goto get_more; 2806 } 2807 if (*in == ']') { 2808 if ((in[1] == ']') && (in[2] == '>')) { 2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2811 ctxt->sax->error(ctxt->userData, 2812 "Sequence ']]>' not allowed in content\n"); 2813 ctxt->input->cur = in; 2814 ctxt->wellFormed = 0; 2815 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2816 return; 2817 } 2818 in++; 2819 goto get_more; 2820 } 2821 nbchar = in - ctxt->input->cur; 2822 if (nbchar > 0) { 2823 if (IS_BLANK(*ctxt->input->cur)) { 2824 const xmlChar *tmp = ctxt->input->cur; 2825 ctxt->input->cur = in; 2826 if (areBlanks(ctxt, tmp, nbchar)) { 2827 if (ctxt->sax->ignorableWhitespace != NULL) 2828 ctxt->sax->ignorableWhitespace(ctxt->userData, 2829 tmp, nbchar); 2830 } else { 2831 if (ctxt->sax->characters != NULL) 2832 ctxt->sax->characters(ctxt->userData, 2833 tmp, nbchar); 2834 } 2835 line = ctxt->input->line; 2836 col = ctxt->input->col; 2837 } else { 2838 if (ctxt->sax->characters != NULL) 2839 ctxt->sax->characters(ctxt->userData, 2840 ctxt->input->cur, nbchar); 2841 line = ctxt->input->line; 2842 col = ctxt->input->col; 2843 } 2844 } 2845 ctxt->input->cur = in; 2846 if (*in == 0xD) { 2847 in++; 2848 if (*in == 0xA) { 2849 ctxt->input->cur = in; 2850 in++; 2851 ctxt->input->line++; 2852 continue; /* while */ 2853 } 2854 in--; 2855 } 2856 if (*in == '<') { 2857 return; 2858 } 2859 if (*in == '&') { 2860 return; 2861 } 2862 SHRINK; 2863 GROW; 2864 in = ctxt->input->cur; 2865 } while ((*in >= 0x20) && (*in <= 0x7F)); 2866 nbchar = 0; 2867 } 2868 ctxt->input->line = line; 2869 ctxt->input->col = col; 2870 xmlParseCharDataComplex(ctxt, cdata); 2871} 2872 2873/** 2874 * xmlParseCharDataComplex: 2875 * @ctxt: an XML parser context 2876 * @cdata: int indicating whether we are within a CDATA section 2877 * 2878 * parse a CharData section.this is the fallback function 2879 * of xmlParseCharData() when the parsing requires handling 2880 * of non-ASCII characters. 2881 */ 2882void 2883xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2884 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2885 int nbchar = 0; 2886 int cur, l; 2887 int count = 0; 2888 2889 SHRINK; 2890 GROW; 2891 cur = CUR_CHAR(l); 2892 while ((cur != '<') && /* checked */ 2893 (cur != '&') && 2894 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2895 if ((cur == ']') && (NXT(1) == ']') && 2896 (NXT(2) == '>')) { 2897 if (cdata) break; 2898 else { 2899 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2901 ctxt->sax->error(ctxt->userData, 2902 "Sequence ']]>' not allowed in content\n"); 2903 /* Should this be relaxed ??? I see a "must here */ 2904 ctxt->wellFormed = 0; 2905 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2906 } 2907 } 2908 COPY_BUF(l,buf,nbchar,cur); 2909 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2910 /* 2911 * OK the segment is to be consumed as chars. 2912 */ 2913 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2914 if (areBlanks(ctxt, buf, nbchar)) { 2915 if (ctxt->sax->ignorableWhitespace != NULL) 2916 ctxt->sax->ignorableWhitespace(ctxt->userData, 2917 buf, nbchar); 2918 } else { 2919 if (ctxt->sax->characters != NULL) 2920 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2921 } 2922 } 2923 nbchar = 0; 2924 } 2925 count++; 2926 if (count > 50) { 2927 GROW; 2928 count = 0; 2929 } 2930 NEXTL(l); 2931 cur = CUR_CHAR(l); 2932 } 2933 if (nbchar != 0) { 2934 /* 2935 * OK the segment is to be consumed as chars. 2936 */ 2937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2938 if (areBlanks(ctxt, buf, nbchar)) { 2939 if (ctxt->sax->ignorableWhitespace != NULL) 2940 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2941 } else { 2942 if (ctxt->sax->characters != NULL) 2943 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2944 } 2945 } 2946 } 2947} 2948 2949/** 2950 * xmlParseExternalID: 2951 * @ctxt: an XML parser context 2952 * @publicID: a xmlChar** receiving PubidLiteral 2953 * @strict: indicate whether we should restrict parsing to only 2954 * production [75], see NOTE below 2955 * 2956 * Parse an External ID or a Public ID 2957 * 2958 * NOTE: Productions [75] and [83] interact badly since [75] can generate 2959 * 'PUBLIC' S PubidLiteral S SystemLiteral 2960 * 2961 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2962 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2963 * 2964 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2965 * 2966 * Returns the function returns SystemLiteral and in the second 2967 * case publicID receives PubidLiteral, is strict is off 2968 * it is possible to return NULL and have publicID set. 2969 */ 2970 2971xmlChar * 2972xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2973 xmlChar *URI = NULL; 2974 2975 SHRINK; 2976 2977 *publicID = NULL; 2978 if ((RAW == 'S') && (NXT(1) == 'Y') && 2979 (NXT(2) == 'S') && (NXT(3) == 'T') && 2980 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2981 SKIP(6); 2982 if (!IS_BLANK(CUR)) { 2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2985 ctxt->sax->error(ctxt->userData, 2986 "Space required after 'SYSTEM'\n"); 2987 ctxt->wellFormed = 0; 2988 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2989 } 2990 SKIP_BLANKS; 2991 URI = xmlParseSystemLiteral(ctxt); 2992 if (URI == NULL) { 2993 ctxt->errNo = XML_ERR_URI_REQUIRED; 2994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2995 ctxt->sax->error(ctxt->userData, 2996 "xmlParseExternalID: SYSTEM, no URI\n"); 2997 ctxt->wellFormed = 0; 2998 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 2999 } 3000 } else if ((RAW == 'P') && (NXT(1) == 'U') && 3001 (NXT(2) == 'B') && (NXT(3) == 'L') && 3002 (NXT(4) == 'I') && (NXT(5) == 'C')) { 3003 SKIP(6); 3004 if (!IS_BLANK(CUR)) { 3005 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3007 ctxt->sax->error(ctxt->userData, 3008 "Space required after 'PUBLIC'\n"); 3009 ctxt->wellFormed = 0; 3010 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3011 } 3012 SKIP_BLANKS; 3013 *publicID = xmlParsePubidLiteral(ctxt); 3014 if (*publicID == NULL) { 3015 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3017 ctxt->sax->error(ctxt->userData, 3018 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 3019 ctxt->wellFormed = 0; 3020 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3021 } 3022 if (strict) { 3023 /* 3024 * We don't handle [83] so "S SystemLiteral" is required. 3025 */ 3026 if (!IS_BLANK(CUR)) { 3027 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3029 ctxt->sax->error(ctxt->userData, 3030 "Space required after the Public Identifier\n"); 3031 ctxt->wellFormed = 0; 3032 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3033 } 3034 } else { 3035 /* 3036 * We handle [83] so we return immediately, if 3037 * "S SystemLiteral" is not detected. From a purely parsing 3038 * point of view that's a nice mess. 3039 */ 3040 const xmlChar *ptr; 3041 GROW; 3042 3043 ptr = CUR_PTR; 3044 if (!IS_BLANK(*ptr)) return(NULL); 3045 3046 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3047 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3048 } 3049 SKIP_BLANKS; 3050 URI = xmlParseSystemLiteral(ctxt); 3051 if (URI == NULL) { 3052 ctxt->errNo = XML_ERR_URI_REQUIRED; 3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3054 ctxt->sax->error(ctxt->userData, 3055 "xmlParseExternalID: PUBLIC, no URI\n"); 3056 ctxt->wellFormed = 0; 3057 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3058 } 3059 } 3060 return(URI); 3061} 3062 3063/** 3064 * xmlParseComment: 3065 * @ctxt: an XML parser context 3066 * 3067 * Skip an XML (SGML) comment <!-- .... --> 3068 * The spec says that "For compatibility, the string "--" (double-hyphen) 3069 * must not occur within comments. " 3070 * 3071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3072 */ 3073void 3074xmlParseComment(xmlParserCtxtPtr ctxt) { 3075 xmlChar *buf = NULL; 3076 int len; 3077 int size = XML_PARSER_BUFFER_SIZE; 3078 int q, ql; 3079 int r, rl; 3080 int cur, l; 3081 xmlParserInputState state; 3082 xmlParserInputPtr input = ctxt->input; 3083 int count = 0; 3084 3085 /* 3086 * Check that there is a comment right here. 3087 */ 3088 if ((RAW != '<') || (NXT(1) != '!') || 3089 (NXT(2) != '-') || (NXT(3) != '-')) return; 3090 3091 state = ctxt->instate; 3092 ctxt->instate = XML_PARSER_COMMENT; 3093 SHRINK; 3094 SKIP(4); 3095 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3096 if (buf == NULL) { 3097 xmlGenericError(xmlGenericErrorContext, 3098 "malloc of %d byte failed\n", size); 3099 ctxt->instate = state; 3100 return; 3101 } 3102 q = CUR_CHAR(ql); 3103 NEXTL(ql); 3104 r = CUR_CHAR(rl); 3105 NEXTL(rl); 3106 cur = CUR_CHAR(l); 3107 len = 0; 3108 while (IS_CHAR(cur) && /* checked */ 3109 ((cur != '>') || 3110 (r != '-') || (q != '-'))) { 3111 if ((r == '-') && (q == '-')) { 3112 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3114 ctxt->sax->error(ctxt->userData, 3115 "Comment must not contain '--' (double-hyphen)`\n"); 3116 ctxt->wellFormed = 0; 3117 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3118 } 3119 if (len + 5 >= size) { 3120 size *= 2; 3121 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3122 if (buf == NULL) { 3123 xmlGenericError(xmlGenericErrorContext, 3124 "realloc of %d byte failed\n", size); 3125 ctxt->instate = state; 3126 return; 3127 } 3128 } 3129 COPY_BUF(ql,buf,len,q); 3130 q = r; 3131 ql = rl; 3132 r = cur; 3133 rl = l; 3134 3135 count++; 3136 if (count > 50) { 3137 GROW; 3138 count = 0; 3139 } 3140 NEXTL(l); 3141 cur = CUR_CHAR(l); 3142 if (cur == 0) { 3143 SHRINK; 3144 GROW; 3145 cur = CUR_CHAR(l); 3146 } 3147 } 3148 buf[len] = 0; 3149 if (!IS_CHAR(cur)) { 3150 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3152 ctxt->sax->error(ctxt->userData, 3153 "Comment not terminated \n<!--%.50s\n", buf); 3154 ctxt->wellFormed = 0; 3155 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3156 xmlFree(buf); 3157 } else { 3158 if (input != ctxt->input) { 3159 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3161 ctxt->sax->error(ctxt->userData, 3162"Comment doesn't start and stop in the same entity\n"); 3163 ctxt->wellFormed = 0; 3164 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3165 } 3166 NEXT; 3167 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3168 (!ctxt->disableSAX)) 3169 ctxt->sax->comment(ctxt->userData, buf); 3170 xmlFree(buf); 3171 } 3172 ctxt->instate = state; 3173} 3174 3175/** 3176 * xmlParsePITarget: 3177 * @ctxt: an XML parser context 3178 * 3179 * parse the name of a PI 3180 * 3181 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3182 * 3183 * Returns the PITarget name or NULL 3184 */ 3185 3186xmlChar * 3187xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3188 xmlChar *name; 3189 3190 name = xmlParseName(ctxt); 3191 if ((name != NULL) && 3192 ((name[0] == 'x') || (name[0] == 'X')) && 3193 ((name[1] == 'm') || (name[1] == 'M')) && 3194 ((name[2] == 'l') || (name[2] == 'L'))) { 3195 int i; 3196 if ((name[0] == 'x') && (name[1] == 'm') && 3197 (name[2] == 'l') && (name[3] == 0)) { 3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3200 ctxt->sax->error(ctxt->userData, 3201 "XML declaration allowed only at the start of the document\n"); 3202 ctxt->wellFormed = 0; 3203 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3204 return(name); 3205 } else if (name[3] == 0) { 3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3208 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 3209 ctxt->wellFormed = 0; 3210 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3211 return(name); 3212 } 3213 for (i = 0;;i++) { 3214 if (xmlW3CPIs[i] == NULL) break; 3215 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3216 return(name); 3217 } 3218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 3219 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3220 ctxt->sax->warning(ctxt->userData, 3221 "xmlParsePITarget: invalid name prefix 'xml'\n"); 3222 } 3223 } 3224 return(name); 3225} 3226 3227#ifdef LIBXML_CATALOG_ENABLED 3228/** 3229 * xmlParseCatalogPI: 3230 * @ctxt: an XML parser context 3231 * @catalog: the PI value string 3232 * 3233 * parse an XML Catalog Processing Instruction. 3234 * 3235 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3236 * 3237 * Occurs only if allowed by the user and if happening in the Misc 3238 * part of the document before any doctype informations 3239 * This will add the given catalog to the parsing context in order 3240 * to be used if there is a resolution need further down in the document 3241 */ 3242 3243static void 3244xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3245 xmlChar *URL = NULL; 3246 const xmlChar *tmp, *base; 3247 xmlChar marker; 3248 3249 tmp = catalog; 3250 while (IS_BLANK(*tmp)) tmp++; 3251 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3252 goto error; 3253 tmp += 7; 3254 while (IS_BLANK(*tmp)) tmp++; 3255 if (*tmp != '=') { 3256 return; 3257 } 3258 tmp++; 3259 while (IS_BLANK(*tmp)) tmp++; 3260 marker = *tmp; 3261 if ((marker != '\'') && (marker != '"')) 3262 goto error; 3263 tmp++; 3264 base = tmp; 3265 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3266 if (*tmp == 0) 3267 goto error; 3268 URL = xmlStrndup(base, tmp - base); 3269 tmp++; 3270 while (IS_BLANK(*tmp)) tmp++; 3271 if (*tmp != 0) 3272 goto error; 3273 3274 if (URL != NULL) { 3275 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3276 xmlFree(URL); 3277 } 3278 return; 3279 3280error: 3281 ctxt->errNo = XML_WAR_CATALOG_PI; 3282 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3283 ctxt->sax->warning(ctxt->userData, 3284 "Catalog PI syntax error: %s\n", catalog); 3285 if (URL != NULL) 3286 xmlFree(URL); 3287} 3288#endif 3289 3290/** 3291 * xmlParsePI: 3292 * @ctxt: an XML parser context 3293 * 3294 * parse an XML Processing Instruction. 3295 * 3296 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3297 * 3298 * The processing is transfered to SAX once parsed. 3299 */ 3300 3301void 3302xmlParsePI(xmlParserCtxtPtr ctxt) { 3303 xmlChar *buf = NULL; 3304 int len = 0; 3305 int size = XML_PARSER_BUFFER_SIZE; 3306 int cur, l; 3307 xmlChar *target; 3308 xmlParserInputState state; 3309 int count = 0; 3310 3311 if ((RAW == '<') && (NXT(1) == '?')) { 3312 xmlParserInputPtr input = ctxt->input; 3313 state = ctxt->instate; 3314 ctxt->instate = XML_PARSER_PI; 3315 /* 3316 * this is a Processing Instruction. 3317 */ 3318 SKIP(2); 3319 SHRINK; 3320 3321 /* 3322 * Parse the target name and check for special support like 3323 * namespace. 3324 */ 3325 target = xmlParsePITarget(ctxt); 3326 if (target != NULL) { 3327 if ((RAW == '?') && (NXT(1) == '>')) { 3328 if (input != ctxt->input) { 3329 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3331 ctxt->sax->error(ctxt->userData, 3332 "PI declaration doesn't start and stop in the same entity\n"); 3333 ctxt->wellFormed = 0; 3334 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3335 } 3336 SKIP(2); 3337 3338 /* 3339 * SAX: PI detected. 3340 */ 3341 if ((ctxt->sax) && (!ctxt->disableSAX) && 3342 (ctxt->sax->processingInstruction != NULL)) 3343 ctxt->sax->processingInstruction(ctxt->userData, 3344 target, NULL); 3345 ctxt->instate = state; 3346 xmlFree(target); 3347 return; 3348 } 3349 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3350 if (buf == NULL) { 3351 xmlGenericError(xmlGenericErrorContext, 3352 "malloc of %d byte failed\n", size); 3353 ctxt->instate = state; 3354 return; 3355 } 3356 cur = CUR; 3357 if (!IS_BLANK(cur)) { 3358 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3360 ctxt->sax->error(ctxt->userData, 3361 "xmlParsePI: PI %s space expected\n", target); 3362 ctxt->wellFormed = 0; 3363 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3364 } 3365 SKIP_BLANKS; 3366 cur = CUR_CHAR(l); 3367 while (IS_CHAR(cur) && /* checked */ 3368 ((cur != '?') || (NXT(1) != '>'))) { 3369 if (len + 5 >= size) { 3370 size *= 2; 3371 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3372 if (buf == NULL) { 3373 xmlGenericError(xmlGenericErrorContext, 3374 "realloc of %d byte failed\n", size); 3375 ctxt->instate = state; 3376 return; 3377 } 3378 } 3379 count++; 3380 if (count > 50) { 3381 GROW; 3382 count = 0; 3383 } 3384 COPY_BUF(l,buf,len,cur); 3385 NEXTL(l); 3386 cur = CUR_CHAR(l); 3387 if (cur == 0) { 3388 SHRINK; 3389 GROW; 3390 cur = CUR_CHAR(l); 3391 } 3392 } 3393 buf[len] = 0; 3394 if (cur != '?') { 3395 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3397 ctxt->sax->error(ctxt->userData, 3398 "xmlParsePI: PI %s never end ...\n", target); 3399 ctxt->wellFormed = 0; 3400 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3401 } else { 3402 if (input != ctxt->input) { 3403 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3405 ctxt->sax->error(ctxt->userData, 3406 "PI declaration doesn't start and stop in the same entity\n"); 3407 ctxt->wellFormed = 0; 3408 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3409 } 3410 SKIP(2); 3411 3412#ifdef LIBXML_CATALOG_ENABLED 3413 if (((state == XML_PARSER_MISC) || 3414 (state == XML_PARSER_START)) && 3415 (xmlStrEqual(target, XML_CATALOG_PI))) { 3416 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3417 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3418 (allow == XML_CATA_ALLOW_ALL)) 3419 xmlParseCatalogPI(ctxt, buf); 3420 } 3421#endif 3422 3423 3424 /* 3425 * SAX: PI detected. 3426 */ 3427 if ((ctxt->sax) && (!ctxt->disableSAX) && 3428 (ctxt->sax->processingInstruction != NULL)) 3429 ctxt->sax->processingInstruction(ctxt->userData, 3430 target, buf); 3431 } 3432 xmlFree(buf); 3433 xmlFree(target); 3434 } else { 3435 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3437 ctxt->sax->error(ctxt->userData, 3438 "xmlParsePI : no target name\n"); 3439 ctxt->wellFormed = 0; 3440 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3441 } 3442 ctxt->instate = state; 3443 } 3444} 3445 3446/** 3447 * xmlParseNotationDecl: 3448 * @ctxt: an XML parser context 3449 * 3450 * parse a notation declaration 3451 * 3452 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3453 * 3454 * Hence there is actually 3 choices: 3455 * 'PUBLIC' S PubidLiteral 3456 * 'PUBLIC' S PubidLiteral S SystemLiteral 3457 * and 'SYSTEM' S SystemLiteral 3458 * 3459 * See the NOTE on xmlParseExternalID(). 3460 */ 3461 3462void 3463xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3464 xmlChar *name; 3465 xmlChar *Pubid; 3466 xmlChar *Systemid; 3467 3468 if ((RAW == '<') && (NXT(1) == '!') && 3469 (NXT(2) == 'N') && (NXT(3) == 'O') && 3470 (NXT(4) == 'T') && (NXT(5) == 'A') && 3471 (NXT(6) == 'T') && (NXT(7) == 'I') && 3472 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3473 xmlParserInputPtr input = ctxt->input; 3474 SHRINK; 3475 SKIP(10); 3476 if (!IS_BLANK(CUR)) { 3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3479 ctxt->sax->error(ctxt->userData, 3480 "Space required after '<!NOTATION'\n"); 3481 ctxt->wellFormed = 0; 3482 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3483 return; 3484 } 3485 SKIP_BLANKS; 3486 3487 name = xmlParseName(ctxt); 3488 if (name == NULL) { 3489 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3491 ctxt->sax->error(ctxt->userData, 3492 "NOTATION: Name expected here\n"); 3493 ctxt->wellFormed = 0; 3494 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3495 return; 3496 } 3497 if (!IS_BLANK(CUR)) { 3498 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3500 ctxt->sax->error(ctxt->userData, 3501 "Space required after the NOTATION name'\n"); 3502 ctxt->wellFormed = 0; 3503 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3504 return; 3505 } 3506 SKIP_BLANKS; 3507 3508 /* 3509 * Parse the IDs. 3510 */ 3511 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3512 SKIP_BLANKS; 3513 3514 if (RAW == '>') { 3515 if (input != ctxt->input) { 3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3518 ctxt->sax->error(ctxt->userData, 3519"Notation declaration doesn't start and stop in the same entity\n"); 3520 ctxt->wellFormed = 0; 3521 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3522 } 3523 NEXT; 3524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3525 (ctxt->sax->notationDecl != NULL)) 3526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3527 } else { 3528 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3530 ctxt->sax->error(ctxt->userData, 3531 "'>' required to close NOTATION declaration\n"); 3532 ctxt->wellFormed = 0; 3533 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3534 } 3535 xmlFree(name); 3536 if (Systemid != NULL) xmlFree(Systemid); 3537 if (Pubid != NULL) xmlFree(Pubid); 3538 } 3539} 3540 3541/** 3542 * xmlParseEntityDecl: 3543 * @ctxt: an XML parser context 3544 * 3545 * parse <!ENTITY declarations 3546 * 3547 * [70] EntityDecl ::= GEDecl | PEDecl 3548 * 3549 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3550 * 3551 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3552 * 3553 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3554 * 3555 * [74] PEDef ::= EntityValue | ExternalID 3556 * 3557 * [76] NDataDecl ::= S 'NDATA' S Name 3558 * 3559 * [ VC: Notation Declared ] 3560 * The Name must match the declared name of a notation. 3561 */ 3562 3563void 3564xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3565 xmlChar *name = NULL; 3566 xmlChar *value = NULL; 3567 xmlChar *URI = NULL, *literal = NULL; 3568 xmlChar *ndata = NULL; 3569 int isParameter = 0; 3570 xmlChar *orig = NULL; 3571 int skipped; 3572 3573 GROW; 3574 if ((RAW == '<') && (NXT(1) == '!') && 3575 (NXT(2) == 'E') && (NXT(3) == 'N') && 3576 (NXT(4) == 'T') && (NXT(5) == 'I') && 3577 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3578 xmlParserInputPtr input = ctxt->input; 3579 SHRINK; 3580 SKIP(8); 3581 skipped = SKIP_BLANKS; 3582 if (skipped == 0) { 3583 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3585 ctxt->sax->error(ctxt->userData, 3586 "Space required after '<!ENTITY'\n"); 3587 ctxt->wellFormed = 0; 3588 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3589 } 3590 3591 if (RAW == '%') { 3592 NEXT; 3593 skipped = SKIP_BLANKS; 3594 if (skipped == 0) { 3595 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3597 ctxt->sax->error(ctxt->userData, 3598 "Space required after '%'\n"); 3599 ctxt->wellFormed = 0; 3600 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3601 } 3602 isParameter = 1; 3603 } 3604 3605 name = xmlParseName(ctxt); 3606 if (name == NULL) { 3607 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3609 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3610 ctxt->wellFormed = 0; 3611 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3612 return; 3613 } 3614 skipped = SKIP_BLANKS; 3615 if (skipped == 0) { 3616 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3618 ctxt->sax->error(ctxt->userData, 3619 "Space required after the entity name\n"); 3620 ctxt->wellFormed = 0; 3621 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3622 } 3623 3624 ctxt->instate = XML_PARSER_ENTITY_DECL; 3625 /* 3626 * handle the various case of definitions... 3627 */ 3628 if (isParameter) { 3629 if ((RAW == '"') || (RAW == '\'')) { 3630 value = xmlParseEntityValue(ctxt, &orig); 3631 if (value) { 3632 if ((ctxt->sax != NULL) && 3633 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3634 ctxt->sax->entityDecl(ctxt->userData, name, 3635 XML_INTERNAL_PARAMETER_ENTITY, 3636 NULL, NULL, value); 3637 } 3638 } else { 3639 URI = xmlParseExternalID(ctxt, &literal, 1); 3640 if ((URI == NULL) && (literal == NULL)) { 3641 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3643 ctxt->sax->error(ctxt->userData, 3644 "Entity value required\n"); 3645 ctxt->wellFormed = 0; 3646 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3647 } 3648 if (URI) { 3649 xmlURIPtr uri; 3650 3651 uri = xmlParseURI((const char *) URI); 3652 if (uri == NULL) { 3653 ctxt->errNo = XML_ERR_INVALID_URI; 3654 if ((ctxt->sax != NULL) && 3655 (!ctxt->disableSAX) && 3656 (ctxt->sax->error != NULL)) 3657 ctxt->sax->error(ctxt->userData, 3658 "Invalid URI: %s\n", URI); 3659 /* 3660 * This really ought to be a well formedness error 3661 * but the XML Core WG decided otherwise c.f. issue 3662 * E26 of the XML erratas. 3663 */ 3664 } else { 3665 if (uri->fragment != NULL) { 3666 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3667 if ((ctxt->sax != NULL) && 3668 (!ctxt->disableSAX) && 3669 (ctxt->sax->error != NULL)) 3670 ctxt->sax->error(ctxt->userData, 3671 "Fragment not allowed: %s\n", URI); 3672 /* 3673 * Okay this is foolish to block those but not 3674 * invalid URIs. 3675 */ 3676 ctxt->wellFormed = 0; 3677 } else { 3678 if ((ctxt->sax != NULL) && 3679 (!ctxt->disableSAX) && 3680 (ctxt->sax->entityDecl != NULL)) 3681 ctxt->sax->entityDecl(ctxt->userData, name, 3682 XML_EXTERNAL_PARAMETER_ENTITY, 3683 literal, URI, NULL); 3684 } 3685 xmlFreeURI(uri); 3686 } 3687 } 3688 } 3689 } else { 3690 if ((RAW == '"') || (RAW == '\'')) { 3691 value = xmlParseEntityValue(ctxt, &orig); 3692 if ((ctxt->sax != NULL) && 3693 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3694 ctxt->sax->entityDecl(ctxt->userData, name, 3695 XML_INTERNAL_GENERAL_ENTITY, 3696 NULL, NULL, value); 3697 /* 3698 * For expat compatibility in SAX mode. 3699 */ 3700 if ((ctxt->myDoc == NULL) || 3701 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 3702 if (ctxt->myDoc == NULL) { 3703 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3704 } 3705 if (ctxt->myDoc->intSubset == NULL) 3706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3707 BAD_CAST "fake", NULL, NULL); 3708 3709 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 3710 NULL, NULL, value); 3711 } 3712 } else { 3713 URI = xmlParseExternalID(ctxt, &literal, 1); 3714 if ((URI == NULL) && (literal == NULL)) { 3715 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3717 ctxt->sax->error(ctxt->userData, 3718 "Entity value required\n"); 3719 ctxt->wellFormed = 0; 3720 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3721 } 3722 if (URI) { 3723 xmlURIPtr uri; 3724 3725 uri = xmlParseURI((const char *)URI); 3726 if (uri == NULL) { 3727 ctxt->errNo = XML_ERR_INVALID_URI; 3728 if ((ctxt->sax != NULL) && 3729 (!ctxt->disableSAX) && 3730 (ctxt->sax->error != NULL)) 3731 ctxt->sax->error(ctxt->userData, 3732 "Invalid URI: %s\n", URI); 3733 /* 3734 * This really ought to be a well formedness error 3735 * but the XML Core WG decided otherwise c.f. issue 3736 * E26 of the XML erratas. 3737 */ 3738 } else { 3739 if (uri->fragment != NULL) { 3740 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3741 if ((ctxt->sax != NULL) && 3742 (!ctxt->disableSAX) && 3743 (ctxt->sax->error != NULL)) 3744 ctxt->sax->error(ctxt->userData, 3745 "Fragment not allowed: %s\n", URI); 3746 /* 3747 * Okay this is foolish to block those but not 3748 * invalid URIs. 3749 */ 3750 ctxt->wellFormed = 0; 3751 } 3752 xmlFreeURI(uri); 3753 } 3754 } 3755 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3758 ctxt->sax->error(ctxt->userData, 3759 "Space required before 'NDATA'\n"); 3760 ctxt->wellFormed = 0; 3761 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3762 } 3763 SKIP_BLANKS; 3764 if ((RAW == 'N') && (NXT(1) == 'D') && 3765 (NXT(2) == 'A') && (NXT(3) == 'T') && 3766 (NXT(4) == 'A')) { 3767 SKIP(5); 3768 if (!IS_BLANK(CUR)) { 3769 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3771 ctxt->sax->error(ctxt->userData, 3772 "Space required after 'NDATA'\n"); 3773 ctxt->wellFormed = 0; 3774 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3775 } 3776 SKIP_BLANKS; 3777 ndata = xmlParseName(ctxt); 3778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3779 (ctxt->sax->unparsedEntityDecl != NULL)) 3780 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3781 literal, URI, ndata); 3782 } else { 3783 if ((ctxt->sax != NULL) && 3784 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3785 ctxt->sax->entityDecl(ctxt->userData, name, 3786 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3787 literal, URI, NULL); 3788 /* 3789 * For expat compatibility in SAX mode. 3790 * assuming the entity repalcement was asked for 3791 */ 3792 if ((ctxt->replaceEntities != 0) && 3793 ((ctxt->myDoc == NULL) || 3794 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 3795 if (ctxt->myDoc == NULL) { 3796 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3797 } 3798 3799 if (ctxt->myDoc->intSubset == NULL) 3800 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3801 BAD_CAST "fake", NULL, NULL); 3802 entityDecl(ctxt, name, 3803 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3804 literal, URI, NULL); 3805 } 3806 } 3807 } 3808 } 3809 SKIP_BLANKS; 3810 if (RAW != '>') { 3811 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3813 ctxt->sax->error(ctxt->userData, 3814 "xmlParseEntityDecl: entity %s not terminated\n", name); 3815 ctxt->wellFormed = 0; 3816 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3817 } else { 3818 if (input != ctxt->input) { 3819 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3821 ctxt->sax->error(ctxt->userData, 3822"Entity declaration doesn't start and stop in the same entity\n"); 3823 ctxt->wellFormed = 0; 3824 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3825 } 3826 NEXT; 3827 } 3828 if (orig != NULL) { 3829 /* 3830 * Ugly mechanism to save the raw entity value. 3831 */ 3832 xmlEntityPtr cur = NULL; 3833 3834 if (isParameter) { 3835 if ((ctxt->sax != NULL) && 3836 (ctxt->sax->getParameterEntity != NULL)) 3837 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3838 } else { 3839 if ((ctxt->sax != NULL) && 3840 (ctxt->sax->getEntity != NULL)) 3841 cur = ctxt->sax->getEntity(ctxt->userData, name); 3842 if ((cur == NULL) && (ctxt->userData==ctxt)) { 3843 cur = getEntity(ctxt, name); 3844 } 3845 } 3846 if (cur != NULL) { 3847 if (cur->orig != NULL) 3848 xmlFree(orig); 3849 else 3850 cur->orig = orig; 3851 } else 3852 xmlFree(orig); 3853 } 3854 if (name != NULL) xmlFree(name); 3855 if (value != NULL) xmlFree(value); 3856 if (URI != NULL) xmlFree(URI); 3857 if (literal != NULL) xmlFree(literal); 3858 if (ndata != NULL) xmlFree(ndata); 3859 } 3860} 3861 3862/** 3863 * xmlParseDefaultDecl: 3864 * @ctxt: an XML parser context 3865 * @value: Receive a possible fixed default value for the attribute 3866 * 3867 * Parse an attribute default declaration 3868 * 3869 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 3870 * 3871 * [ VC: Required Attribute ] 3872 * if the default declaration is the keyword #REQUIRED, then the 3873 * attribute must be specified for all elements of the type in the 3874 * attribute-list declaration. 3875 * 3876 * [ VC: Attribute Default Legal ] 3877 * The declared default value must meet the lexical constraints of 3878 * the declared attribute type c.f. xmlValidateAttributeDecl() 3879 * 3880 * [ VC: Fixed Attribute Default ] 3881 * if an attribute has a default value declared with the #FIXED 3882 * keyword, instances of that attribute must match the default value. 3883 * 3884 * [ WFC: No < in Attribute Values ] 3885 * handled in xmlParseAttValue() 3886 * 3887 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3888 * or XML_ATTRIBUTE_FIXED. 3889 */ 3890 3891int 3892xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3893 int val; 3894 xmlChar *ret; 3895 3896 *value = NULL; 3897 if ((RAW == '#') && (NXT(1) == 'R') && 3898 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3899 (NXT(4) == 'U') && (NXT(5) == 'I') && 3900 (NXT(6) == 'R') && (NXT(7) == 'E') && 3901 (NXT(8) == 'D')) { 3902 SKIP(9); 3903 return(XML_ATTRIBUTE_REQUIRED); 3904 } 3905 if ((RAW == '#') && (NXT(1) == 'I') && 3906 (NXT(2) == 'M') && (NXT(3) == 'P') && 3907 (NXT(4) == 'L') && (NXT(5) == 'I') && 3908 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3909 SKIP(8); 3910 return(XML_ATTRIBUTE_IMPLIED); 3911 } 3912 val = XML_ATTRIBUTE_NONE; 3913 if ((RAW == '#') && (NXT(1) == 'F') && 3914 (NXT(2) == 'I') && (NXT(3) == 'X') && 3915 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3916 SKIP(6); 3917 val = XML_ATTRIBUTE_FIXED; 3918 if (!IS_BLANK(CUR)) { 3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3921 ctxt->sax->error(ctxt->userData, 3922 "Space required after '#FIXED'\n"); 3923 ctxt->wellFormed = 0; 3924 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3925 } 3926 SKIP_BLANKS; 3927 } 3928 ret = xmlParseAttValue(ctxt); 3929 ctxt->instate = XML_PARSER_DTD; 3930 if (ret == NULL) { 3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3932 ctxt->sax->error(ctxt->userData, 3933 "Attribute default value declaration error\n"); 3934 ctxt->wellFormed = 0; 3935 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3936 } else 3937 *value = ret; 3938 return(val); 3939} 3940 3941/** 3942 * xmlParseNotationType: 3943 * @ctxt: an XML parser context 3944 * 3945 * parse an Notation attribute type. 3946 * 3947 * Note: the leading 'NOTATION' S part has already being parsed... 3948 * 3949 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3950 * 3951 * [ VC: Notation Attributes ] 3952 * Values of this type must match one of the notation names included 3953 * in the declaration; all notation names in the declaration must be declared. 3954 * 3955 * Returns: the notation attribute tree built while parsing 3956 */ 3957 3958xmlEnumerationPtr 3959xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3960 xmlChar *name; 3961 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3962 3963 if (RAW != '(') { 3964 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3966 ctxt->sax->error(ctxt->userData, 3967 "'(' required to start 'NOTATION'\n"); 3968 ctxt->wellFormed = 0; 3969 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3970 return(NULL); 3971 } 3972 SHRINK; 3973 do { 3974 NEXT; 3975 SKIP_BLANKS; 3976 name = xmlParseName(ctxt); 3977 if (name == NULL) { 3978 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3980 ctxt->sax->error(ctxt->userData, 3981 "Name expected in NOTATION declaration\n"); 3982 ctxt->wellFormed = 0; 3983 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 3984 return(ret); 3985 } 3986 cur = xmlCreateEnumeration(name); 3987 xmlFree(name); 3988 if (cur == NULL) return(ret); 3989 if (last == NULL) ret = last = cur; 3990 else { 3991 last->next = cur; 3992 last = cur; 3993 } 3994 SKIP_BLANKS; 3995 } while (RAW == '|'); 3996 if (RAW != ')') { 3997 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3999 ctxt->sax->error(ctxt->userData, 4000 "')' required to finish NOTATION declaration\n"); 4001 ctxt->wellFormed = 0; 4002 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4003 if ((last != NULL) && (last != ret)) 4004 xmlFreeEnumeration(last); 4005 return(ret); 4006 } 4007 NEXT; 4008 return(ret); 4009} 4010 4011/** 4012 * xmlParseEnumerationType: 4013 * @ctxt: an XML parser context 4014 * 4015 * parse an Enumeration attribute type. 4016 * 4017 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 4018 * 4019 * [ VC: Enumeration ] 4020 * Values of this type must match one of the Nmtoken tokens in 4021 * the declaration 4022 * 4023 * Returns: the enumeration attribute tree built while parsing 4024 */ 4025 4026xmlEnumerationPtr 4027xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 4028 xmlChar *name; 4029 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4030 4031 if (RAW != '(') { 4032 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4034 ctxt->sax->error(ctxt->userData, 4035 "'(' required to start ATTLIST enumeration\n"); 4036 ctxt->wellFormed = 0; 4037 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4038 return(NULL); 4039 } 4040 SHRINK; 4041 do { 4042 NEXT; 4043 SKIP_BLANKS; 4044 name = xmlParseNmtoken(ctxt); 4045 if (name == NULL) { 4046 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4048 ctxt->sax->error(ctxt->userData, 4049 "NmToken expected in ATTLIST enumeration\n"); 4050 ctxt->wellFormed = 0; 4051 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4052 return(ret); 4053 } 4054 cur = xmlCreateEnumeration(name); 4055 xmlFree(name); 4056 if (cur == NULL) return(ret); 4057 if (last == NULL) ret = last = cur; 4058 else { 4059 last->next = cur; 4060 last = cur; 4061 } 4062 SKIP_BLANKS; 4063 } while (RAW == '|'); 4064 if (RAW != ')') { 4065 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 4066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4067 ctxt->sax->error(ctxt->userData, 4068 "')' required to finish ATTLIST enumeration\n"); 4069 ctxt->wellFormed = 0; 4070 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4071 return(ret); 4072 } 4073 NEXT; 4074 return(ret); 4075} 4076 4077/** 4078 * xmlParseEnumeratedType: 4079 * @ctxt: an XML parser context 4080 * @tree: the enumeration tree built while parsing 4081 * 4082 * parse an Enumerated attribute type. 4083 * 4084 * [57] EnumeratedType ::= NotationType | Enumeration 4085 * 4086 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4087 * 4088 * 4089 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4090 */ 4091 4092int 4093xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4094 if ((RAW == 'N') && (NXT(1) == 'O') && 4095 (NXT(2) == 'T') && (NXT(3) == 'A') && 4096 (NXT(4) == 'T') && (NXT(5) == 'I') && 4097 (NXT(6) == 'O') && (NXT(7) == 'N')) { 4098 SKIP(8); 4099 if (!IS_BLANK(CUR)) { 4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4102 ctxt->sax->error(ctxt->userData, 4103 "Space required after 'NOTATION'\n"); 4104 ctxt->wellFormed = 0; 4105 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4106 return(0); 4107 } 4108 SKIP_BLANKS; 4109 *tree = xmlParseNotationType(ctxt); 4110 if (*tree == NULL) return(0); 4111 return(XML_ATTRIBUTE_NOTATION); 4112 } 4113 *tree = xmlParseEnumerationType(ctxt); 4114 if (*tree == NULL) return(0); 4115 return(XML_ATTRIBUTE_ENUMERATION); 4116} 4117 4118/** 4119 * xmlParseAttributeType: 4120 * @ctxt: an XML parser context 4121 * @tree: the enumeration tree built while parsing 4122 * 4123 * parse the Attribute list def for an element 4124 * 4125 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4126 * 4127 * [55] StringType ::= 'CDATA' 4128 * 4129 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4130 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4131 * 4132 * Validity constraints for attribute values syntax are checked in 4133 * xmlValidateAttributeValue() 4134 * 4135 * [ VC: ID ] 4136 * Values of type ID must match the Name production. A name must not 4137 * appear more than once in an XML document as a value of this type; 4138 * i.e., ID values must uniquely identify the elements which bear them. 4139 * 4140 * [ VC: One ID per Element Type ] 4141 * No element type may have more than one ID attribute specified. 4142 * 4143 * [ VC: ID Attribute Default ] 4144 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4145 * 4146 * [ VC: IDREF ] 4147 * Values of type IDREF must match the Name production, and values 4148 * of type IDREFS must match Names; each IDREF Name must match the value 4149 * of an ID attribute on some element in the XML document; i.e. IDREF 4150 * values must match the value of some ID attribute. 4151 * 4152 * [ VC: Entity Name ] 4153 * Values of type ENTITY must match the Name production, values 4154 * of type ENTITIES must match Names; each Entity Name must match the 4155 * name of an unparsed entity declared in the DTD. 4156 * 4157 * [ VC: Name Token ] 4158 * Values of type NMTOKEN must match the Nmtoken production; values 4159 * of type NMTOKENS must match Nmtokens. 4160 * 4161 * Returns the attribute type 4162 */ 4163int 4164xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4165 SHRINK; 4166 if ((RAW == 'C') && (NXT(1) == 'D') && 4167 (NXT(2) == 'A') && (NXT(3) == 'T') && 4168 (NXT(4) == 'A')) { 4169 SKIP(5); 4170 return(XML_ATTRIBUTE_CDATA); 4171 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4172 (NXT(2) == 'R') && (NXT(3) == 'E') && 4173 (NXT(4) == 'F') && (NXT(5) == 'S')) { 4174 SKIP(6); 4175 return(XML_ATTRIBUTE_IDREFS); 4176 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4177 (NXT(2) == 'R') && (NXT(3) == 'E') && 4178 (NXT(4) == 'F')) { 4179 SKIP(5); 4180 return(XML_ATTRIBUTE_IDREF); 4181 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4182 SKIP(2); 4183 return(XML_ATTRIBUTE_ID); 4184 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4185 (NXT(2) == 'T') && (NXT(3) == 'I') && 4186 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 4187 SKIP(6); 4188 return(XML_ATTRIBUTE_ENTITY); 4189 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4190 (NXT(2) == 'T') && (NXT(3) == 'I') && 4191 (NXT(4) == 'T') && (NXT(5) == 'I') && 4192 (NXT(6) == 'E') && (NXT(7) == 'S')) { 4193 SKIP(8); 4194 return(XML_ATTRIBUTE_ENTITIES); 4195 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4196 (NXT(2) == 'T') && (NXT(3) == 'O') && 4197 (NXT(4) == 'K') && (NXT(5) == 'E') && 4198 (NXT(6) == 'N') && (NXT(7) == 'S')) { 4199 SKIP(8); 4200 return(XML_ATTRIBUTE_NMTOKENS); 4201 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4202 (NXT(2) == 'T') && (NXT(3) == 'O') && 4203 (NXT(4) == 'K') && (NXT(5) == 'E') && 4204 (NXT(6) == 'N')) { 4205 SKIP(7); 4206 return(XML_ATTRIBUTE_NMTOKEN); 4207 } 4208 return(xmlParseEnumeratedType(ctxt, tree)); 4209} 4210 4211/** 4212 * xmlParseAttributeListDecl: 4213 * @ctxt: an XML parser context 4214 * 4215 * : parse the Attribute list def for an element 4216 * 4217 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 4218 * 4219 * [53] AttDef ::= S Name S AttType S DefaultDecl 4220 * 4221 */ 4222void 4223xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4224 xmlChar *elemName; 4225 xmlChar *attrName; 4226 xmlEnumerationPtr tree; 4227 4228 if ((RAW == '<') && (NXT(1) == '!') && 4229 (NXT(2) == 'A') && (NXT(3) == 'T') && 4230 (NXT(4) == 'T') && (NXT(5) == 'L') && 4231 (NXT(6) == 'I') && (NXT(7) == 'S') && 4232 (NXT(8) == 'T')) { 4233 xmlParserInputPtr input = ctxt->input; 4234 4235 SKIP(9); 4236 if (!IS_BLANK(CUR)) { 4237 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4239 ctxt->sax->error(ctxt->userData, 4240 "Space required after '<!ATTLIST'\n"); 4241 ctxt->wellFormed = 0; 4242 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4243 } 4244 SKIP_BLANKS; 4245 elemName = xmlParseName(ctxt); 4246 if (elemName == NULL) { 4247 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4249 ctxt->sax->error(ctxt->userData, 4250 "ATTLIST: no name for Element\n"); 4251 ctxt->wellFormed = 0; 4252 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4253 return; 4254 } 4255 SKIP_BLANKS; 4256 GROW; 4257 while (RAW != '>') { 4258 const xmlChar *check = CUR_PTR; 4259 int type; 4260 int def; 4261 xmlChar *defaultValue = NULL; 4262 4263 GROW; 4264 tree = NULL; 4265 attrName = xmlParseName(ctxt); 4266 if (attrName == NULL) { 4267 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4269 ctxt->sax->error(ctxt->userData, 4270 "ATTLIST: no name for Attribute\n"); 4271 ctxt->wellFormed = 0; 4272 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4273 break; 4274 } 4275 GROW; 4276 if (!IS_BLANK(CUR)) { 4277 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4279 ctxt->sax->error(ctxt->userData, 4280 "Space required after the attribute name\n"); 4281 ctxt->wellFormed = 0; 4282 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4283 if (attrName != NULL) 4284 xmlFree(attrName); 4285 if (defaultValue != NULL) 4286 xmlFree(defaultValue); 4287 break; 4288 } 4289 SKIP_BLANKS; 4290 4291 type = xmlParseAttributeType(ctxt, &tree); 4292 if (type <= 0) { 4293 if (attrName != NULL) 4294 xmlFree(attrName); 4295 if (defaultValue != NULL) 4296 xmlFree(defaultValue); 4297 break; 4298 } 4299 4300 GROW; 4301 if (!IS_BLANK(CUR)) { 4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4304 ctxt->sax->error(ctxt->userData, 4305 "Space required after the attribute type\n"); 4306 ctxt->wellFormed = 0; 4307 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4308 if (attrName != NULL) 4309 xmlFree(attrName); 4310 if (defaultValue != NULL) 4311 xmlFree(defaultValue); 4312 if (tree != NULL) 4313 xmlFreeEnumeration(tree); 4314 break; 4315 } 4316 SKIP_BLANKS; 4317 4318 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4319 if (def <= 0) { 4320 if (attrName != NULL) 4321 xmlFree(attrName); 4322 if (defaultValue != NULL) 4323 xmlFree(defaultValue); 4324 if (tree != NULL) 4325 xmlFreeEnumeration(tree); 4326 break; 4327 } 4328 4329 GROW; 4330 if (RAW != '>') { 4331 if (!IS_BLANK(CUR)) { 4332 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4334 ctxt->sax->error(ctxt->userData, 4335 "Space required after the attribute default value\n"); 4336 ctxt->wellFormed = 0; 4337 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4338 if (attrName != NULL) 4339 xmlFree(attrName); 4340 if (defaultValue != NULL) 4341 xmlFree(defaultValue); 4342 if (tree != NULL) 4343 xmlFreeEnumeration(tree); 4344 break; 4345 } 4346 SKIP_BLANKS; 4347 } 4348 if (check == CUR_PTR) { 4349 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4351 ctxt->sax->error(ctxt->userData, 4352 "xmlParseAttributeListDecl: detected internal error\n"); 4353 if (attrName != NULL) 4354 xmlFree(attrName); 4355 if (defaultValue != NULL) 4356 xmlFree(defaultValue); 4357 if (tree != NULL) 4358 xmlFreeEnumeration(tree); 4359 break; 4360 } 4361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4362 (ctxt->sax->attributeDecl != NULL)) 4363 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4364 type, def, defaultValue, tree); 4365 if (attrName != NULL) 4366 xmlFree(attrName); 4367 if (defaultValue != NULL) 4368 xmlFree(defaultValue); 4369 GROW; 4370 } 4371 if (RAW == '>') { 4372 if (input != ctxt->input) { 4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4375 ctxt->sax->error(ctxt->userData, 4376"Attribute list declaration doesn't start and stop in the same entity\n"); 4377 ctxt->wellFormed = 0; 4378 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4379 } 4380 NEXT; 4381 } 4382 4383 xmlFree(elemName); 4384 } 4385} 4386 4387/** 4388 * xmlParseElementMixedContentDecl: 4389 * @ctxt: an XML parser context 4390 * @inputchk: the input used for the current entity, needed for boundary checks 4391 * 4392 * parse the declaration for a Mixed Element content 4393 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4394 * 4395 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4396 * '(' S? '#PCDATA' S? ')' 4397 * 4398 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4399 * 4400 * [ VC: No Duplicate Types ] 4401 * The same name must not appear more than once in a single 4402 * mixed-content declaration. 4403 * 4404 * returns: the list of the xmlElementContentPtr describing the element choices 4405 */ 4406xmlElementContentPtr 4407xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { 4408 xmlElementContentPtr ret = NULL, cur = NULL, n; 4409 xmlChar *elem = NULL; 4410 4411 GROW; 4412 if ((RAW == '#') && (NXT(1) == 'P') && 4413 (NXT(2) == 'C') && (NXT(3) == 'D') && 4414 (NXT(4) == 'A') && (NXT(5) == 'T') && 4415 (NXT(6) == 'A')) { 4416 SKIP(7); 4417 SKIP_BLANKS; 4418 SHRINK; 4419 if (RAW == ')') { 4420 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4422 if (ctxt->vctxt.error != NULL) 4423 ctxt->vctxt.error(ctxt->vctxt.userData, 4424"Element content declaration doesn't start and stop in the same entity\n"); 4425 ctxt->valid = 0; 4426 } 4427 NEXT; 4428 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4429 if (RAW == '*') { 4430 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4431 NEXT; 4432 } 4433 return(ret); 4434 } 4435 if ((RAW == '(') || (RAW == '|')) { 4436 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4437 if (ret == NULL) return(NULL); 4438 } 4439 while (RAW == '|') { 4440 NEXT; 4441 if (elem == NULL) { 4442 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4443 if (ret == NULL) return(NULL); 4444 ret->c1 = cur; 4445 if (cur != NULL) 4446 cur->parent = ret; 4447 cur = ret; 4448 } else { 4449 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4450 if (n == NULL) return(NULL); 4451 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4452 if (n->c1 != NULL) 4453 n->c1->parent = n; 4454 cur->c2 = n; 4455 if (n != NULL) 4456 n->parent = cur; 4457 cur = n; 4458 xmlFree(elem); 4459 } 4460 SKIP_BLANKS; 4461 elem = xmlParseName(ctxt); 4462 if (elem == NULL) { 4463 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4465 ctxt->sax->error(ctxt->userData, 4466 "xmlParseElementMixedContentDecl : Name expected\n"); 4467 ctxt->wellFormed = 0; 4468 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4469 xmlFreeElementContent(cur); 4470 return(NULL); 4471 } 4472 SKIP_BLANKS; 4473 GROW; 4474 } 4475 if ((RAW == ')') && (NXT(1) == '*')) { 4476 if (elem != NULL) { 4477 cur->c2 = xmlNewElementContent(elem, 4478 XML_ELEMENT_CONTENT_ELEMENT); 4479 if (cur->c2 != NULL) 4480 cur->c2->parent = cur; 4481 xmlFree(elem); 4482 } 4483 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4484 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4486 if (ctxt->vctxt.error != NULL) 4487 ctxt->vctxt.error(ctxt->vctxt.userData, 4488"Element content declaration doesn't start and stop in the same entity\n"); 4489 ctxt->valid = 0; 4490 } 4491 SKIP(2); 4492 } else { 4493 if (elem != NULL) xmlFree(elem); 4494 xmlFreeElementContent(ret); 4495 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4497 ctxt->sax->error(ctxt->userData, 4498 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4499 ctxt->wellFormed = 0; 4500 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4501 return(NULL); 4502 } 4503 4504 } else { 4505 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4507 ctxt->sax->error(ctxt->userData, 4508 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4509 ctxt->wellFormed = 0; 4510 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4511 } 4512 return(ret); 4513} 4514 4515/** 4516 * xmlParseElementChildrenContentDecl: 4517 * @ctxt: an XML parser context 4518 * @inputchk: the input used for the current entity, needed for boundary checks 4519 * 4520 * parse the declaration for a Mixed Element content 4521 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4522 * 4523 * 4524 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4525 * 4526 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4527 * 4528 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4529 * 4530 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4531 * 4532 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4533 * TODO Parameter-entity replacement text must be properly nested 4534 * with parenthesized groups. That is to say, if either of the 4535 * opening or closing parentheses in a choice, seq, or Mixed 4536 * construct is contained in the replacement text for a parameter 4537 * entity, both must be contained in the same replacement text. For 4538 * interoperability, if a parameter-entity reference appears in a 4539 * choice, seq, or Mixed construct, its replacement text should not 4540 * be empty, and neither the first nor last non-blank character of 4541 * the replacement text should be a connector (| or ,). 4542 * 4543 * Returns the tree of xmlElementContentPtr describing the element 4544 * hierarchy. 4545 */ 4546xmlElementContentPtr 4547xmlParseElementChildrenContentDecl 4548(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { 4549 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4550 xmlChar *elem; 4551 xmlChar type = 0; 4552 4553 SKIP_BLANKS; 4554 GROW; 4555 if (RAW == '(') { 4556 xmlParserInputPtr input = ctxt->input; 4557 4558 /* Recurse on first child */ 4559 NEXT; 4560 SKIP_BLANKS; 4561 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input); 4562 SKIP_BLANKS; 4563 GROW; 4564 } else { 4565 elem = xmlParseName(ctxt); 4566 if (elem == NULL) { 4567 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4569 ctxt->sax->error(ctxt->userData, 4570 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4571 ctxt->wellFormed = 0; 4572 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4573 return(NULL); 4574 } 4575 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4576 GROW; 4577 if (RAW == '?') { 4578 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4579 NEXT; 4580 } else if (RAW == '*') { 4581 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4582 NEXT; 4583 } else if (RAW == '+') { 4584 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4585 NEXT; 4586 } else { 4587 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4588 } 4589 xmlFree(elem); 4590 GROW; 4591 } 4592 SKIP_BLANKS; 4593 SHRINK; 4594 while (RAW != ')') { 4595 /* 4596 * Each loop we parse one separator and one element. 4597 */ 4598 if (RAW == ',') { 4599 if (type == 0) type = CUR; 4600 4601 /* 4602 * Detect "Name | Name , Name" error 4603 */ 4604 else if (type != CUR) { 4605 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4607 ctxt->sax->error(ctxt->userData, 4608 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4609 type); 4610 ctxt->wellFormed = 0; 4611 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4612 if ((last != NULL) && (last != ret)) 4613 xmlFreeElementContent(last); 4614 if (ret != NULL) 4615 xmlFreeElementContent(ret); 4616 return(NULL); 4617 } 4618 NEXT; 4619 4620 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4621 if (op == NULL) { 4622 if ((last != NULL) && (last != ret)) 4623 xmlFreeElementContent(last); 4624 xmlFreeElementContent(ret); 4625 return(NULL); 4626 } 4627 if (last == NULL) { 4628 op->c1 = ret; 4629 if (ret != NULL) 4630 ret->parent = op; 4631 ret = cur = op; 4632 } else { 4633 cur->c2 = op; 4634 if (op != NULL) 4635 op->parent = cur; 4636 op->c1 = last; 4637 if (last != NULL) 4638 last->parent = op; 4639 cur =op; 4640 last = NULL; 4641 } 4642 } else if (RAW == '|') { 4643 if (type == 0) type = CUR; 4644 4645 /* 4646 * Detect "Name , Name | Name" error 4647 */ 4648 else if (type != CUR) { 4649 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4651 ctxt->sax->error(ctxt->userData, 4652 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4653 type); 4654 ctxt->wellFormed = 0; 4655 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4656 if ((last != NULL) && (last != ret)) 4657 xmlFreeElementContent(last); 4658 if (ret != NULL) 4659 xmlFreeElementContent(ret); 4660 return(NULL); 4661 } 4662 NEXT; 4663 4664 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4665 if (op == NULL) { 4666 if ((last != NULL) && (last != ret)) 4667 xmlFreeElementContent(last); 4668 if (ret != NULL) 4669 xmlFreeElementContent(ret); 4670 return(NULL); 4671 } 4672 if (last == NULL) { 4673 op->c1 = ret; 4674 if (ret != NULL) 4675 ret->parent = op; 4676 ret = cur = op; 4677 } else { 4678 cur->c2 = op; 4679 if (op != NULL) 4680 op->parent = cur; 4681 op->c1 = last; 4682 if (last != NULL) 4683 last->parent = op; 4684 cur =op; 4685 last = NULL; 4686 } 4687 } else { 4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4690 ctxt->sax->error(ctxt->userData, 4691 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4692 ctxt->wellFormed = 0; 4693 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4694 if (ret != NULL) 4695 xmlFreeElementContent(ret); 4696 return(NULL); 4697 } 4698 GROW; 4699 SKIP_BLANKS; 4700 GROW; 4701 if (RAW == '(') { 4702 xmlParserInputPtr input = ctxt->input; 4703 /* Recurse on second child */ 4704 NEXT; 4705 SKIP_BLANKS; 4706 last = xmlParseElementChildrenContentDecl(ctxt, input); 4707 SKIP_BLANKS; 4708 } else { 4709 elem = xmlParseName(ctxt); 4710 if (elem == NULL) { 4711 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4713 ctxt->sax->error(ctxt->userData, 4714 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4715 ctxt->wellFormed = 0; 4716 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4717 if (ret != NULL) 4718 xmlFreeElementContent(ret); 4719 return(NULL); 4720 } 4721 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4722 xmlFree(elem); 4723 if (RAW == '?') { 4724 last->ocur = XML_ELEMENT_CONTENT_OPT; 4725 NEXT; 4726 } else if (RAW == '*') { 4727 last->ocur = XML_ELEMENT_CONTENT_MULT; 4728 NEXT; 4729 } else if (RAW == '+') { 4730 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4731 NEXT; 4732 } else { 4733 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4734 } 4735 } 4736 SKIP_BLANKS; 4737 GROW; 4738 } 4739 if ((cur != NULL) && (last != NULL)) { 4740 cur->c2 = last; 4741 if (last != NULL) 4742 last->parent = cur; 4743 } 4744 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4746 if (ctxt->vctxt.error != NULL) 4747 ctxt->vctxt.error(ctxt->vctxt.userData, 4748"Element content declaration doesn't start and stop in the same entity\n"); 4749 ctxt->valid = 0; 4750 } 4751 NEXT; 4752 if (RAW == '?') { 4753 if (ret != NULL) 4754 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4755 NEXT; 4756 } else if (RAW == '*') { 4757 if (ret != NULL) { 4758 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4759 cur = ret; 4760 /* 4761 * Some normalization: 4762 * (a | b* | c?)* == (a | b | c)* 4763 */ 4764 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4765 if ((cur->c1 != NULL) && 4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4769 if ((cur->c2 != NULL) && 4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4773 cur = cur->c2; 4774 } 4775 } 4776 NEXT; 4777 } else if (RAW == '+') { 4778 if (ret != NULL) { 4779 int found = 0; 4780 4781 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4782 /* 4783 * Some normalization: 4784 * (a | b*)+ == (a | b)* 4785 * (a | b?)+ == (a | b)* 4786 */ 4787 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4788 if ((cur->c1 != NULL) && 4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4792 found = 1; 4793 } 4794 if ((cur->c2 != NULL) && 4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4798 found = 1; 4799 } 4800 cur = cur->c2; 4801 } 4802 if (found) 4803 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4804 } 4805 NEXT; 4806 } 4807 return(ret); 4808} 4809 4810/** 4811 * xmlParseElementContentDecl: 4812 * @ctxt: an XML parser context 4813 * @name: the name of the element being defined. 4814 * @result: the Element Content pointer will be stored here if any 4815 * 4816 * parse the declaration for an Element content either Mixed or Children, 4817 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4818 * 4819 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4820 * 4821 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4822 */ 4823 4824int 4825xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4826 xmlElementContentPtr *result) { 4827 4828 xmlElementContentPtr tree = NULL; 4829 xmlParserInputPtr input = ctxt->input; 4830 int res; 4831 4832 *result = NULL; 4833 4834 if (RAW != '(') { 4835 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4837 ctxt->sax->error(ctxt->userData, 4838 "xmlParseElementContentDecl : %s '(' expected\n", name); 4839 ctxt->wellFormed = 0; 4840 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4841 return(-1); 4842 } 4843 NEXT; 4844 GROW; 4845 SKIP_BLANKS; 4846 if ((RAW == '#') && (NXT(1) == 'P') && 4847 (NXT(2) == 'C') && (NXT(3) == 'D') && 4848 (NXT(4) == 'A') && (NXT(5) == 'T') && 4849 (NXT(6) == 'A')) { 4850 tree = xmlParseElementMixedContentDecl(ctxt, input); 4851 res = XML_ELEMENT_TYPE_MIXED; 4852 } else { 4853 tree = xmlParseElementChildrenContentDecl(ctxt, input); 4854 res = XML_ELEMENT_TYPE_ELEMENT; 4855 } 4856 SKIP_BLANKS; 4857 *result = tree; 4858 return(res); 4859} 4860 4861/** 4862 * xmlParseElementDecl: 4863 * @ctxt: an XML parser context 4864 * 4865 * parse an Element declaration. 4866 * 4867 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 4868 * 4869 * [ VC: Unique Element Type Declaration ] 4870 * No element type may be declared more than once 4871 * 4872 * Returns the type of the element, or -1 in case of error 4873 */ 4874int 4875xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4876 xmlChar *name; 4877 int ret = -1; 4878 xmlElementContentPtr content = NULL; 4879 4880 GROW; 4881 if ((RAW == '<') && (NXT(1) == '!') && 4882 (NXT(2) == 'E') && (NXT(3) == 'L') && 4883 (NXT(4) == 'E') && (NXT(5) == 'M') && 4884 (NXT(6) == 'E') && (NXT(7) == 'N') && 4885 (NXT(8) == 'T')) { 4886 xmlParserInputPtr input = ctxt->input; 4887 4888 SKIP(9); 4889 if (!IS_BLANK(CUR)) { 4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4892 ctxt->sax->error(ctxt->userData, 4893 "Space required after 'ELEMENT'\n"); 4894 ctxt->wellFormed = 0; 4895 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4896 } 4897 SKIP_BLANKS; 4898 name = xmlParseName(ctxt); 4899 if (name == NULL) { 4900 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4902 ctxt->sax->error(ctxt->userData, 4903 "xmlParseElementDecl: no name for Element\n"); 4904 ctxt->wellFormed = 0; 4905 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4906 return(-1); 4907 } 4908 while ((RAW == 0) && (ctxt->inputNr > 1)) 4909 xmlPopInput(ctxt); 4910 if (!IS_BLANK(CUR)) { 4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4913 ctxt->sax->error(ctxt->userData, 4914 "Space required after the element name\n"); 4915 ctxt->wellFormed = 0; 4916 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4917 } 4918 SKIP_BLANKS; 4919 if ((RAW == 'E') && (NXT(1) == 'M') && 4920 (NXT(2) == 'P') && (NXT(3) == 'T') && 4921 (NXT(4) == 'Y')) { 4922 SKIP(5); 4923 /* 4924 * Element must always be empty. 4925 */ 4926 ret = XML_ELEMENT_TYPE_EMPTY; 4927 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4928 (NXT(2) == 'Y')) { 4929 SKIP(3); 4930 /* 4931 * Element is a generic container. 4932 */ 4933 ret = XML_ELEMENT_TYPE_ANY; 4934 } else if (RAW == '(') { 4935 ret = xmlParseElementContentDecl(ctxt, name, &content); 4936 } else { 4937 /* 4938 * [ WFC: PEs in Internal Subset ] error handling. 4939 */ 4940 if ((RAW == '%') && (ctxt->external == 0) && 4941 (ctxt->inputNr == 1)) { 4942 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4944 ctxt->sax->error(ctxt->userData, 4945 "PEReference: forbidden within markup decl in internal subset\n"); 4946 } else { 4947 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4949 ctxt->sax->error(ctxt->userData, 4950 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4951 } 4952 ctxt->wellFormed = 0; 4953 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4954 if (name != NULL) xmlFree(name); 4955 return(-1); 4956 } 4957 4958 SKIP_BLANKS; 4959 /* 4960 * Pop-up of finished entities. 4961 */ 4962 while ((RAW == 0) && (ctxt->inputNr > 1)) 4963 xmlPopInput(ctxt); 4964 SKIP_BLANKS; 4965 4966 if (RAW != '>') { 4967 ctxt->errNo = XML_ERR_GT_REQUIRED; 4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4969 ctxt->sax->error(ctxt->userData, 4970 "xmlParseElementDecl: expected '>' at the end\n"); 4971 ctxt->wellFormed = 0; 4972 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4973 } else { 4974 if (input != ctxt->input) { 4975 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4977 ctxt->sax->error(ctxt->userData, 4978"Element declaration doesn't start and stop in the same entity\n"); 4979 ctxt->wellFormed = 0; 4980 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 4981 } 4982 4983 NEXT; 4984 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4985 (ctxt->sax->elementDecl != NULL)) 4986 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4987 content); 4988 } 4989 if (content != NULL) { 4990 xmlFreeElementContent(content); 4991 } 4992 if (name != NULL) { 4993 xmlFree(name); 4994 } 4995 } 4996 return(ret); 4997} 4998 4999/** 5000 * xmlParseConditionalSections 5001 * @ctxt: an XML parser context 5002 * 5003 * [61] conditionalSect ::= includeSect | ignoreSect 5004 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5005 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 5006 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5007 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5008 */ 5009 5010static void 5011xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5012 SKIP(3); 5013 SKIP_BLANKS; 5014 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 5015 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 5016 (NXT(6) == 'E')) { 5017 SKIP(7); 5018 SKIP_BLANKS; 5019 if (RAW != '[') { 5020 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 5021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5022 ctxt->sax->error(ctxt->userData, 5023 "XML conditional section '[' expected\n"); 5024 ctxt->wellFormed = 0; 5025 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5026 } else { 5027 NEXT; 5028 } 5029 if (xmlParserDebugEntities) { 5030 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5031 xmlGenericError(xmlGenericErrorContext, 5032 "%s(%d): ", ctxt->input->filename, 5033 ctxt->input->line); 5034 xmlGenericError(xmlGenericErrorContext, 5035 "Entering INCLUDE Conditional Section\n"); 5036 } 5037 5038 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5039 (NXT(2) != '>'))) { 5040 const xmlChar *check = CUR_PTR; 5041 int cons = ctxt->input->consumed; 5042 5043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5044 xmlParseConditionalSections(ctxt); 5045 } else if (IS_BLANK(CUR)) { 5046 NEXT; 5047 } else if (RAW == '%') { 5048 xmlParsePEReference(ctxt); 5049 } else 5050 xmlParseMarkupDecl(ctxt); 5051 5052 /* 5053 * Pop-up of finished entities. 5054 */ 5055 while ((RAW == 0) && (ctxt->inputNr > 1)) 5056 xmlPopInput(ctxt); 5057 5058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5059 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5061 ctxt->sax->error(ctxt->userData, 5062 "Content error in the external subset\n"); 5063 ctxt->wellFormed = 0; 5064 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5065 break; 5066 } 5067 } 5068 if (xmlParserDebugEntities) { 5069 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5070 xmlGenericError(xmlGenericErrorContext, 5071 "%s(%d): ", ctxt->input->filename, 5072 ctxt->input->line); 5073 xmlGenericError(xmlGenericErrorContext, 5074 "Leaving INCLUDE Conditional Section\n"); 5075 } 5076 5077 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 5078 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 5079 int state; 5080 int instate; 5081 int depth = 0; 5082 5083 SKIP(6); 5084 SKIP_BLANKS; 5085 if (RAW != '[') { 5086 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5088 ctxt->sax->error(ctxt->userData, 5089 "XML conditional section '[' expected\n"); 5090 ctxt->wellFormed = 0; 5091 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5092 } else { 5093 NEXT; 5094 } 5095 if (xmlParserDebugEntities) { 5096 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5097 xmlGenericError(xmlGenericErrorContext, 5098 "%s(%d): ", ctxt->input->filename, 5099 ctxt->input->line); 5100 xmlGenericError(xmlGenericErrorContext, 5101 "Entering IGNORE Conditional Section\n"); 5102 } 5103 5104 /* 5105 * Parse up to the end of the conditional section 5106 * But disable SAX event generating DTD building in the meantime 5107 */ 5108 state = ctxt->disableSAX; 5109 instate = ctxt->instate; 5110 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5111 ctxt->instate = XML_PARSER_IGNORE; 5112 5113 while ((depth >= 0) && (RAW != 0)) { 5114 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5115 depth++; 5116 SKIP(3); 5117 continue; 5118 } 5119 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5120 if (--depth >= 0) SKIP(3); 5121 continue; 5122 } 5123 NEXT; 5124 continue; 5125 } 5126 5127 ctxt->disableSAX = state; 5128 ctxt->instate = instate; 5129 5130 if (xmlParserDebugEntities) { 5131 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5132 xmlGenericError(xmlGenericErrorContext, 5133 "%s(%d): ", ctxt->input->filename, 5134 ctxt->input->line); 5135 xmlGenericError(xmlGenericErrorContext, 5136 "Leaving IGNORE Conditional Section\n"); 5137 } 5138 5139 } else { 5140 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5142 ctxt->sax->error(ctxt->userData, 5143 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 5144 ctxt->wellFormed = 0; 5145 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5146 } 5147 5148 if (RAW == 0) 5149 SHRINK; 5150 5151 if (RAW == 0) { 5152 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5154 ctxt->sax->error(ctxt->userData, 5155 "XML conditional section not closed\n"); 5156 ctxt->wellFormed = 0; 5157 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5158 } else { 5159 SKIP(3); 5160 } 5161} 5162 5163/** 5164 * xmlParseMarkupDecl: 5165 * @ctxt: an XML parser context 5166 * 5167 * parse Markup declarations 5168 * 5169 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5170 * NotationDecl | PI | Comment 5171 * 5172 * [ VC: Proper Declaration/PE Nesting ] 5173 * Parameter-entity replacement text must be properly nested with 5174 * markup declarations. That is to say, if either the first character 5175 * or the last character of a markup declaration (markupdecl above) is 5176 * contained in the replacement text for a parameter-entity reference, 5177 * both must be contained in the same replacement text. 5178 * 5179 * [ WFC: PEs in Internal Subset ] 5180 * In the internal DTD subset, parameter-entity references can occur 5181 * only where markup declarations can occur, not within markup declarations. 5182 * (This does not apply to references that occur in external parameter 5183 * entities or to the external subset.) 5184 */ 5185void 5186xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5187 GROW; 5188 xmlParseElementDecl(ctxt); 5189 xmlParseAttributeListDecl(ctxt); 5190 xmlParseEntityDecl(ctxt); 5191 xmlParseNotationDecl(ctxt); 5192 xmlParsePI(ctxt); 5193 xmlParseComment(ctxt); 5194 /* 5195 * This is only for internal subset. On external entities, 5196 * the replacement is done before parsing stage 5197 */ 5198 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5199 xmlParsePEReference(ctxt); 5200 5201 /* 5202 * Conditional sections are allowed from entities included 5203 * by PE References in the internal subset. 5204 */ 5205 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5206 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5207 xmlParseConditionalSections(ctxt); 5208 } 5209 } 5210 5211 ctxt->instate = XML_PARSER_DTD; 5212} 5213 5214/** 5215 * xmlParseTextDecl: 5216 * @ctxt: an XML parser context 5217 * 5218 * parse an XML declaration header for external entities 5219 * 5220 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5221 * 5222 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5223 */ 5224 5225void 5226xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5227 xmlChar *version; 5228 5229 /* 5230 * We know that '<?xml' is here. 5231 */ 5232 if ((RAW == '<') && (NXT(1) == '?') && 5233 (NXT(2) == 'x') && (NXT(3) == 'm') && 5234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5235 SKIP(5); 5236 } else { 5237 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 5238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5239 ctxt->sax->error(ctxt->userData, 5240 "Text declaration '<?xml' required\n"); 5241 ctxt->wellFormed = 0; 5242 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5243 5244 return; 5245 } 5246 5247 if (!IS_BLANK(CUR)) { 5248 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5250 ctxt->sax->error(ctxt->userData, 5251 "Space needed after '<?xml'\n"); 5252 ctxt->wellFormed = 0; 5253 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5254 } 5255 SKIP_BLANKS; 5256 5257 /* 5258 * We may have the VersionInfo here. 5259 */ 5260 version = xmlParseVersionInfo(ctxt); 5261 if (version == NULL) 5262 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5263 else { 5264 if (!IS_BLANK(CUR)) { 5265 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5267 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 5268 ctxt->wellFormed = 0; 5269 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5270 } 5271 } 5272 ctxt->input->version = version; 5273 5274 /* 5275 * We must have the encoding declaration 5276 */ 5277 xmlParseEncodingDecl(ctxt); 5278 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5279 /* 5280 * The XML REC instructs us to stop parsing right here 5281 */ 5282 return; 5283 } 5284 5285 SKIP_BLANKS; 5286 if ((RAW == '?') && (NXT(1) == '>')) { 5287 SKIP(2); 5288 } else if (RAW == '>') { 5289 /* Deprecated old WD ... */ 5290 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5292 ctxt->sax->error(ctxt->userData, 5293 "XML declaration must end-up with '?>'\n"); 5294 ctxt->wellFormed = 0; 5295 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5296 NEXT; 5297 } else { 5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5300 ctxt->sax->error(ctxt->userData, 5301 "parsing XML declaration: '?>' expected\n"); 5302 ctxt->wellFormed = 0; 5303 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5304 MOVETO_ENDTAG(CUR_PTR); 5305 NEXT; 5306 } 5307} 5308 5309/** 5310 * xmlParseExternalSubset: 5311 * @ctxt: an XML parser context 5312 * @ExternalID: the external identifier 5313 * @SystemID: the system identifier (or URL) 5314 * 5315 * parse Markup declarations from an external subset 5316 * 5317 * [30] extSubset ::= textDecl? extSubsetDecl 5318 * 5319 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5320 */ 5321void 5322xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5323 const xmlChar *SystemID) { 5324 GROW; 5325 if ((RAW == '<') && (NXT(1) == '?') && 5326 (NXT(2) == 'x') && (NXT(3) == 'm') && 5327 (NXT(4) == 'l')) { 5328 xmlParseTextDecl(ctxt); 5329 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5330 /* 5331 * The XML REC instructs us to stop parsing right here 5332 */ 5333 ctxt->instate = XML_PARSER_EOF; 5334 return; 5335 } 5336 } 5337 if (ctxt->myDoc == NULL) { 5338 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5339 } 5340 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5341 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5342 5343 ctxt->instate = XML_PARSER_DTD; 5344 ctxt->external = 1; 5345 while (((RAW == '<') && (NXT(1) == '?')) || 5346 ((RAW == '<') && (NXT(1) == '!')) || 5347 (RAW == '%') || IS_BLANK(CUR)) { 5348 const xmlChar *check = CUR_PTR; 5349 int cons = ctxt->input->consumed; 5350 5351 GROW; 5352 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5353 xmlParseConditionalSections(ctxt); 5354 } else if (IS_BLANK(CUR)) { 5355 NEXT; 5356 } else if (RAW == '%') { 5357 xmlParsePEReference(ctxt); 5358 } else 5359 xmlParseMarkupDecl(ctxt); 5360 5361 /* 5362 * Pop-up of finished entities. 5363 */ 5364 while ((RAW == 0) && (ctxt->inputNr > 1)) 5365 xmlPopInput(ctxt); 5366 5367 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5368 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5370 ctxt->sax->error(ctxt->userData, 5371 "Content error in the external subset\n"); 5372 ctxt->wellFormed = 0; 5373 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5374 break; 5375 } 5376 } 5377 5378 if (RAW != 0) { 5379 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5381 ctxt->sax->error(ctxt->userData, 5382 "Extra content at the end of the document\n"); 5383 ctxt->wellFormed = 0; 5384 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5385 } 5386 5387} 5388 5389/** 5390 * xmlParseReference: 5391 * @ctxt: an XML parser context 5392 * 5393 * parse and handle entity references in content, depending on the SAX 5394 * interface, this may end-up in a call to character() if this is a 5395 * CharRef, a predefined entity, if there is no reference() callback. 5396 * or if the parser was asked to switch to that mode. 5397 * 5398 * [67] Reference ::= EntityRef | CharRef 5399 */ 5400void 5401xmlParseReference(xmlParserCtxtPtr ctxt) { 5402 xmlEntityPtr ent; 5403 xmlChar *val; 5404 if (RAW != '&') return; 5405 5406 if (NXT(1) == '#') { 5407 int i = 0; 5408 xmlChar out[10]; 5409 int hex = NXT(2); 5410 int value = xmlParseCharRef(ctxt); 5411 5412 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5413 /* 5414 * So we are using non-UTF-8 buffers 5415 * Check that the char fit on 8bits, if not 5416 * generate a CharRef. 5417 */ 5418 if (value <= 0xFF) { 5419 out[0] = value; 5420 out[1] = 0; 5421 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5422 (!ctxt->disableSAX)) 5423 ctxt->sax->characters(ctxt->userData, out, 1); 5424 } else { 5425 if ((hex == 'x') || (hex == 'X')) 5426 snprintf((char *)out, sizeof(out), "#x%X", value); 5427 else 5428 snprintf((char *)out, sizeof(out), "#%d", value); 5429 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5430 (!ctxt->disableSAX)) 5431 ctxt->sax->reference(ctxt->userData, out); 5432 } 5433 } else { 5434 /* 5435 * Just encode the value in UTF-8 5436 */ 5437 COPY_BUF(0 ,out, i, value); 5438 out[i] = 0; 5439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5440 (!ctxt->disableSAX)) 5441 ctxt->sax->characters(ctxt->userData, out, i); 5442 } 5443 } else { 5444 ent = xmlParseEntityRef(ctxt); 5445 if (ent == NULL) return; 5446 if (!ctxt->wellFormed) 5447 return; 5448 if ((ent->name != NULL) && 5449 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5450 xmlNodePtr list = NULL; 5451 int ret; 5452 5453 5454 /* 5455 * The first reference to the entity trigger a parsing phase 5456 * where the ent->children is filled with the result from 5457 * the parsing. 5458 */ 5459 if (ent->children == NULL) { 5460 xmlChar *value; 5461 value = ent->content; 5462 5463 /* 5464 * Check that this entity is well formed 5465 */ 5466 if ((value != NULL) && 5467 (value[1] == 0) && (value[0] == '<') && 5468 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5469 /* 5470 * DONE: get definite answer on this !!! 5471 * Lots of entity decls are used to declare a single 5472 * char 5473 * <!ENTITY lt "<"> 5474 * Which seems to be valid since 5475 * 2.4: The ampersand character (&) and the left angle 5476 * bracket (<) may appear in their literal form only 5477 * when used ... They are also legal within the literal 5478 * entity value of an internal entity declaration;i 5479 * see "4.3.2 Well-Formed Parsed Entities". 5480 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5481 * Looking at the OASIS test suite and James Clark 5482 * tests, this is broken. However the XML REC uses 5483 * it. Is the XML REC not well-formed ???? 5484 * This is a hack to avoid this problem 5485 * 5486 * ANSWER: since lt gt amp .. are already defined, 5487 * this is a redefinition and hence the fact that the 5488 * content is not well balanced is not a Wf error, this 5489 * is lousy but acceptable. 5490 */ 5491 list = xmlNewDocText(ctxt->myDoc, value); 5492 if (list != NULL) { 5493 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5494 (ent->children == NULL)) { 5495 ent->children = list; 5496 ent->last = list; 5497 ent->owner = 1; 5498 list->parent = (xmlNodePtr) ent; 5499 } else { 5500 xmlFreeNodeList(list); 5501 } 5502 } else if (list != NULL) { 5503 xmlFreeNodeList(list); 5504 } 5505 } else { 5506 /* 5507 * 4.3.2: An internal general parsed entity is well-formed 5508 * if its replacement text matches the production labeled 5509 * content. 5510 */ 5511 5512 void *user_data; 5513 /* 5514 * This is a bit hackish but this seems the best 5515 * way to make sure both SAX and DOM entity support 5516 * behaves okay. 5517 */ 5518 if (ctxt->userData == ctxt) 5519 user_data = NULL; 5520 else 5521 user_data = ctxt->userData; 5522 5523 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5524 ctxt->depth++; 5525 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 5526 value, user_data, &list); 5527 ctxt->depth--; 5528 } else if (ent->etype == 5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5530 ctxt->depth++; 5531 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5532 ctxt->sax, user_data, ctxt->depth, 5533 ent->URI, ent->ExternalID, &list); 5534 ctxt->depth--; 5535 } else { 5536 ret = -1; 5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5538 ctxt->sax->error(ctxt->userData, 5539 "Internal: invalid entity type\n"); 5540 } 5541 if (ret == XML_ERR_ENTITY_LOOP) { 5542 ctxt->errNo = XML_ERR_ENTITY_LOOP; 5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5544 ctxt->sax->error(ctxt->userData, 5545 "Detected entity reference loop\n"); 5546 ctxt->wellFormed = 0; 5547 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5548 return; 5549 } else if ((ret == 0) && (list != NULL)) { 5550 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5551 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5552 (ent->children == NULL)) { 5553 ent->children = list; 5554 if (ctxt->replaceEntities) { 5555 /* 5556 * Prune it directly in the generated document 5557 * except for single text nodes. 5558 */ 5559 if ((list->type == XML_TEXT_NODE) && 5560 (list->next == NULL)) { 5561 list->parent = (xmlNodePtr) ent; 5562 list = NULL; 5563 ent->owner = 1; 5564 } else { 5565 ent->owner = 0; 5566 while (list != NULL) { 5567 list->parent = (xmlNodePtr) ctxt->node; 5568 list->doc = ctxt->myDoc; 5569 if (list->next == NULL) 5570 ent->last = list; 5571 list = list->next; 5572 } 5573 list = ent->children; 5574 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5575 xmlAddEntityReference(ent, list, NULL); 5576 } 5577 } else { 5578 ent->owner = 1; 5579 while (list != NULL) { 5580 list->parent = (xmlNodePtr) ent; 5581 if (list->next == NULL) 5582 ent->last = list; 5583 list = list->next; 5584 } 5585 } 5586 } else { 5587 xmlFreeNodeList(list); 5588 list = NULL; 5589 } 5590 } else if (ret > 0) { 5591 ctxt->errNo = ret; 5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5593 ctxt->sax->error(ctxt->userData, 5594 "Entity value required\n"); 5595 ctxt->wellFormed = 0; 5596 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5597 } else if (list != NULL) { 5598 xmlFreeNodeList(list); 5599 list = NULL; 5600 } 5601 } 5602 } 5603 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5604 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5605 /* 5606 * Create a node. 5607 */ 5608 ctxt->sax->reference(ctxt->userData, ent->name); 5609 return; 5610 } else if (ctxt->replaceEntities) { 5611 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5612 /* 5613 * Seems we are generating the DOM content, do 5614 * a simple tree copy for all references except the first 5615 * In the first occurrence list contains the replacement 5616 */ 5617 if (list == NULL) { 5618 xmlNodePtr new = NULL, cur, firstChild = NULL; 5619 cur = ent->children; 5620 while (cur != NULL) { 5621 new = xmlCopyNode(cur, 1); 5622 if (new != NULL) { 5623 new->_private = cur->_private; 5624 if (firstChild == NULL){ 5625 firstChild = new; 5626 } 5627 xmlAddChild(ctxt->node, new); 5628 } 5629 if (cur == ent->last) 5630 break; 5631 cur = cur->next; 5632 } 5633 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5634 xmlAddEntityReference(ent, firstChild, new); 5635 } else { 5636 /* 5637 * the name change is to avoid coalescing of the 5638 * node with a possible previous text one which 5639 * would make ent->children a dangling pointer 5640 */ 5641 if (ent->children->type == XML_TEXT_NODE) 5642 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5643 if ((ent->last != ent->children) && 5644 (ent->last->type == XML_TEXT_NODE)) 5645 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5646 xmlAddChildList(ctxt->node, ent->children); 5647 } 5648 5649 /* 5650 * This is to avoid a nasty side effect, see 5651 * characters() in SAX.c 5652 */ 5653 ctxt->nodemem = 0; 5654 ctxt->nodelen = 0; 5655 return; 5656 } else { 5657 /* 5658 * Probably running in SAX mode 5659 */ 5660 xmlParserInputPtr input; 5661 5662 input = xmlNewEntityInputStream(ctxt, ent); 5663 xmlPushInput(ctxt, input); 5664 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5665 (RAW == '<') && (NXT(1) == '?') && 5666 (NXT(2) == 'x') && (NXT(3) == 'm') && 5667 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5668 xmlParseTextDecl(ctxt); 5669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5670 /* 5671 * The XML REC instructs us to stop parsing right here 5672 */ 5673 ctxt->instate = XML_PARSER_EOF; 5674 return; 5675 } 5676 if (input->standalone == 1) { 5677 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5679 ctxt->sax->error(ctxt->userData, 5680 "external parsed entities cannot be standalone\n"); 5681 ctxt->wellFormed = 0; 5682 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5683 } 5684 } 5685 return; 5686 } 5687 } 5688 } else { 5689 val = ent->content; 5690 if (val == NULL) return; 5691 /* 5692 * inline the entity. 5693 */ 5694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5695 (!ctxt->disableSAX)) 5696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5697 } 5698 } 5699} 5700 5701/** 5702 * xmlParseEntityRef: 5703 * @ctxt: an XML parser context 5704 * 5705 * parse ENTITY references declarations 5706 * 5707 * [68] EntityRef ::= '&' Name ';' 5708 * 5709 * [ WFC: Entity Declared ] 5710 * In a document without any DTD, a document with only an internal DTD 5711 * subset which contains no parameter entity references, or a document 5712 * with "standalone='yes'", the Name given in the entity reference 5713 * must match that in an entity declaration, except that well-formed 5714 * documents need not declare any of the following entities: amp, lt, 5715 * gt, apos, quot. The declaration of a parameter entity must precede 5716 * any reference to it. Similarly, the declaration of a general entity 5717 * must precede any reference to it which appears in a default value in an 5718 * attribute-list declaration. Note that if entities are declared in the 5719 * external subset or in external parameter entities, a non-validating 5720 * processor is not obligated to read and process their declarations; 5721 * for such documents, the rule that an entity must be declared is a 5722 * well-formedness constraint only if standalone='yes'. 5723 * 5724 * [ WFC: Parsed Entity ] 5725 * An entity reference must not contain the name of an unparsed entity 5726 * 5727 * Returns the xmlEntityPtr if found, or NULL otherwise. 5728 */ 5729xmlEntityPtr 5730xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5731 xmlChar *name; 5732 xmlEntityPtr ent = NULL; 5733 5734 GROW; 5735 5736 if (RAW == '&') { 5737 NEXT; 5738 name = xmlParseName(ctxt); 5739 if (name == NULL) { 5740 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5742 ctxt->sax->error(ctxt->userData, 5743 "xmlParseEntityRef: no name\n"); 5744 ctxt->wellFormed = 0; 5745 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5746 } else { 5747 if (RAW == ';') { 5748 NEXT; 5749 /* 5750 * Ask first SAX for entity resolution, otherwise try the 5751 * predefined set. 5752 */ 5753 if (ctxt->sax != NULL) { 5754 if (ctxt->sax->getEntity != NULL) 5755 ent = ctxt->sax->getEntity(ctxt->userData, name); 5756 if (ent == NULL) 5757 ent = xmlGetPredefinedEntity(name); 5758 if ((ent == NULL) && (ctxt->userData==ctxt)) { 5759 ent = getEntity(ctxt, name); 5760 } 5761 } 5762 /* 5763 * [ WFC: Entity Declared ] 5764 * In a document without any DTD, a document with only an 5765 * internal DTD subset which contains no parameter entity 5766 * references, or a document with "standalone='yes'", the 5767 * Name given in the entity reference must match that in an 5768 * entity declaration, except that well-formed documents 5769 * need not declare any of the following entities: amp, lt, 5770 * gt, apos, quot. 5771 * The declaration of a parameter entity must precede any 5772 * reference to it. 5773 * Similarly, the declaration of a general entity must 5774 * precede any reference to it which appears in a default 5775 * value in an attribute-list declaration. Note that if 5776 * entities are declared in the external subset or in 5777 * external parameter entities, a non-validating processor 5778 * is not obligated to read and process their declarations; 5779 * for such documents, the rule that an entity must be 5780 * declared is a well-formedness constraint only if 5781 * standalone='yes'. 5782 */ 5783 if (ent == NULL) { 5784 if ((ctxt->standalone == 1) || 5785 ((ctxt->hasExternalSubset == 0) && 5786 (ctxt->hasPErefs == 0))) { 5787 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5789 ctxt->sax->error(ctxt->userData, 5790 "Entity '%s' not defined\n", name); 5791 ctxt->wellFormed = 0; 5792 ctxt->valid = 0; 5793 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5794 } else { 5795 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5797 ctxt->sax->error(ctxt->userData, 5798 "Entity '%s' not defined\n", name); 5799 ctxt->valid = 0; 5800 } 5801 } 5802 5803 /* 5804 * [ WFC: Parsed Entity ] 5805 * An entity reference must not contain the name of an 5806 * unparsed entity 5807 */ 5808 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5809 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5811 ctxt->sax->error(ctxt->userData, 5812 "Entity reference to unparsed entity %s\n", name); 5813 ctxt->wellFormed = 0; 5814 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5815 } 5816 5817 /* 5818 * [ WFC: No External Entity References ] 5819 * Attribute values cannot contain direct or indirect 5820 * entity references to external entities. 5821 */ 5822 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5823 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5824 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5826 ctxt->sax->error(ctxt->userData, 5827 "Attribute references external entity '%s'\n", name); 5828 ctxt->wellFormed = 0; 5829 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5830 } 5831 /* 5832 * [ WFC: No < in Attribute Values ] 5833 * The replacement text of any entity referred to directly or 5834 * indirectly in an attribute value (other than "<") must 5835 * not contain a <. 5836 */ 5837 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5838 (ent != NULL) && 5839 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5840 (ent->content != NULL) && 5841 (xmlStrchr(ent->content, '<'))) { 5842 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5844 ctxt->sax->error(ctxt->userData, 5845 "'<' in entity '%s' is not allowed in attributes values\n", name); 5846 ctxt->wellFormed = 0; 5847 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5848 } 5849 5850 /* 5851 * Internal check, no parameter entities here ... 5852 */ 5853 else { 5854 switch (ent->etype) { 5855 case XML_INTERNAL_PARAMETER_ENTITY: 5856 case XML_EXTERNAL_PARAMETER_ENTITY: 5857 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5859 ctxt->sax->error(ctxt->userData, 5860 "Attempt to reference the parameter entity '%s'\n", name); 5861 ctxt->wellFormed = 0; 5862 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5863 break; 5864 default: 5865 break; 5866 } 5867 } 5868 5869 /* 5870 * [ WFC: No Recursion ] 5871 * A parsed entity must not contain a recursive reference 5872 * to itself, either directly or indirectly. 5873 * Done somewhere else 5874 */ 5875 5876 } else { 5877 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5879 ctxt->sax->error(ctxt->userData, 5880 "xmlParseEntityRef: expecting ';'\n"); 5881 ctxt->wellFormed = 0; 5882 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5883 } 5884 xmlFree(name); 5885 } 5886 } 5887 return(ent); 5888} 5889 5890/** 5891 * xmlParseStringEntityRef: 5892 * @ctxt: an XML parser context 5893 * @str: a pointer to an index in the string 5894 * 5895 * parse ENTITY references declarations, but this version parses it from 5896 * a string value. 5897 * 5898 * [68] EntityRef ::= '&' Name ';' 5899 * 5900 * [ WFC: Entity Declared ] 5901 * In a document without any DTD, a document with only an internal DTD 5902 * subset which contains no parameter entity references, or a document 5903 * with "standalone='yes'", the Name given in the entity reference 5904 * must match that in an entity declaration, except that well-formed 5905 * documents need not declare any of the following entities: amp, lt, 5906 * gt, apos, quot. The declaration of a parameter entity must precede 5907 * any reference to it. Similarly, the declaration of a general entity 5908 * must precede any reference to it which appears in a default value in an 5909 * attribute-list declaration. Note that if entities are declared in the 5910 * external subset or in external parameter entities, a non-validating 5911 * processor is not obligated to read and process their declarations; 5912 * for such documents, the rule that an entity must be declared is a 5913 * well-formedness constraint only if standalone='yes'. 5914 * 5915 * [ WFC: Parsed Entity ] 5916 * An entity reference must not contain the name of an unparsed entity 5917 * 5918 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5919 * is updated to the current location in the string. 5920 */ 5921xmlEntityPtr 5922xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5923 xmlChar *name; 5924 const xmlChar *ptr; 5925 xmlChar cur; 5926 xmlEntityPtr ent = NULL; 5927 5928 if ((str == NULL) || (*str == NULL)) 5929 return(NULL); 5930 ptr = *str; 5931 cur = *ptr; 5932 if (cur == '&') { 5933 ptr++; 5934 cur = *ptr; 5935 name = xmlParseStringName(ctxt, &ptr); 5936 if (name == NULL) { 5937 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5939 ctxt->sax->error(ctxt->userData, 5940 "xmlParseStringEntityRef: no name\n"); 5941 ctxt->wellFormed = 0; 5942 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5943 } else { 5944 if (*ptr == ';') { 5945 ptr++; 5946 /* 5947 * Ask first SAX for entity resolution, otherwise try the 5948 * predefined set. 5949 */ 5950 if (ctxt->sax != NULL) { 5951 if (ctxt->sax->getEntity != NULL) 5952 ent = ctxt->sax->getEntity(ctxt->userData, name); 5953 if (ent == NULL) 5954 ent = xmlGetPredefinedEntity(name); 5955 if ((ent == NULL) && (ctxt->userData==ctxt)) { 5956 ent = getEntity(ctxt, name); 5957 } 5958 } 5959 /* 5960 * [ WFC: Entity Declared ] 5961 * In a document without any DTD, a document with only an 5962 * internal DTD subset which contains no parameter entity 5963 * references, or a document with "standalone='yes'", the 5964 * Name given in the entity reference must match that in an 5965 * entity declaration, except that well-formed documents 5966 * need not declare any of the following entities: amp, lt, 5967 * gt, apos, quot. 5968 * The declaration of a parameter entity must precede any 5969 * reference to it. 5970 * Similarly, the declaration of a general entity must 5971 * precede any reference to it which appears in a default 5972 * value in an attribute-list declaration. Note that if 5973 * entities are declared in the external subset or in 5974 * external parameter entities, a non-validating processor 5975 * is not obligated to read and process their declarations; 5976 * for such documents, the rule that an entity must be 5977 * declared is a well-formedness constraint only if 5978 * standalone='yes'. 5979 */ 5980 if (ent == NULL) { 5981 if ((ctxt->standalone == 1) || 5982 ((ctxt->hasExternalSubset == 0) && 5983 (ctxt->hasPErefs == 0))) { 5984 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5986 ctxt->sax->error(ctxt->userData, 5987 "Entity '%s' not defined\n", name); 5988 ctxt->wellFormed = 0; 5989 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5990 } else { 5991 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5992 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5993 ctxt->sax->warning(ctxt->userData, 5994 "Entity '%s' not defined\n", name); 5995 } 5996 } 5997 5998 /* 5999 * [ WFC: Parsed Entity ] 6000 * An entity reference must not contain the name of an 6001 * unparsed entity 6002 */ 6003 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6004 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 6005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6006 ctxt->sax->error(ctxt->userData, 6007 "Entity reference to unparsed entity %s\n", name); 6008 ctxt->wellFormed = 0; 6009 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6010 } 6011 6012 /* 6013 * [ WFC: No External Entity References ] 6014 * Attribute values cannot contain direct or indirect 6015 * entity references to external entities. 6016 */ 6017 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6018 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6019 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6021 ctxt->sax->error(ctxt->userData, 6022 "Attribute references external entity '%s'\n", name); 6023 ctxt->wellFormed = 0; 6024 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6025 } 6026 /* 6027 * [ WFC: No < in Attribute Values ] 6028 * The replacement text of any entity referred to directly or 6029 * indirectly in an attribute value (other than "<") must 6030 * not contain a <. 6031 */ 6032 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6033 (ent != NULL) && 6034 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6035 (ent->content != NULL) && 6036 (xmlStrchr(ent->content, '<'))) { 6037 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6039 ctxt->sax->error(ctxt->userData, 6040 "'<' in entity '%s' is not allowed in attributes values\n", name); 6041 ctxt->wellFormed = 0; 6042 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6043 } 6044 6045 /* 6046 * Internal check, no parameter entities here ... 6047 */ 6048 else { 6049 switch (ent->etype) { 6050 case XML_INTERNAL_PARAMETER_ENTITY: 6051 case XML_EXTERNAL_PARAMETER_ENTITY: 6052 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6054 ctxt->sax->error(ctxt->userData, 6055 "Attempt to reference the parameter entity '%s'\n", name); 6056 ctxt->wellFormed = 0; 6057 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6058 break; 6059 default: 6060 break; 6061 } 6062 } 6063 6064 /* 6065 * [ WFC: No Recursion ] 6066 * A parsed entity must not contain a recursive reference 6067 * to itself, either directly or indirectly. 6068 * Done somewhere else 6069 */ 6070 6071 } else { 6072 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6074 ctxt->sax->error(ctxt->userData, 6075 "xmlParseStringEntityRef: expecting ';'\n"); 6076 ctxt->wellFormed = 0; 6077 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6078 } 6079 xmlFree(name); 6080 } 6081 } 6082 *str = ptr; 6083 return(ent); 6084} 6085 6086/** 6087 * xmlParsePEReference: 6088 * @ctxt: an XML parser context 6089 * 6090 * parse PEReference declarations 6091 * The entity content is handled directly by pushing it's content as 6092 * a new input stream. 6093 * 6094 * [69] PEReference ::= '%' Name ';' 6095 * 6096 * [ WFC: No Recursion ] 6097 * A parsed entity must not contain a recursive 6098 * reference to itself, either directly or indirectly. 6099 * 6100 * [ WFC: Entity Declared ] 6101 * In a document without any DTD, a document with only an internal DTD 6102 * subset which contains no parameter entity references, or a document 6103 * with "standalone='yes'", ... ... The declaration of a parameter 6104 * entity must precede any reference to it... 6105 * 6106 * [ VC: Entity Declared ] 6107 * In a document with an external subset or external parameter entities 6108 * with "standalone='no'", ... ... The declaration of a parameter entity 6109 * must precede any reference to it... 6110 * 6111 * [ WFC: In DTD ] 6112 * Parameter-entity references may only appear in the DTD. 6113 * NOTE: misleading but this is handled. 6114 */ 6115void 6116xmlParsePEReference(xmlParserCtxtPtr ctxt) { 6117 xmlChar *name; 6118 xmlEntityPtr entity = NULL; 6119 xmlParserInputPtr input; 6120 6121 if (RAW == '%') { 6122 NEXT; 6123 name = xmlParseName(ctxt); 6124 if (name == NULL) { 6125 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6127 ctxt->sax->error(ctxt->userData, 6128 "xmlParsePEReference: no name\n"); 6129 ctxt->wellFormed = 0; 6130 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6131 } else { 6132 if (RAW == ';') { 6133 NEXT; 6134 if ((ctxt->sax != NULL) && 6135 (ctxt->sax->getParameterEntity != NULL)) 6136 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6137 name); 6138 if (entity == NULL) { 6139 /* 6140 * [ WFC: Entity Declared ] 6141 * In a document without any DTD, a document with only an 6142 * internal DTD subset which contains no parameter entity 6143 * references, or a document with "standalone='yes'", ... 6144 * ... The declaration of a parameter entity must precede 6145 * any reference to it... 6146 */ 6147 if ((ctxt->standalone == 1) || 6148 ((ctxt->hasExternalSubset == 0) && 6149 (ctxt->hasPErefs == 0))) { 6150 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 6151 if ((!ctxt->disableSAX) && 6152 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6153 ctxt->sax->error(ctxt->userData, 6154 "PEReference: %%%s; not found\n", name); 6155 ctxt->wellFormed = 0; 6156 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6157 } else { 6158 /* 6159 * [ VC: Entity Declared ] 6160 * In a document with an external subset or external 6161 * parameter entities with "standalone='no'", ... 6162 * ... The declaration of a parameter entity must precede 6163 * any reference to it... 6164 */ 6165 if ((!ctxt->disableSAX) && 6166 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6167 ctxt->sax->warning(ctxt->userData, 6168 "PEReference: %%%s; not found\n", name); 6169 ctxt->valid = 0; 6170 } 6171 } else { 6172 /* 6173 * Internal checking in case the entity quest barfed 6174 */ 6175 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6176 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6177 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6178 ctxt->sax->warning(ctxt->userData, 6179 "Internal: %%%s; is not a parameter entity\n", name); 6180 } else if (ctxt->input->free != deallocblankswrapper) { 6181 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 6182 xmlPushInput(ctxt, input); 6183 } else { 6184 /* 6185 * TODO !!! 6186 * handle the extra spaces added before and after 6187 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6188 */ 6189 input = xmlNewEntityInputStream(ctxt, entity); 6190 xmlPushInput(ctxt, input); 6191 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6192 (RAW == '<') && (NXT(1) == '?') && 6193 (NXT(2) == 'x') && (NXT(3) == 'm') && 6194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 6195 xmlParseTextDecl(ctxt); 6196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6197 /* 6198 * The XML REC instructs us to stop parsing 6199 * right here 6200 */ 6201 ctxt->instate = XML_PARSER_EOF; 6202 xmlFree(name); 6203 return; 6204 } 6205 } 6206 } 6207 } 6208 ctxt->hasPErefs = 1; 6209 } else { 6210 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6212 ctxt->sax->error(ctxt->userData, 6213 "xmlParsePEReference: expecting ';'\n"); 6214 ctxt->wellFormed = 0; 6215 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6216 } 6217 xmlFree(name); 6218 } 6219 } 6220} 6221 6222/** 6223 * xmlParseStringPEReference: 6224 * @ctxt: an XML parser context 6225 * @str: a pointer to an index in the string 6226 * 6227 * parse PEReference declarations 6228 * 6229 * [69] PEReference ::= '%' Name ';' 6230 * 6231 * [ WFC: No Recursion ] 6232 * A parsed entity must not contain a recursive 6233 * reference to itself, either directly or indirectly. 6234 * 6235 * [ WFC: Entity Declared ] 6236 * In a document without any DTD, a document with only an internal DTD 6237 * subset which contains no parameter entity references, or a document 6238 * with "standalone='yes'", ... ... The declaration of a parameter 6239 * entity must precede any reference to it... 6240 * 6241 * [ VC: Entity Declared ] 6242 * In a document with an external subset or external parameter entities 6243 * with "standalone='no'", ... ... The declaration of a parameter entity 6244 * must precede any reference to it... 6245 * 6246 * [ WFC: In DTD ] 6247 * Parameter-entity references may only appear in the DTD. 6248 * NOTE: misleading but this is handled. 6249 * 6250 * Returns the string of the entity content. 6251 * str is updated to the current value of the index 6252 */ 6253xmlEntityPtr 6254xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6255 const xmlChar *ptr; 6256 xmlChar cur; 6257 xmlChar *name; 6258 xmlEntityPtr entity = NULL; 6259 6260 if ((str == NULL) || (*str == NULL)) return(NULL); 6261 ptr = *str; 6262 cur = *ptr; 6263 if (cur == '%') { 6264 ptr++; 6265 cur = *ptr; 6266 name = xmlParseStringName(ctxt, &ptr); 6267 if (name == NULL) { 6268 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6270 ctxt->sax->error(ctxt->userData, 6271 "xmlParseStringPEReference: no name\n"); 6272 ctxt->wellFormed = 0; 6273 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6274 } else { 6275 cur = *ptr; 6276 if (cur == ';') { 6277 ptr++; 6278 cur = *ptr; 6279 if ((ctxt->sax != NULL) && 6280 (ctxt->sax->getParameterEntity != NULL)) 6281 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6282 name); 6283 if (entity == NULL) { 6284 /* 6285 * [ WFC: Entity Declared ] 6286 * In a document without any DTD, a document with only an 6287 * internal DTD subset which contains no parameter entity 6288 * references, or a document with "standalone='yes'", ... 6289 * ... The declaration of a parameter entity must precede 6290 * any reference to it... 6291 */ 6292 if ((ctxt->standalone == 1) || 6293 ((ctxt->hasExternalSubset == 0) && 6294 (ctxt->hasPErefs == 0))) { 6295 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6297 ctxt->sax->error(ctxt->userData, 6298 "PEReference: %%%s; not found\n", name); 6299 ctxt->wellFormed = 0; 6300 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6301 } else { 6302 /* 6303 * [ VC: Entity Declared ] 6304 * In a document with an external subset or external 6305 * parameter entities with "standalone='no'", ... 6306 * ... The declaration of a parameter entity must 6307 * precede any reference to it... 6308 */ 6309 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6310 ctxt->sax->warning(ctxt->userData, 6311 "PEReference: %%%s; not found\n", name); 6312 ctxt->valid = 0; 6313 } 6314 } else { 6315 /* 6316 * Internal checking in case the entity quest barfed 6317 */ 6318 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6319 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6321 ctxt->sax->warning(ctxt->userData, 6322 "Internal: %%%s; is not a parameter entity\n", name); 6323 } 6324 } 6325 ctxt->hasPErefs = 1; 6326 } else { 6327 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6329 ctxt->sax->error(ctxt->userData, 6330 "xmlParseStringPEReference: expecting ';'\n"); 6331 ctxt->wellFormed = 0; 6332 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6333 } 6334 xmlFree(name); 6335 } 6336 } 6337 *str = ptr; 6338 return(entity); 6339} 6340 6341/** 6342 * xmlParseDocTypeDecl: 6343 * @ctxt: an XML parser context 6344 * 6345 * parse a DOCTYPE declaration 6346 * 6347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6349 * 6350 * [ VC: Root Element Type ] 6351 * The Name in the document type declaration must match the element 6352 * type of the root element. 6353 */ 6354 6355void 6356xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6357 xmlChar *name = NULL; 6358 xmlChar *ExternalID = NULL; 6359 xmlChar *URI = NULL; 6360 6361 /* 6362 * We know that '<!DOCTYPE' has been detected. 6363 */ 6364 SKIP(9); 6365 6366 SKIP_BLANKS; 6367 6368 /* 6369 * Parse the DOCTYPE name. 6370 */ 6371 name = xmlParseName(ctxt); 6372 if (name == NULL) { 6373 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6375 ctxt->sax->error(ctxt->userData, 6376 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6377 ctxt->wellFormed = 0; 6378 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6379 } 6380 ctxt->intSubName = name; 6381 6382 SKIP_BLANKS; 6383 6384 /* 6385 * Check for SystemID and ExternalID 6386 */ 6387 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6388 6389 if ((URI != NULL) || (ExternalID != NULL)) { 6390 ctxt->hasExternalSubset = 1; 6391 } 6392 ctxt->extSubURI = URI; 6393 ctxt->extSubSystem = ExternalID; 6394 6395 SKIP_BLANKS; 6396 6397 /* 6398 * Create and update the internal subset. 6399 */ 6400 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6401 (!ctxt->disableSAX)) 6402 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6403 6404 /* 6405 * Is there any internal subset declarations ? 6406 * they are handled separately in xmlParseInternalSubset() 6407 */ 6408 if (RAW == '[') 6409 return; 6410 6411 /* 6412 * We should be at the end of the DOCTYPE declaration. 6413 */ 6414 if (RAW != '>') { 6415 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6417 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6418 ctxt->wellFormed = 0; 6419 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6420 } 6421 NEXT; 6422} 6423 6424/** 6425 * xmlParseInternalSubset: 6426 * @ctxt: an XML parser context 6427 * 6428 * parse the internal subset declaration 6429 * 6430 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6431 */ 6432 6433static void 6434xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6435 /* 6436 * Is there any DTD definition ? 6437 */ 6438 if (RAW == '[') { 6439 ctxt->instate = XML_PARSER_DTD; 6440 NEXT; 6441 /* 6442 * Parse the succession of Markup declarations and 6443 * PEReferences. 6444 * Subsequence (markupdecl | PEReference | S)* 6445 */ 6446 while (RAW != ']') { 6447 const xmlChar *check = CUR_PTR; 6448 int cons = ctxt->input->consumed; 6449 6450 SKIP_BLANKS; 6451 xmlParseMarkupDecl(ctxt); 6452 xmlParsePEReference(ctxt); 6453 6454 /* 6455 * Pop-up of finished entities. 6456 */ 6457 while ((RAW == 0) && (ctxt->inputNr > 1)) 6458 xmlPopInput(ctxt); 6459 6460 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6461 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6463 ctxt->sax->error(ctxt->userData, 6464 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6465 ctxt->wellFormed = 0; 6466 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6467 break; 6468 } 6469 } 6470 if (RAW == ']') { 6471 NEXT; 6472 SKIP_BLANKS; 6473 } 6474 } 6475 6476 /* 6477 * We should be at the end of the DOCTYPE declaration. 6478 */ 6479 if (RAW != '>') { 6480 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6482 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6483 ctxt->wellFormed = 0; 6484 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6485 } 6486 NEXT; 6487} 6488 6489/** 6490 * xmlParseAttribute: 6491 * @ctxt: an XML parser context 6492 * @value: a xmlChar ** used to store the value of the attribute 6493 * 6494 * parse an attribute 6495 * 6496 * [41] Attribute ::= Name Eq AttValue 6497 * 6498 * [ WFC: No External Entity References ] 6499 * Attribute values cannot contain direct or indirect entity references 6500 * to external entities. 6501 * 6502 * [ WFC: No < in Attribute Values ] 6503 * The replacement text of any entity referred to directly or indirectly in 6504 * an attribute value (other than "<") must not contain a <. 6505 * 6506 * [ VC: Attribute Value Type ] 6507 * The attribute must have been declared; the value must be of the type 6508 * declared for it. 6509 * 6510 * [25] Eq ::= S? '=' S? 6511 * 6512 * With namespace: 6513 * 6514 * [NS 11] Attribute ::= QName Eq AttValue 6515 * 6516 * Also the case QName == xmlns:??? is handled independently as a namespace 6517 * definition. 6518 * 6519 * Returns the attribute name, and the value in *value. 6520 */ 6521 6522xmlChar * 6523xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6524 xmlChar *name, *val; 6525 6526 *value = NULL; 6527 GROW; 6528 name = xmlParseName(ctxt); 6529 if (name == NULL) { 6530 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6532 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 6533 ctxt->wellFormed = 0; 6534 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6535 return(NULL); 6536 } 6537 6538 /* 6539 * read the value 6540 */ 6541 SKIP_BLANKS; 6542 if (RAW == '=') { 6543 NEXT; 6544 SKIP_BLANKS; 6545 val = xmlParseAttValue(ctxt); 6546 ctxt->instate = XML_PARSER_CONTENT; 6547 } else { 6548 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6550 ctxt->sax->error(ctxt->userData, 6551 "Specification mandate value for attribute %s\n", name); 6552 ctxt->wellFormed = 0; 6553 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6554 xmlFree(name); 6555 return(NULL); 6556 } 6557 6558 /* 6559 * Check that xml:lang conforms to the specification 6560 * No more registered as an error, just generate a warning now 6561 * since this was deprecated in XML second edition 6562 */ 6563 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6564 if (!xmlCheckLanguageID(val)) { 6565 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6566 ctxt->sax->warning(ctxt->userData, 6567 "Malformed value for xml:lang : %s\n", val); 6568 } 6569 } 6570 6571 /* 6572 * Check that xml:space conforms to the specification 6573 */ 6574 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6575 if (xmlStrEqual(val, BAD_CAST "default")) 6576 *(ctxt->space) = 0; 6577 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6578 *(ctxt->space) = 1; 6579 else { 6580 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6582 ctxt->sax->error(ctxt->userData, 6583"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 6584 val); 6585 ctxt->wellFormed = 0; 6586 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6587 } 6588 } 6589 6590 *value = val; 6591 return(name); 6592} 6593 6594/** 6595 * xmlParseStartTag: 6596 * @ctxt: an XML parser context 6597 * 6598 * parse a start of tag either for rule element or 6599 * EmptyElement. In both case we don't parse the tag closing chars. 6600 * 6601 * [40] STag ::= '<' Name (S Attribute)* S? '>' 6602 * 6603 * [ WFC: Unique Att Spec ] 6604 * No attribute name may appear more than once in the same start-tag or 6605 * empty-element tag. 6606 * 6607 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6608 * 6609 * [ WFC: Unique Att Spec ] 6610 * No attribute name may appear more than once in the same start-tag or 6611 * empty-element tag. 6612 * 6613 * With namespace: 6614 * 6615 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6616 * 6617 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6618 * 6619 * Returns the element name parsed 6620 */ 6621 6622xmlChar * 6623xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6624 xmlChar *name; 6625 xmlChar *attname; 6626 xmlChar *attvalue; 6627 const xmlChar **atts = NULL; 6628 int nbatts = 0; 6629 int maxatts = 0; 6630 int i; 6631 6632 if (RAW != '<') return(NULL); 6633 NEXT1; 6634 6635 name = xmlParseName(ctxt); 6636 if (name == NULL) { 6637 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6639 ctxt->sax->error(ctxt->userData, 6640 "xmlParseStartTag: invalid element name\n"); 6641 ctxt->wellFormed = 0; 6642 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6643 return(NULL); 6644 } 6645 6646 /* 6647 * Now parse the attributes, it ends up with the ending 6648 * 6649 * (S Attribute)* S? 6650 */ 6651 SKIP_BLANKS; 6652 GROW; 6653 6654 while ((RAW != '>') && 6655 ((RAW != '/') || (NXT(1) != '>')) && 6656 (IS_CHAR(RAW))) { 6657 const xmlChar *q = CUR_PTR; 6658 int cons = ctxt->input->consumed; 6659 6660 attname = xmlParseAttribute(ctxt, &attvalue); 6661 if ((attname != NULL) && (attvalue != NULL)) { 6662 /* 6663 * [ WFC: Unique Att Spec ] 6664 * No attribute name may appear more than once in the same 6665 * start-tag or empty-element tag. 6666 */ 6667 for (i = 0; i < nbatts;i += 2) { 6668 if (xmlStrEqual(atts[i], attname)) { 6669 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6671 ctxt->sax->error(ctxt->userData, 6672 "Attribute %s redefined\n", 6673 attname); 6674 ctxt->wellFormed = 0; 6675 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6676 xmlFree(attname); 6677 xmlFree(attvalue); 6678 goto failed; 6679 } 6680 } 6681 6682 /* 6683 * Add the pair to atts 6684 */ 6685 if (atts == NULL) { 6686 maxatts = 10; 6687 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6688 if (atts == NULL) { 6689 xmlGenericError(xmlGenericErrorContext, 6690 "malloc of %ld byte failed\n", 6691 maxatts * (long)sizeof(xmlChar *)); 6692 return(NULL); 6693 } 6694 } else if (nbatts + 4 > maxatts) { 6695 maxatts *= 2; 6696 atts = (const xmlChar **) xmlRealloc((void *) atts, 6697 maxatts * sizeof(xmlChar *)); 6698 if (atts == NULL) { 6699 xmlGenericError(xmlGenericErrorContext, 6700 "realloc of %ld byte failed\n", 6701 maxatts * (long)sizeof(xmlChar *)); 6702 return(NULL); 6703 } 6704 } 6705 atts[nbatts++] = attname; 6706 atts[nbatts++] = attvalue; 6707 atts[nbatts] = NULL; 6708 atts[nbatts + 1] = NULL; 6709 } else { 6710 if (attname != NULL) 6711 xmlFree(attname); 6712 if (attvalue != NULL) 6713 xmlFree(attvalue); 6714 } 6715 6716failed: 6717 6718 GROW 6719 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6720 break; 6721 if (!IS_BLANK(RAW)) { 6722 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6724 ctxt->sax->error(ctxt->userData, 6725 "attributes construct error\n"); 6726 ctxt->wellFormed = 0; 6727 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6728 } 6729 SKIP_BLANKS; 6730 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6731 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6733 ctxt->sax->error(ctxt->userData, 6734 "xmlParseStartTag: problem parsing attributes\n"); 6735 ctxt->wellFormed = 0; 6736 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6737 break; 6738 } 6739 GROW; 6740 } 6741 6742 /* 6743 * SAX: Start of Element ! 6744 */ 6745 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6746 (!ctxt->disableSAX)) 6747 ctxt->sax->startElement(ctxt->userData, name, atts); 6748 6749 if (atts != NULL) { 6750 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6751 xmlFree((void *) atts); 6752 } 6753 return(name); 6754} 6755 6756/** 6757 * xmlParseEndTag: 6758 * @ctxt: an XML parser context 6759 * 6760 * parse an end of tag 6761 * 6762 * [42] ETag ::= '</' Name S? '>' 6763 * 6764 * With namespace 6765 * 6766 * [NS 9] ETag ::= '</' QName S? '>' 6767 */ 6768 6769void 6770xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6771 xmlChar *name; 6772 xmlChar *oldname; 6773 6774 GROW; 6775 if ((RAW != '<') || (NXT(1) != '/')) { 6776 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6778 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6779 ctxt->wellFormed = 0; 6780 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6781 return; 6782 } 6783 SKIP(2); 6784 6785 name = xmlParseNameAndCompare(ctxt,ctxt->name); 6786 6787 /* 6788 * We should definitely be at the ending "S? '>'" part 6789 */ 6790 GROW; 6791 SKIP_BLANKS; 6792 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6793 ctxt->errNo = XML_ERR_GT_REQUIRED; 6794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6795 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6796 ctxt->wellFormed = 0; 6797 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6798 } else 6799 NEXT1; 6800 6801 /* 6802 * [ WFC: Element Type Match ] 6803 * The Name in an element's end-tag must match the element type in the 6804 * start-tag. 6805 * 6806 */ 6807 if (name != (xmlChar*)1) { 6808 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6810 if (name != NULL) { 6811 ctxt->sax->error(ctxt->userData, 6812 "Opening and ending tag mismatch: %s and %s\n", 6813 ctxt->name, name); 6814 } else { 6815 ctxt->sax->error(ctxt->userData, 6816 "Ending tag error for: %s\n", ctxt->name); 6817 } 6818 6819 } 6820 ctxt->wellFormed = 0; 6821 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6822#if 0 6823 else { 6824 /* 6825 * Recover in case of one missing close 6826 */ 6827 if ((ctxt->nameNr > 2) && 6828 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) { 6829 namePop(ctxt); 6830 spacePop(ctxt); 6831 } 6832 } 6833#endif 6834 if (name != NULL) 6835 xmlFree(name); 6836 } 6837 6838 /* 6839 * SAX: End of Tag 6840 */ 6841 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6842 (!ctxt->disableSAX)) 6843 ctxt->sax->endElement(ctxt->userData, ctxt->name); 6844 6845 oldname = namePop(ctxt); 6846 spacePop(ctxt); 6847 if (oldname != NULL) { 6848#ifdef DEBUG_STACK 6849 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6850#endif 6851 xmlFree(oldname); 6852 } 6853 return; 6854} 6855 6856/** 6857 * xmlParseCDSect: 6858 * @ctxt: an XML parser context 6859 * 6860 * Parse escaped pure raw content. 6861 * 6862 * [18] CDSect ::= CDStart CData CDEnd 6863 * 6864 * [19] CDStart ::= '<![CDATA[' 6865 * 6866 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6867 * 6868 * [21] CDEnd ::= ']]>' 6869 */ 6870void 6871xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6872 xmlChar *buf = NULL; 6873 int len = 0; 6874 int size = XML_PARSER_BUFFER_SIZE; 6875 int r, rl; 6876 int s, sl; 6877 int cur, l; 6878 int count = 0; 6879 6880 if ((NXT(0) == '<') && (NXT(1) == '!') && 6881 (NXT(2) == '[') && (NXT(3) == 'C') && 6882 (NXT(4) == 'D') && (NXT(5) == 'A') && 6883 (NXT(6) == 'T') && (NXT(7) == 'A') && 6884 (NXT(8) == '[')) { 6885 SKIP(9); 6886 } else 6887 return; 6888 6889 ctxt->instate = XML_PARSER_CDATA_SECTION; 6890 r = CUR_CHAR(rl); 6891 if (!IS_CHAR(r)) { 6892 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6894 ctxt->sax->error(ctxt->userData, 6895 "CData section not finished\n"); 6896 ctxt->wellFormed = 0; 6897 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6898 ctxt->instate = XML_PARSER_CONTENT; 6899 return; 6900 } 6901 NEXTL(rl); 6902 s = CUR_CHAR(sl); 6903 if (!IS_CHAR(s)) { 6904 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6906 ctxt->sax->error(ctxt->userData, 6907 "CData section not finished\n"); 6908 ctxt->wellFormed = 0; 6909 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6910 ctxt->instate = XML_PARSER_CONTENT; 6911 return; 6912 } 6913 NEXTL(sl); 6914 cur = CUR_CHAR(l); 6915 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6916 if (buf == NULL) { 6917 xmlGenericError(xmlGenericErrorContext, 6918 "malloc of %d byte failed\n", size); 6919 return; 6920 } 6921 while (IS_CHAR(cur) && 6922 ((r != ']') || (s != ']') || (cur != '>'))) { 6923 if (len + 5 >= size) { 6924 size *= 2; 6925 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6926 if (buf == NULL) { 6927 xmlGenericError(xmlGenericErrorContext, 6928 "realloc of %d byte failed\n", size); 6929 return; 6930 } 6931 } 6932 COPY_BUF(rl,buf,len,r); 6933 r = s; 6934 rl = sl; 6935 s = cur; 6936 sl = l; 6937 count++; 6938 if (count > 50) { 6939 GROW; 6940 count = 0; 6941 } 6942 NEXTL(l); 6943 cur = CUR_CHAR(l); 6944 } 6945 buf[len] = 0; 6946 ctxt->instate = XML_PARSER_CONTENT; 6947 if (cur != '>') { 6948 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6950 ctxt->sax->error(ctxt->userData, 6951 "CData section not finished\n%.50s\n", buf); 6952 ctxt->wellFormed = 0; 6953 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6954 xmlFree(buf); 6955 return; 6956 } 6957 NEXTL(l); 6958 6959 /* 6960 * OK the buffer is to be consumed as cdata. 6961 */ 6962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6963 if (ctxt->sax->cdataBlock != NULL) 6964 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6965 else if (ctxt->sax->characters != NULL) 6966 ctxt->sax->characters(ctxt->userData, buf, len); 6967 } 6968 xmlFree(buf); 6969} 6970 6971/** 6972 * xmlParseContent: 6973 * @ctxt: an XML parser context 6974 * 6975 * Parse a content: 6976 * 6977 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6978 */ 6979 6980void 6981xmlParseContent(xmlParserCtxtPtr ctxt) { 6982 GROW; 6983 while ((RAW != 0) && 6984 ((RAW != '<') || (NXT(1) != '/'))) { 6985 const xmlChar *test = CUR_PTR; 6986 int cons = ctxt->input->consumed; 6987 const xmlChar *cur = ctxt->input->cur; 6988 6989 /* 6990 * First case : a Processing Instruction. 6991 */ 6992 if ((*cur == '<') && (cur[1] == '?')) { 6993 xmlParsePI(ctxt); 6994 } 6995 6996 /* 6997 * Second case : a CDSection 6998 */ 6999 else if ((*cur == '<') && (NXT(1) == '!') && 7000 (NXT(2) == '[') && (NXT(3) == 'C') && 7001 (NXT(4) == 'D') && (NXT(5) == 'A') && 7002 (NXT(6) == 'T') && (NXT(7) == 'A') && 7003 (NXT(8) == '[')) { 7004 xmlParseCDSect(ctxt); 7005 } 7006 7007 /* 7008 * Third case : a comment 7009 */ 7010 else if ((*cur == '<') && (NXT(1) == '!') && 7011 (NXT(2) == '-') && (NXT(3) == '-')) { 7012 xmlParseComment(ctxt); 7013 ctxt->instate = XML_PARSER_CONTENT; 7014 } 7015 7016 /* 7017 * Fourth case : a sub-element. 7018 */ 7019 else if (*cur == '<') { 7020 xmlParseElement(ctxt); 7021 } 7022 7023 /* 7024 * Fifth case : a reference. If if has not been resolved, 7025 * parsing returns it's Name, create the node 7026 */ 7027 7028 else if (*cur == '&') { 7029 xmlParseReference(ctxt); 7030 } 7031 7032 /* 7033 * Last case, text. Note that References are handled directly. 7034 */ 7035 else { 7036 xmlParseCharData(ctxt, 0); 7037 } 7038 7039 GROW; 7040 /* 7041 * Pop-up of finished entities. 7042 */ 7043 while ((RAW == 0) && (ctxt->inputNr > 1)) 7044 xmlPopInput(ctxt); 7045 SHRINK; 7046 7047 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 7048 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 7049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7050 ctxt->sax->error(ctxt->userData, 7051 "detected an error in element content\n"); 7052 ctxt->wellFormed = 0; 7053 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7054 ctxt->instate = XML_PARSER_EOF; 7055 break; 7056 } 7057 } 7058} 7059 7060/** 7061 * xmlParseElement: 7062 * @ctxt: an XML parser context 7063 * 7064 * parse an XML element, this is highly recursive 7065 * 7066 * [39] element ::= EmptyElemTag | STag content ETag 7067 * 7068 * [ WFC: Element Type Match ] 7069 * The Name in an element's end-tag must match the element type in the 7070 * start-tag. 7071 * 7072 * [ VC: Element Valid ] 7073 * An element is valid if there is a declaration matching elementdecl 7074 * where the Name matches the element type and one of the following holds: 7075 * - The declaration matches EMPTY and the element has no content. 7076 * - The declaration matches children and the sequence of child elements 7077 * belongs to the language generated by the regular expression in the 7078 * content model, with optional white space (characters matching the 7079 * nonterminal S) between each pair of child elements. 7080 * - The declaration matches Mixed and the content consists of character 7081 * data and child elements whose types match names in the content model. 7082 * - The declaration matches ANY, and the types of any child elements have 7083 * been declared. 7084 */ 7085 7086void 7087xmlParseElement(xmlParserCtxtPtr ctxt) { 7088 xmlChar *name; 7089 xmlChar *oldname; 7090 xmlParserNodeInfo node_info; 7091 xmlNodePtr ret; 7092 7093 /* Capture start position */ 7094 if (ctxt->record_info) { 7095 node_info.begin_pos = ctxt->input->consumed + 7096 (CUR_PTR - ctxt->input->base); 7097 node_info.begin_line = ctxt->input->line; 7098 } 7099 7100 if (ctxt->spaceNr == 0) 7101 spacePush(ctxt, -1); 7102 else 7103 spacePush(ctxt, *ctxt->space); 7104 7105 name = xmlParseStartTag(ctxt); 7106 if (name == NULL) { 7107 spacePop(ctxt); 7108 return; 7109 } 7110 namePush(ctxt, name); 7111 ret = ctxt->node; 7112 7113 /* 7114 * [ VC: Root Element Type ] 7115 * The Name in the document type declaration must match the element 7116 * type of the root element. 7117 */ 7118 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7119 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7120 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7121 7122 /* 7123 * Check for an Empty Element. 7124 */ 7125 if ((RAW == '/') && (NXT(1) == '>')) { 7126 SKIP(2); 7127 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7128 (!ctxt->disableSAX)) 7129 ctxt->sax->endElement(ctxt->userData, name); 7130 oldname = namePop(ctxt); 7131 spacePop(ctxt); 7132 if (oldname != NULL) { 7133#ifdef DEBUG_STACK 7134 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7135#endif 7136 xmlFree(oldname); 7137 } 7138 if ( ret != NULL && ctxt->record_info ) { 7139 node_info.end_pos = ctxt->input->consumed + 7140 (CUR_PTR - ctxt->input->base); 7141 node_info.end_line = ctxt->input->line; 7142 node_info.node = ret; 7143 xmlParserAddNodeInfo(ctxt, &node_info); 7144 } 7145 return; 7146 } 7147 if (RAW == '>') { 7148 NEXT1; 7149 } else { 7150 ctxt->errNo = XML_ERR_GT_REQUIRED; 7151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7152 ctxt->sax->error(ctxt->userData, 7153 "Couldn't find end of Start Tag %s\n", 7154 name); 7155 ctxt->wellFormed = 0; 7156 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7157 7158 /* 7159 * end of parsing of this node. 7160 */ 7161 nodePop(ctxt); 7162 oldname = namePop(ctxt); 7163 spacePop(ctxt); 7164 if (oldname != NULL) { 7165#ifdef DEBUG_STACK 7166 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7167#endif 7168 xmlFree(oldname); 7169 } 7170 7171 /* 7172 * Capture end position and add node 7173 */ 7174 if ( ret != NULL && ctxt->record_info ) { 7175 node_info.end_pos = ctxt->input->consumed + 7176 (CUR_PTR - ctxt->input->base); 7177 node_info.end_line = ctxt->input->line; 7178 node_info.node = ret; 7179 xmlParserAddNodeInfo(ctxt, &node_info); 7180 } 7181 return; 7182 } 7183 7184 /* 7185 * Parse the content of the element: 7186 */ 7187 xmlParseContent(ctxt); 7188 if (!IS_CHAR(RAW)) { 7189 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED; 7190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7191 ctxt->sax->error(ctxt->userData, 7192 "Premature end of data in tag %s\n", name); 7193 ctxt->wellFormed = 0; 7194 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7195 7196 /* 7197 * end of parsing of this node. 7198 */ 7199 nodePop(ctxt); 7200 oldname = namePop(ctxt); 7201 spacePop(ctxt); 7202 if (oldname != NULL) { 7203#ifdef DEBUG_STACK 7204 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7205#endif 7206 xmlFree(oldname); 7207 } 7208 return; 7209 } 7210 7211 /* 7212 * parse the end of tag: '</' should be here. 7213 */ 7214 xmlParseEndTag(ctxt); 7215 7216 /* 7217 * Capture end position and add node 7218 */ 7219 if ( ret != NULL && ctxt->record_info ) { 7220 node_info.end_pos = ctxt->input->consumed + 7221 (CUR_PTR - ctxt->input->base); 7222 node_info.end_line = ctxt->input->line; 7223 node_info.node = ret; 7224 xmlParserAddNodeInfo(ctxt, &node_info); 7225 } 7226} 7227 7228/** 7229 * xmlParseVersionNum: 7230 * @ctxt: an XML parser context 7231 * 7232 * parse the XML version value. 7233 * 7234 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 7235 * 7236 * Returns the string giving the XML version number, or NULL 7237 */ 7238xmlChar * 7239xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 7240 xmlChar *buf = NULL; 7241 int len = 0; 7242 int size = 10; 7243 xmlChar cur; 7244 7245 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 7246 if (buf == NULL) { 7247 xmlGenericError(xmlGenericErrorContext, 7248 "malloc of %d byte failed\n", size); 7249 return(NULL); 7250 } 7251 cur = CUR; 7252 while (((cur >= 'a') && (cur <= 'z')) || 7253 ((cur >= 'A') && (cur <= 'Z')) || 7254 ((cur >= '0') && (cur <= '9')) || 7255 (cur == '_') || (cur == '.') || 7256 (cur == ':') || (cur == '-')) { 7257 if (len + 1 >= size) { 7258 size *= 2; 7259 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7260 if (buf == NULL) { 7261 xmlGenericError(xmlGenericErrorContext, 7262 "realloc of %d byte failed\n", size); 7263 return(NULL); 7264 } 7265 } 7266 buf[len++] = cur; 7267 NEXT; 7268 cur=CUR; 7269 } 7270 buf[len] = 0; 7271 return(buf); 7272} 7273 7274/** 7275 * xmlParseVersionInfo: 7276 * @ctxt: an XML parser context 7277 * 7278 * parse the XML version. 7279 * 7280 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 7281 * 7282 * [25] Eq ::= S? '=' S? 7283 * 7284 * Returns the version string, e.g. "1.0" 7285 */ 7286 7287xmlChar * 7288xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 7289 xmlChar *version = NULL; 7290 const xmlChar *q; 7291 7292 if ((RAW == 'v') && (NXT(1) == 'e') && 7293 (NXT(2) == 'r') && (NXT(3) == 's') && 7294 (NXT(4) == 'i') && (NXT(5) == 'o') && 7295 (NXT(6) == 'n')) { 7296 SKIP(7); 7297 SKIP_BLANKS; 7298 if (RAW != '=') { 7299 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7301 ctxt->sax->error(ctxt->userData, 7302 "xmlParseVersionInfo : expected '='\n"); 7303 ctxt->wellFormed = 0; 7304 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7305 return(NULL); 7306 } 7307 NEXT; 7308 SKIP_BLANKS; 7309 if (RAW == '"') { 7310 NEXT; 7311 q = CUR_PTR; 7312 version = xmlParseVersionNum(ctxt); 7313 if (RAW != '"') { 7314 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7316 ctxt->sax->error(ctxt->userData, 7317 "String not closed\n%.50s\n", q); 7318 ctxt->wellFormed = 0; 7319 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7320 } else 7321 NEXT; 7322 } else if (RAW == '\''){ 7323 NEXT; 7324 q = CUR_PTR; 7325 version = xmlParseVersionNum(ctxt); 7326 if (RAW != '\'') { 7327 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7329 ctxt->sax->error(ctxt->userData, 7330 "String not closed\n%.50s\n", q); 7331 ctxt->wellFormed = 0; 7332 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7333 } else 7334 NEXT; 7335 } else { 7336 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7338 ctxt->sax->error(ctxt->userData, 7339 "xmlParseVersionInfo : expected ' or \"\n"); 7340 ctxt->wellFormed = 0; 7341 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7342 } 7343 } 7344 return(version); 7345} 7346 7347/** 7348 * xmlParseEncName: 7349 * @ctxt: an XML parser context 7350 * 7351 * parse the XML encoding name 7352 * 7353 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 7354 * 7355 * Returns the encoding name value or NULL 7356 */ 7357xmlChar * 7358xmlParseEncName(xmlParserCtxtPtr ctxt) { 7359 xmlChar *buf = NULL; 7360 int len = 0; 7361 int size = 10; 7362 xmlChar cur; 7363 7364 cur = CUR; 7365 if (((cur >= 'a') && (cur <= 'z')) || 7366 ((cur >= 'A') && (cur <= 'Z'))) { 7367 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 7368 if (buf == NULL) { 7369 xmlGenericError(xmlGenericErrorContext, 7370 "malloc of %d byte failed\n", size); 7371 return(NULL); 7372 } 7373 7374 buf[len++] = cur; 7375 NEXT; 7376 cur = CUR; 7377 while (((cur >= 'a') && (cur <= 'z')) || 7378 ((cur >= 'A') && (cur <= 'Z')) || 7379 ((cur >= '0') && (cur <= '9')) || 7380 (cur == '.') || (cur == '_') || 7381 (cur == '-')) { 7382 if (len + 1 >= size) { 7383 size *= 2; 7384 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7385 if (buf == NULL) { 7386 xmlGenericError(xmlGenericErrorContext, 7387 "realloc of %d byte failed\n", size); 7388 return(NULL); 7389 } 7390 } 7391 buf[len++] = cur; 7392 NEXT; 7393 cur = CUR; 7394 if (cur == 0) { 7395 SHRINK; 7396 GROW; 7397 cur = CUR; 7398 } 7399 } 7400 buf[len] = 0; 7401 } else { 7402 ctxt->errNo = XML_ERR_ENCODING_NAME; 7403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7404 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 7405 ctxt->wellFormed = 0; 7406 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7407 } 7408 return(buf); 7409} 7410 7411/** 7412 * xmlParseEncodingDecl: 7413 * @ctxt: an XML parser context 7414 * 7415 * parse the XML encoding declaration 7416 * 7417 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 7418 * 7419 * this setups the conversion filters. 7420 * 7421 * Returns the encoding value or NULL 7422 */ 7423 7424xmlChar * 7425xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 7426 xmlChar *encoding = NULL; 7427 const xmlChar *q; 7428 7429 SKIP_BLANKS; 7430 if ((RAW == 'e') && (NXT(1) == 'n') && 7431 (NXT(2) == 'c') && (NXT(3) == 'o') && 7432 (NXT(4) == 'd') && (NXT(5) == 'i') && 7433 (NXT(6) == 'n') && (NXT(7) == 'g')) { 7434 SKIP(8); 7435 SKIP_BLANKS; 7436 if (RAW != '=') { 7437 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7439 ctxt->sax->error(ctxt->userData, 7440 "xmlParseEncodingDecl : expected '='\n"); 7441 ctxt->wellFormed = 0; 7442 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7443 return(NULL); 7444 } 7445 NEXT; 7446 SKIP_BLANKS; 7447 if (RAW == '"') { 7448 NEXT; 7449 q = CUR_PTR; 7450 encoding = xmlParseEncName(ctxt); 7451 if (RAW != '"') { 7452 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7454 ctxt->sax->error(ctxt->userData, 7455 "String not closed\n%.50s\n", q); 7456 ctxt->wellFormed = 0; 7457 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7458 } else 7459 NEXT; 7460 } else if (RAW == '\''){ 7461 NEXT; 7462 q = CUR_PTR; 7463 encoding = xmlParseEncName(ctxt); 7464 if (RAW != '\'') { 7465 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7467 ctxt->sax->error(ctxt->userData, 7468 "String not closed\n%.50s\n", q); 7469 ctxt->wellFormed = 0; 7470 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7471 } else 7472 NEXT; 7473 } else { 7474 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7476 ctxt->sax->error(ctxt->userData, 7477 "xmlParseEncodingDecl : expected ' or \"\n"); 7478 ctxt->wellFormed = 0; 7479 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7480 } 7481 if (encoding != NULL) { 7482 xmlCharEncoding enc; 7483 xmlCharEncodingHandlerPtr handler; 7484 7485 if (ctxt->input->encoding != NULL) 7486 xmlFree((xmlChar *) ctxt->input->encoding); 7487 ctxt->input->encoding = encoding; 7488 7489 enc = xmlParseCharEncoding((const char *) encoding); 7490 /* 7491 * registered set of known encodings 7492 */ 7493 if (enc != XML_CHAR_ENCODING_ERROR) { 7494 xmlSwitchEncoding(ctxt, enc); 7495 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7496 ctxt->input->encoding = NULL; 7497 xmlFree(encoding); 7498 return(NULL); 7499 } 7500 } else { 7501 /* 7502 * fallback for unknown encodings 7503 */ 7504 handler = xmlFindCharEncodingHandler((const char *) encoding); 7505 if (handler != NULL) { 7506 xmlSwitchToEncoding(ctxt, handler); 7507 } else { 7508 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 7509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7510 ctxt->sax->error(ctxt->userData, 7511 "Unsupported encoding %s\n", encoding); 7512 return(NULL); 7513 } 7514 } 7515 } 7516 } 7517 return(encoding); 7518} 7519 7520/** 7521 * xmlParseSDDecl: 7522 * @ctxt: an XML parser context 7523 * 7524 * parse the XML standalone declaration 7525 * 7526 * [32] SDDecl ::= S 'standalone' Eq 7527 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 7528 * 7529 * [ VC: Standalone Document Declaration ] 7530 * TODO The standalone document declaration must have the value "no" 7531 * if any external markup declarations contain declarations of: 7532 * - attributes with default values, if elements to which these 7533 * attributes apply appear in the document without specifications 7534 * of values for these attributes, or 7535 * - entities (other than amp, lt, gt, apos, quot), if references 7536 * to those entities appear in the document, or 7537 * - attributes with values subject to normalization, where the 7538 * attribute appears in the document with a value which will change 7539 * as a result of normalization, or 7540 * - element types with element content, if white space occurs directly 7541 * within any instance of those types. 7542 * 7543 * Returns 1 if standalone, 0 otherwise 7544 */ 7545 7546int 7547xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 7548 int standalone = -1; 7549 7550 SKIP_BLANKS; 7551 if ((RAW == 's') && (NXT(1) == 't') && 7552 (NXT(2) == 'a') && (NXT(3) == 'n') && 7553 (NXT(4) == 'd') && (NXT(5) == 'a') && 7554 (NXT(6) == 'l') && (NXT(7) == 'o') && 7555 (NXT(8) == 'n') && (NXT(9) == 'e')) { 7556 SKIP(10); 7557 SKIP_BLANKS; 7558 if (RAW != '=') { 7559 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7561 ctxt->sax->error(ctxt->userData, 7562 "XML standalone declaration : expected '='\n"); 7563 ctxt->wellFormed = 0; 7564 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7565 return(standalone); 7566 } 7567 NEXT; 7568 SKIP_BLANKS; 7569 if (RAW == '\''){ 7570 NEXT; 7571 if ((RAW == 'n') && (NXT(1) == 'o')) { 7572 standalone = 0; 7573 SKIP(2); 7574 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7575 (NXT(2) == 's')) { 7576 standalone = 1; 7577 SKIP(3); 7578 } else { 7579 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7581 ctxt->sax->error(ctxt->userData, 7582 "standalone accepts only 'yes' or 'no'\n"); 7583 ctxt->wellFormed = 0; 7584 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7585 } 7586 if (RAW != '\'') { 7587 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7589 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7590 ctxt->wellFormed = 0; 7591 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7592 } else 7593 NEXT; 7594 } else if (RAW == '"'){ 7595 NEXT; 7596 if ((RAW == 'n') && (NXT(1) == 'o')) { 7597 standalone = 0; 7598 SKIP(2); 7599 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7600 (NXT(2) == 's')) { 7601 standalone = 1; 7602 SKIP(3); 7603 } else { 7604 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7606 ctxt->sax->error(ctxt->userData, 7607 "standalone accepts only 'yes' or 'no'\n"); 7608 ctxt->wellFormed = 0; 7609 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7610 } 7611 if (RAW != '"') { 7612 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7614 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7615 ctxt->wellFormed = 0; 7616 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7617 } else 7618 NEXT; 7619 } else { 7620 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7622 ctxt->sax->error(ctxt->userData, 7623 "Standalone value not found\n"); 7624 ctxt->wellFormed = 0; 7625 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7626 } 7627 } 7628 return(standalone); 7629} 7630 7631/** 7632 * xmlParseXMLDecl: 7633 * @ctxt: an XML parser context 7634 * 7635 * parse an XML declaration header 7636 * 7637 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 7638 */ 7639 7640void 7641xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7642 xmlChar *version; 7643 7644 /* 7645 * We know that '<?xml' is here. 7646 */ 7647 SKIP(5); 7648 7649 if (!IS_BLANK(RAW)) { 7650 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7652 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7653 ctxt->wellFormed = 0; 7654 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7655 } 7656 SKIP_BLANKS; 7657 7658 /* 7659 * We must have the VersionInfo here. 7660 */ 7661 version = xmlParseVersionInfo(ctxt); 7662 if (version == NULL) { 7663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7664 ctxt->sax->error(ctxt->userData, 7665 "Malformed declaration expecting version\n"); 7666 ctxt->wellFormed = 0; 7667 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7668 } else { 7669 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 7670 /* 7671 * TODO: Blueberry should be detected here 7672 */ 7673 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7674 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", 7675 version); 7676 } 7677 if (ctxt->version != NULL) 7678 xmlFree((void *) ctxt->version); 7679 ctxt->version = version; 7680 } 7681 7682 /* 7683 * We may have the encoding declaration 7684 */ 7685 if (!IS_BLANK(RAW)) { 7686 if ((RAW == '?') && (NXT(1) == '>')) { 7687 SKIP(2); 7688 return; 7689 } 7690 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7692 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7693 ctxt->wellFormed = 0; 7694 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7695 } 7696 xmlParseEncodingDecl(ctxt); 7697 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7698 /* 7699 * The XML REC instructs us to stop parsing right here 7700 */ 7701 return; 7702 } 7703 7704 /* 7705 * We may have the standalone status. 7706 */ 7707 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7708 if ((RAW == '?') && (NXT(1) == '>')) { 7709 SKIP(2); 7710 return; 7711 } 7712 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7714 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7715 ctxt->wellFormed = 0; 7716 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7717 } 7718 SKIP_BLANKS; 7719 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7720 7721 SKIP_BLANKS; 7722 if ((RAW == '?') && (NXT(1) == '>')) { 7723 SKIP(2); 7724 } else if (RAW == '>') { 7725 /* Deprecated old WD ... */ 7726 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7728 ctxt->sax->error(ctxt->userData, 7729 "XML declaration must end-up with '?>'\n"); 7730 ctxt->wellFormed = 0; 7731 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7732 NEXT; 7733 } else { 7734 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7736 ctxt->sax->error(ctxt->userData, 7737 "parsing XML declaration: '?>' expected\n"); 7738 ctxt->wellFormed = 0; 7739 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7740 MOVETO_ENDTAG(CUR_PTR); 7741 NEXT; 7742 } 7743} 7744 7745/** 7746 * xmlParseMisc: 7747 * @ctxt: an XML parser context 7748 * 7749 * parse an XML Misc* optional field. 7750 * 7751 * [27] Misc ::= Comment | PI | S 7752 */ 7753 7754void 7755xmlParseMisc(xmlParserCtxtPtr ctxt) { 7756 while (((RAW == '<') && (NXT(1) == '?')) || 7757 ((RAW == '<') && (NXT(1) == '!') && 7758 (NXT(2) == '-') && (NXT(3) == '-')) || 7759 IS_BLANK(CUR)) { 7760 if ((RAW == '<') && (NXT(1) == '?')) { 7761 xmlParsePI(ctxt); 7762 } else if (IS_BLANK(CUR)) { 7763 NEXT; 7764 } else 7765 xmlParseComment(ctxt); 7766 } 7767} 7768 7769/** 7770 * xmlParseDocument: 7771 * @ctxt: an XML parser context 7772 * 7773 * parse an XML document (and build a tree if using the standard SAX 7774 * interface). 7775 * 7776 * [1] document ::= prolog element Misc* 7777 * 7778 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7779 * 7780 * Returns 0, -1 in case of error. the parser context is augmented 7781 * as a result of the parsing. 7782 */ 7783 7784int 7785xmlParseDocument(xmlParserCtxtPtr ctxt) { 7786 xmlChar start[4]; 7787 xmlCharEncoding enc; 7788 7789 xmlInitParser(); 7790 7791 GROW; 7792 7793 /* 7794 * SAX: beginning of the document processing. 7795 */ 7796 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7797 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7798 7799 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 7800 /* 7801 * Get the 4 first bytes and decode the charset 7802 * if enc != XML_CHAR_ENCODING_NONE 7803 * plug some encoding conversion routines. 7804 */ 7805 start[0] = RAW; 7806 start[1] = NXT(1); 7807 start[2] = NXT(2); 7808 start[3] = NXT(3); 7809 enc = xmlDetectCharEncoding(start, 4); 7810 if (enc != XML_CHAR_ENCODING_NONE) { 7811 xmlSwitchEncoding(ctxt, enc); 7812 } 7813 } 7814 7815 7816 if (CUR == 0) { 7817 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7819 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7820 ctxt->wellFormed = 0; 7821 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7822 } 7823 7824 /* 7825 * Check for the XMLDecl in the Prolog. 7826 */ 7827 GROW; 7828 if ((RAW == '<') && (NXT(1) == '?') && 7829 (NXT(2) == 'x') && (NXT(3) == 'm') && 7830 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7831 7832 /* 7833 * Note that we will switch encoding on the fly. 7834 */ 7835 xmlParseXMLDecl(ctxt); 7836 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7837 /* 7838 * The XML REC instructs us to stop parsing right here 7839 */ 7840 return(-1); 7841 } 7842 ctxt->standalone = ctxt->input->standalone; 7843 SKIP_BLANKS; 7844 } else { 7845 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7846 } 7847 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7848 ctxt->sax->startDocument(ctxt->userData); 7849 7850 /* 7851 * The Misc part of the Prolog 7852 */ 7853 GROW; 7854 xmlParseMisc(ctxt); 7855 7856 /* 7857 * Then possibly doc type declaration(s) and more Misc 7858 * (doctypedecl Misc*)? 7859 */ 7860 GROW; 7861 if ((RAW == '<') && (NXT(1) == '!') && 7862 (NXT(2) == 'D') && (NXT(3) == 'O') && 7863 (NXT(4) == 'C') && (NXT(5) == 'T') && 7864 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7865 (NXT(8) == 'E')) { 7866 7867 ctxt->inSubset = 1; 7868 xmlParseDocTypeDecl(ctxt); 7869 if (RAW == '[') { 7870 ctxt->instate = XML_PARSER_DTD; 7871 xmlParseInternalSubset(ctxt); 7872 } 7873 7874 /* 7875 * Create and update the external subset. 7876 */ 7877 ctxt->inSubset = 2; 7878 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7879 (!ctxt->disableSAX)) 7880 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7881 ctxt->extSubSystem, ctxt->extSubURI); 7882 ctxt->inSubset = 0; 7883 7884 7885 ctxt->instate = XML_PARSER_PROLOG; 7886 xmlParseMisc(ctxt); 7887 } 7888 7889 /* 7890 * Time to start parsing the tree itself 7891 */ 7892 GROW; 7893 if (RAW != '<') { 7894 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7896 ctxt->sax->error(ctxt->userData, 7897 "Start tag expected, '<' not found\n"); 7898 ctxt->wellFormed = 0; 7899 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7900 ctxt->instate = XML_PARSER_EOF; 7901 } else { 7902 ctxt->instate = XML_PARSER_CONTENT; 7903 xmlParseElement(ctxt); 7904 ctxt->instate = XML_PARSER_EPILOG; 7905 7906 7907 /* 7908 * The Misc part at the end 7909 */ 7910 xmlParseMisc(ctxt); 7911 7912 if (RAW != 0) { 7913 ctxt->errNo = XML_ERR_DOCUMENT_END; 7914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7915 ctxt->sax->error(ctxt->userData, 7916 "Extra content at the end of the document\n"); 7917 ctxt->wellFormed = 0; 7918 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7919 } 7920 ctxt->instate = XML_PARSER_EOF; 7921 } 7922 7923 /* 7924 * SAX: end of the document processing. 7925 */ 7926 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7927 ctxt->sax->endDocument(ctxt->userData); 7928 7929 /* 7930 * Remove locally kept entity definitions if the tree was not built 7931 */ 7932 if ((ctxt->myDoc != NULL) && 7933 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 7934 xmlFreeDoc(ctxt->myDoc); 7935 ctxt->myDoc = NULL; 7936 } 7937 7938 if (! ctxt->wellFormed) { 7939 ctxt->valid = 0; 7940 return(-1); 7941 } 7942 return(0); 7943} 7944 7945/** 7946 * xmlParseExtParsedEnt: 7947 * @ctxt: an XML parser context 7948 * 7949 * parse a general parsed entity 7950 * An external general parsed entity is well-formed if it matches the 7951 * production labeled extParsedEnt. 7952 * 7953 * [78] extParsedEnt ::= TextDecl? content 7954 * 7955 * Returns 0, -1 in case of error. the parser context is augmented 7956 * as a result of the parsing. 7957 */ 7958 7959int 7960xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7961 xmlChar start[4]; 7962 xmlCharEncoding enc; 7963 7964 xmlDefaultSAXHandlerInit(); 7965 7966 GROW; 7967 7968 /* 7969 * SAX: beginning of the document processing. 7970 */ 7971 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7972 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7973 7974 /* 7975 * Get the 4 first bytes and decode the charset 7976 * if enc != XML_CHAR_ENCODING_NONE 7977 * plug some encoding conversion routines. 7978 */ 7979 start[0] = RAW; 7980 start[1] = NXT(1); 7981 start[2] = NXT(2); 7982 start[3] = NXT(3); 7983 enc = xmlDetectCharEncoding(start, 4); 7984 if (enc != XML_CHAR_ENCODING_NONE) { 7985 xmlSwitchEncoding(ctxt, enc); 7986 } 7987 7988 7989 if (CUR == 0) { 7990 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7992 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7993 ctxt->wellFormed = 0; 7994 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 7995 } 7996 7997 /* 7998 * Check for the XMLDecl in the Prolog. 7999 */ 8000 GROW; 8001 if ((RAW == '<') && (NXT(1) == '?') && 8002 (NXT(2) == 'x') && (NXT(3) == 'm') && 8003 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8004 8005 /* 8006 * Note that we will switch encoding on the fly. 8007 */ 8008 xmlParseXMLDecl(ctxt); 8009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8010 /* 8011 * The XML REC instructs us to stop parsing right here 8012 */ 8013 return(-1); 8014 } 8015 SKIP_BLANKS; 8016 } else { 8017 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8018 } 8019 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8020 ctxt->sax->startDocument(ctxt->userData); 8021 8022 /* 8023 * Doing validity checking on chunk doesn't make sense 8024 */ 8025 ctxt->instate = XML_PARSER_CONTENT; 8026 ctxt->validate = 0; 8027 ctxt->loadsubset = 0; 8028 ctxt->depth = 0; 8029 8030 xmlParseContent(ctxt); 8031 8032 if ((RAW == '<') && (NXT(1) == '/')) { 8033 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8035 ctxt->sax->error(ctxt->userData, 8036 "chunk is not well balanced\n"); 8037 ctxt->wellFormed = 0; 8038 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8039 } else if (RAW != 0) { 8040 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8042 ctxt->sax->error(ctxt->userData, 8043 "extra content at the end of well balanced chunk\n"); 8044 ctxt->wellFormed = 0; 8045 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8046 } 8047 8048 /* 8049 * SAX: end of the document processing. 8050 */ 8051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8052 ctxt->sax->endDocument(ctxt->userData); 8053 8054 if (! ctxt->wellFormed) return(-1); 8055 return(0); 8056} 8057 8058/************************************************************************ 8059 * * 8060 * Progressive parsing interfaces * 8061 * * 8062 ************************************************************************/ 8063 8064/** 8065 * xmlParseLookupSequence: 8066 * @ctxt: an XML parser context 8067 * @first: the first char to lookup 8068 * @next: the next char to lookup or zero 8069 * @third: the next char to lookup or zero 8070 * 8071 * Try to find if a sequence (first, next, third) or just (first next) or 8072 * (first) is available in the input stream. 8073 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 8074 * to avoid rescanning sequences of bytes, it DOES change the state of the 8075 * parser, do not use liberally. 8076 * 8077 * Returns the index to the current parsing point if the full sequence 8078 * is available, -1 otherwise. 8079 */ 8080static int 8081xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 8082 xmlChar next, xmlChar third) { 8083 int base, len; 8084 xmlParserInputPtr in; 8085 const xmlChar *buf; 8086 8087 in = ctxt->input; 8088 if (in == NULL) return(-1); 8089 base = in->cur - in->base; 8090 if (base < 0) return(-1); 8091 if (ctxt->checkIndex > base) 8092 base = ctxt->checkIndex; 8093 if (in->buf == NULL) { 8094 buf = in->base; 8095 len = in->length; 8096 } else { 8097 buf = in->buf->buffer->content; 8098 len = in->buf->buffer->use; 8099 } 8100 /* take into account the sequence length */ 8101 if (third) len -= 2; 8102 else if (next) len --; 8103 for (;base < len;base++) { 8104 if (buf[base] == first) { 8105 if (third != 0) { 8106 if ((buf[base + 1] != next) || 8107 (buf[base + 2] != third)) continue; 8108 } else if (next != 0) { 8109 if (buf[base + 1] != next) continue; 8110 } 8111 ctxt->checkIndex = 0; 8112#ifdef DEBUG_PUSH 8113 if (next == 0) 8114 xmlGenericError(xmlGenericErrorContext, 8115 "PP: lookup '%c' found at %d\n", 8116 first, base); 8117 else if (third == 0) 8118 xmlGenericError(xmlGenericErrorContext, 8119 "PP: lookup '%c%c' found at %d\n", 8120 first, next, base); 8121 else 8122 xmlGenericError(xmlGenericErrorContext, 8123 "PP: lookup '%c%c%c' found at %d\n", 8124 first, next, third, base); 8125#endif 8126 return(base - (in->cur - in->base)); 8127 } 8128 } 8129 ctxt->checkIndex = base; 8130#ifdef DEBUG_PUSH 8131 if (next == 0) 8132 xmlGenericError(xmlGenericErrorContext, 8133 "PP: lookup '%c' failed\n", first); 8134 else if (third == 0) 8135 xmlGenericError(xmlGenericErrorContext, 8136 "PP: lookup '%c%c' failed\n", first, next); 8137 else 8138 xmlGenericError(xmlGenericErrorContext, 8139 "PP: lookup '%c%c%c' failed\n", first, next, third); 8140#endif 8141 return(-1); 8142} 8143 8144/** 8145 * xmlParseTryOrFinish: 8146 * @ctxt: an XML parser context 8147 * @terminate: last chunk indicator 8148 * 8149 * Try to progress on parsing 8150 * 8151 * Returns zero if no parsing was possible 8152 */ 8153static int 8154xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 8155 int ret = 0; 8156 int avail; 8157 xmlChar cur, next; 8158 8159#ifdef DEBUG_PUSH 8160 switch (ctxt->instate) { 8161 case XML_PARSER_EOF: 8162 xmlGenericError(xmlGenericErrorContext, 8163 "PP: try EOF\n"); break; 8164 case XML_PARSER_START: 8165 xmlGenericError(xmlGenericErrorContext, 8166 "PP: try START\n"); break; 8167 case XML_PARSER_MISC: 8168 xmlGenericError(xmlGenericErrorContext, 8169 "PP: try MISC\n");break; 8170 case XML_PARSER_COMMENT: 8171 xmlGenericError(xmlGenericErrorContext, 8172 "PP: try COMMENT\n");break; 8173 case XML_PARSER_PROLOG: 8174 xmlGenericError(xmlGenericErrorContext, 8175 "PP: try PROLOG\n");break; 8176 case XML_PARSER_START_TAG: 8177 xmlGenericError(xmlGenericErrorContext, 8178 "PP: try START_TAG\n");break; 8179 case XML_PARSER_CONTENT: 8180 xmlGenericError(xmlGenericErrorContext, 8181 "PP: try CONTENT\n");break; 8182 case XML_PARSER_CDATA_SECTION: 8183 xmlGenericError(xmlGenericErrorContext, 8184 "PP: try CDATA_SECTION\n");break; 8185 case XML_PARSER_END_TAG: 8186 xmlGenericError(xmlGenericErrorContext, 8187 "PP: try END_TAG\n");break; 8188 case XML_PARSER_ENTITY_DECL: 8189 xmlGenericError(xmlGenericErrorContext, 8190 "PP: try ENTITY_DECL\n");break; 8191 case XML_PARSER_ENTITY_VALUE: 8192 xmlGenericError(xmlGenericErrorContext, 8193 "PP: try ENTITY_VALUE\n");break; 8194 case XML_PARSER_ATTRIBUTE_VALUE: 8195 xmlGenericError(xmlGenericErrorContext, 8196 "PP: try ATTRIBUTE_VALUE\n");break; 8197 case XML_PARSER_DTD: 8198 xmlGenericError(xmlGenericErrorContext, 8199 "PP: try DTD\n");break; 8200 case XML_PARSER_EPILOG: 8201 xmlGenericError(xmlGenericErrorContext, 8202 "PP: try EPILOG\n");break; 8203 case XML_PARSER_PI: 8204 xmlGenericError(xmlGenericErrorContext, 8205 "PP: try PI\n");break; 8206 case XML_PARSER_IGNORE: 8207 xmlGenericError(xmlGenericErrorContext, 8208 "PP: try IGNORE\n");break; 8209 } 8210#endif 8211 8212 while (1) { 8213 SHRINK; 8214 8215 /* 8216 * Pop-up of finished entities. 8217 */ 8218 while ((RAW == 0) && (ctxt->inputNr > 1)) 8219 xmlPopInput(ctxt); 8220 8221 if (ctxt->input ==NULL) break; 8222 if (ctxt->input->buf == NULL) 8223 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8224 else { 8225 /* 8226 * If we are operating on converted input, try to flush 8227 * remainng chars to avoid them stalling in the non-converted 8228 * buffer. 8229 */ 8230 if ((ctxt->input->buf->raw != NULL) && 8231 (ctxt->input->buf->raw->use > 0)) { 8232 int base = ctxt->input->base - 8233 ctxt->input->buf->buffer->content; 8234 int current = ctxt->input->cur - ctxt->input->base; 8235 8236 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 8237 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8238 ctxt->input->cur = ctxt->input->base + current; 8239 ctxt->input->end = 8240 &ctxt->input->buf->buffer->content[ 8241 ctxt->input->buf->buffer->use]; 8242 } 8243 avail = ctxt->input->buf->buffer->use - 8244 (ctxt->input->cur - ctxt->input->base); 8245 } 8246 if (avail < 1) 8247 goto done; 8248 switch (ctxt->instate) { 8249 case XML_PARSER_EOF: 8250 /* 8251 * Document parsing is done ! 8252 */ 8253 goto done; 8254 case XML_PARSER_START: 8255 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 8256 xmlChar start[4]; 8257 xmlCharEncoding enc; 8258 8259 /* 8260 * Very first chars read from the document flow. 8261 */ 8262 if (avail < 4) 8263 goto done; 8264 8265 /* 8266 * Get the 4 first bytes and decode the charset 8267 * if enc != XML_CHAR_ENCODING_NONE 8268 * plug some encoding conversion routines. 8269 */ 8270 start[0] = RAW; 8271 start[1] = NXT(1); 8272 start[2] = NXT(2); 8273 start[3] = NXT(3); 8274 enc = xmlDetectCharEncoding(start, 4); 8275 if (enc != XML_CHAR_ENCODING_NONE) { 8276 xmlSwitchEncoding(ctxt, enc); 8277 } 8278 break; 8279 } 8280 8281 cur = ctxt->input->cur[0]; 8282 next = ctxt->input->cur[1]; 8283 if (cur == 0) { 8284 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8285 ctxt->sax->setDocumentLocator(ctxt->userData, 8286 &xmlDefaultSAXLocator); 8287 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8289 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 8290 ctxt->wellFormed = 0; 8291 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8292 ctxt->instate = XML_PARSER_EOF; 8293#ifdef DEBUG_PUSH 8294 xmlGenericError(xmlGenericErrorContext, 8295 "PP: entering EOF\n"); 8296#endif 8297 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8298 ctxt->sax->endDocument(ctxt->userData); 8299 goto done; 8300 } 8301 if ((cur == '<') && (next == '?')) { 8302 /* PI or XML decl */ 8303 if (avail < 5) return(ret); 8304 if ((!terminate) && 8305 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8306 return(ret); 8307 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8308 ctxt->sax->setDocumentLocator(ctxt->userData, 8309 &xmlDefaultSAXLocator); 8310 if ((ctxt->input->cur[2] == 'x') && 8311 (ctxt->input->cur[3] == 'm') && 8312 (ctxt->input->cur[4] == 'l') && 8313 (IS_BLANK(ctxt->input->cur[5]))) { 8314 ret += 5; 8315#ifdef DEBUG_PUSH 8316 xmlGenericError(xmlGenericErrorContext, 8317 "PP: Parsing XML Decl\n"); 8318#endif 8319 xmlParseXMLDecl(ctxt); 8320 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8321 /* 8322 * The XML REC instructs us to stop parsing right 8323 * here 8324 */ 8325 ctxt->instate = XML_PARSER_EOF; 8326 return(0); 8327 } 8328 ctxt->standalone = ctxt->input->standalone; 8329 if ((ctxt->encoding == NULL) && 8330 (ctxt->input->encoding != NULL)) 8331 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 8332 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8333 (!ctxt->disableSAX)) 8334 ctxt->sax->startDocument(ctxt->userData); 8335 ctxt->instate = XML_PARSER_MISC; 8336#ifdef DEBUG_PUSH 8337 xmlGenericError(xmlGenericErrorContext, 8338 "PP: entering MISC\n"); 8339#endif 8340 } else { 8341 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8342 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8343 (!ctxt->disableSAX)) 8344 ctxt->sax->startDocument(ctxt->userData); 8345 ctxt->instate = XML_PARSER_MISC; 8346#ifdef DEBUG_PUSH 8347 xmlGenericError(xmlGenericErrorContext, 8348 "PP: entering MISC\n"); 8349#endif 8350 } 8351 } else { 8352 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8353 ctxt->sax->setDocumentLocator(ctxt->userData, 8354 &xmlDefaultSAXLocator); 8355 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8356 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8357 (!ctxt->disableSAX)) 8358 ctxt->sax->startDocument(ctxt->userData); 8359 ctxt->instate = XML_PARSER_MISC; 8360#ifdef DEBUG_PUSH 8361 xmlGenericError(xmlGenericErrorContext, 8362 "PP: entering MISC\n"); 8363#endif 8364 } 8365 break; 8366 case XML_PARSER_MISC: 8367 SKIP_BLANKS; 8368 if (ctxt->input->buf == NULL) 8369 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8370 else 8371 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8372 if (avail < 2) 8373 goto done; 8374 cur = ctxt->input->cur[0]; 8375 next = ctxt->input->cur[1]; 8376 if ((cur == '<') && (next == '?')) { 8377 if ((!terminate) && 8378 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8379 goto done; 8380#ifdef DEBUG_PUSH 8381 xmlGenericError(xmlGenericErrorContext, 8382 "PP: Parsing PI\n"); 8383#endif 8384 xmlParsePI(ctxt); 8385 } else if ((cur == '<') && (next == '!') && 8386 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8387 if ((!terminate) && 8388 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8389 goto done; 8390#ifdef DEBUG_PUSH 8391 xmlGenericError(xmlGenericErrorContext, 8392 "PP: Parsing Comment\n"); 8393#endif 8394 xmlParseComment(ctxt); 8395 ctxt->instate = XML_PARSER_MISC; 8396 } else if ((cur == '<') && (next == '!') && 8397 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 8398 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 8399 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 8400 (ctxt->input->cur[8] == 'E')) { 8401 if ((!terminate) && 8402 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8403 goto done; 8404#ifdef DEBUG_PUSH 8405 xmlGenericError(xmlGenericErrorContext, 8406 "PP: Parsing internal subset\n"); 8407#endif 8408 ctxt->inSubset = 1; 8409 xmlParseDocTypeDecl(ctxt); 8410 if (RAW == '[') { 8411 ctxt->instate = XML_PARSER_DTD; 8412#ifdef DEBUG_PUSH 8413 xmlGenericError(xmlGenericErrorContext, 8414 "PP: entering DTD\n"); 8415#endif 8416 } else { 8417 /* 8418 * Create and update the external subset. 8419 */ 8420 ctxt->inSubset = 2; 8421 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8422 (ctxt->sax->externalSubset != NULL)) 8423 ctxt->sax->externalSubset(ctxt->userData, 8424 ctxt->intSubName, ctxt->extSubSystem, 8425 ctxt->extSubURI); 8426 ctxt->inSubset = 0; 8427 ctxt->instate = XML_PARSER_PROLOG; 8428#ifdef DEBUG_PUSH 8429 xmlGenericError(xmlGenericErrorContext, 8430 "PP: entering PROLOG\n"); 8431#endif 8432 } 8433 } else if ((cur == '<') && (next == '!') && 8434 (avail < 9)) { 8435 goto done; 8436 } else { 8437 ctxt->instate = XML_PARSER_START_TAG; 8438#ifdef DEBUG_PUSH 8439 xmlGenericError(xmlGenericErrorContext, 8440 "PP: entering START_TAG\n"); 8441#endif 8442 } 8443 break; 8444 case XML_PARSER_IGNORE: 8445 xmlGenericError(xmlGenericErrorContext, 8446 "PP: internal error, state == IGNORE"); 8447 ctxt->instate = XML_PARSER_DTD; 8448#ifdef DEBUG_PUSH 8449 xmlGenericError(xmlGenericErrorContext, 8450 "PP: entering DTD\n"); 8451#endif 8452 break; 8453 case XML_PARSER_PROLOG: 8454 SKIP_BLANKS; 8455 if (ctxt->input->buf == NULL) 8456 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8457 else 8458 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8459 if (avail < 2) 8460 goto done; 8461 cur = ctxt->input->cur[0]; 8462 next = ctxt->input->cur[1]; 8463 if ((cur == '<') && (next == '?')) { 8464 if ((!terminate) && 8465 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8466 goto done; 8467#ifdef DEBUG_PUSH 8468 xmlGenericError(xmlGenericErrorContext, 8469 "PP: Parsing PI\n"); 8470#endif 8471 xmlParsePI(ctxt); 8472 } else if ((cur == '<') && (next == '!') && 8473 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8474 if ((!terminate) && 8475 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8476 goto done; 8477#ifdef DEBUG_PUSH 8478 xmlGenericError(xmlGenericErrorContext, 8479 "PP: Parsing Comment\n"); 8480#endif 8481 xmlParseComment(ctxt); 8482 ctxt->instate = XML_PARSER_PROLOG; 8483 } else if ((cur == '<') && (next == '!') && 8484 (avail < 4)) { 8485 goto done; 8486 } else { 8487 ctxt->instate = XML_PARSER_START_TAG; 8488#ifdef DEBUG_PUSH 8489 xmlGenericError(xmlGenericErrorContext, 8490 "PP: entering START_TAG\n"); 8491#endif 8492 } 8493 break; 8494 case XML_PARSER_EPILOG: 8495 SKIP_BLANKS; 8496 if (ctxt->input->buf == NULL) 8497 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8498 else 8499 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8500 if (avail < 2) 8501 goto done; 8502 cur = ctxt->input->cur[0]; 8503 next = ctxt->input->cur[1]; 8504 if ((cur == '<') && (next == '?')) { 8505 if ((!terminate) && 8506 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8507 goto done; 8508#ifdef DEBUG_PUSH 8509 xmlGenericError(xmlGenericErrorContext, 8510 "PP: Parsing PI\n"); 8511#endif 8512 xmlParsePI(ctxt); 8513 ctxt->instate = XML_PARSER_EPILOG; 8514 } else if ((cur == '<') && (next == '!') && 8515 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8516 if ((!terminate) && 8517 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8518 goto done; 8519#ifdef DEBUG_PUSH 8520 xmlGenericError(xmlGenericErrorContext, 8521 "PP: Parsing Comment\n"); 8522#endif 8523 xmlParseComment(ctxt); 8524 ctxt->instate = XML_PARSER_EPILOG; 8525 } else if ((cur == '<') && (next == '!') && 8526 (avail < 4)) { 8527 goto done; 8528 } else { 8529 ctxt->errNo = XML_ERR_DOCUMENT_END; 8530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8531 ctxt->sax->error(ctxt->userData, 8532 "Extra content at the end of the document\n"); 8533 ctxt->wellFormed = 0; 8534 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8535 ctxt->instate = XML_PARSER_EOF; 8536#ifdef DEBUG_PUSH 8537 xmlGenericError(xmlGenericErrorContext, 8538 "PP: entering EOF\n"); 8539#endif 8540 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8541 ctxt->sax->endDocument(ctxt->userData); 8542 goto done; 8543 } 8544 break; 8545 case XML_PARSER_START_TAG: { 8546 xmlChar *name, *oldname; 8547 8548 if ((avail < 2) && (ctxt->inputNr == 1)) 8549 goto done; 8550 cur = ctxt->input->cur[0]; 8551 if (cur != '<') { 8552 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8554 ctxt->sax->error(ctxt->userData, 8555 "Start tag expect, '<' not found\n"); 8556 ctxt->wellFormed = 0; 8557 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8558 ctxt->instate = XML_PARSER_EOF; 8559#ifdef DEBUG_PUSH 8560 xmlGenericError(xmlGenericErrorContext, 8561 "PP: entering EOF\n"); 8562#endif 8563 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8564 ctxt->sax->endDocument(ctxt->userData); 8565 goto done; 8566 } 8567 if ((!terminate) && 8568 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8569 goto done; 8570 if (ctxt->spaceNr == 0) 8571 spacePush(ctxt, -1); 8572 else 8573 spacePush(ctxt, *ctxt->space); 8574 name = xmlParseStartTag(ctxt); 8575 if (name == NULL) { 8576 spacePop(ctxt); 8577 ctxt->instate = XML_PARSER_EOF; 8578#ifdef DEBUG_PUSH 8579 xmlGenericError(xmlGenericErrorContext, 8580 "PP: entering EOF\n"); 8581#endif 8582 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8583 ctxt->sax->endDocument(ctxt->userData); 8584 goto done; 8585 } 8586 namePush(ctxt, xmlStrdup(name)); 8587 8588 /* 8589 * [ VC: Root Element Type ] 8590 * The Name in the document type declaration must match 8591 * the element type of the root element. 8592 */ 8593 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8594 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8595 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8596 8597 /* 8598 * Check for an Empty Element. 8599 */ 8600 if ((RAW == '/') && (NXT(1) == '>')) { 8601 SKIP(2); 8602 if ((ctxt->sax != NULL) && 8603 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 8604 ctxt->sax->endElement(ctxt->userData, name); 8605 xmlFree(name); 8606 oldname = namePop(ctxt); 8607 spacePop(ctxt); 8608 if (oldname != NULL) { 8609#ifdef DEBUG_STACK 8610 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8611#endif 8612 xmlFree(oldname); 8613 } 8614 if (ctxt->name == NULL) { 8615 ctxt->instate = XML_PARSER_EPILOG; 8616#ifdef DEBUG_PUSH 8617 xmlGenericError(xmlGenericErrorContext, 8618 "PP: entering EPILOG\n"); 8619#endif 8620 } else { 8621 ctxt->instate = XML_PARSER_CONTENT; 8622#ifdef DEBUG_PUSH 8623 xmlGenericError(xmlGenericErrorContext, 8624 "PP: entering CONTENT\n"); 8625#endif 8626 } 8627 break; 8628 } 8629 if (RAW == '>') { 8630 NEXT; 8631 } else { 8632 ctxt->errNo = XML_ERR_GT_REQUIRED; 8633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8634 ctxt->sax->error(ctxt->userData, 8635 "Couldn't find end of Start Tag %s\n", 8636 name); 8637 ctxt->wellFormed = 0; 8638 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8639 8640 /* 8641 * end of parsing of this node. 8642 */ 8643 nodePop(ctxt); 8644 oldname = namePop(ctxt); 8645 spacePop(ctxt); 8646 if (oldname != NULL) { 8647#ifdef DEBUG_STACK 8648 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8649#endif 8650 xmlFree(oldname); 8651 } 8652 } 8653 xmlFree(name); 8654 ctxt->instate = XML_PARSER_CONTENT; 8655#ifdef DEBUG_PUSH 8656 xmlGenericError(xmlGenericErrorContext, 8657 "PP: entering CONTENT\n"); 8658#endif 8659 break; 8660 } 8661 case XML_PARSER_CONTENT: { 8662 const xmlChar *test; 8663 int cons; 8664 if ((avail < 2) && (ctxt->inputNr == 1)) 8665 goto done; 8666 cur = ctxt->input->cur[0]; 8667 next = ctxt->input->cur[1]; 8668 8669 test = CUR_PTR; 8670 cons = ctxt->input->consumed; 8671 if ((cur == '<') && (next == '?')) { 8672 if ((!terminate) && 8673 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8674 goto done; 8675#ifdef DEBUG_PUSH 8676 xmlGenericError(xmlGenericErrorContext, 8677 "PP: Parsing PI\n"); 8678#endif 8679 xmlParsePI(ctxt); 8680 } else if ((cur == '<') && (next == '!') && 8681 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8682 if ((!terminate) && 8683 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8684 goto done; 8685#ifdef DEBUG_PUSH 8686 xmlGenericError(xmlGenericErrorContext, 8687 "PP: Parsing Comment\n"); 8688#endif 8689 xmlParseComment(ctxt); 8690 ctxt->instate = XML_PARSER_CONTENT; 8691 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8692 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8693 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8694 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8695 (ctxt->input->cur[8] == '[')) { 8696 SKIP(9); 8697 ctxt->instate = XML_PARSER_CDATA_SECTION; 8698#ifdef DEBUG_PUSH 8699 xmlGenericError(xmlGenericErrorContext, 8700 "PP: entering CDATA_SECTION\n"); 8701#endif 8702 break; 8703 } else if ((cur == '<') && (next == '!') && 8704 (avail < 9)) { 8705 goto done; 8706 } else if ((cur == '<') && (next == '/')) { 8707 ctxt->instate = XML_PARSER_END_TAG; 8708#ifdef DEBUG_PUSH 8709 xmlGenericError(xmlGenericErrorContext, 8710 "PP: entering END_TAG\n"); 8711#endif 8712 break; 8713 } else if (cur == '<') { 8714 ctxt->instate = XML_PARSER_START_TAG; 8715#ifdef DEBUG_PUSH 8716 xmlGenericError(xmlGenericErrorContext, 8717 "PP: entering START_TAG\n"); 8718#endif 8719 break; 8720 } else if (cur == '&') { 8721 if ((!terminate) && 8722 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8723 goto done; 8724#ifdef DEBUG_PUSH 8725 xmlGenericError(xmlGenericErrorContext, 8726 "PP: Parsing Reference\n"); 8727#endif 8728 xmlParseReference(ctxt); 8729 } else { 8730 /* TODO Avoid the extra copy, handle directly !!! */ 8731 /* 8732 * Goal of the following test is: 8733 * - minimize calls to the SAX 'character' callback 8734 * when they are mergeable 8735 * - handle an problem for isBlank when we only parse 8736 * a sequence of blank chars and the next one is 8737 * not available to check against '<' presence. 8738 * - tries to homogenize the differences in SAX 8739 * callbacks between the push and pull versions 8740 * of the parser. 8741 */ 8742 if ((ctxt->inputNr == 1) && 8743 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8744 if ((!terminate) && 8745 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8746 goto done; 8747 } 8748 ctxt->checkIndex = 0; 8749#ifdef DEBUG_PUSH 8750 xmlGenericError(xmlGenericErrorContext, 8751 "PP: Parsing char data\n"); 8752#endif 8753 xmlParseCharData(ctxt, 0); 8754 } 8755 /* 8756 * Pop-up of finished entities. 8757 */ 8758 while ((RAW == 0) && (ctxt->inputNr > 1)) 8759 xmlPopInput(ctxt); 8760 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8761 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8763 ctxt->sax->error(ctxt->userData, 8764 "detected an error in element content\n"); 8765 ctxt->wellFormed = 0; 8766 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 8767 ctxt->instate = XML_PARSER_EOF; 8768 break; 8769 } 8770 break; 8771 } 8772 case XML_PARSER_CDATA_SECTION: { 8773 /* 8774 * The Push mode need to have the SAX callback for 8775 * cdataBlock merge back contiguous callbacks. 8776 */ 8777 int base; 8778 8779 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8780 if (base < 0) { 8781 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8782 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8783 if (ctxt->sax->cdataBlock != NULL) 8784 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8785 XML_PARSER_BIG_BUFFER_SIZE); 8786 } 8787 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8788 ctxt->checkIndex = 0; 8789 } 8790 goto done; 8791 } else { 8792 if ((ctxt->sax != NULL) && (base > 0) && 8793 (!ctxt->disableSAX)) { 8794 if (ctxt->sax->cdataBlock != NULL) 8795 ctxt->sax->cdataBlock(ctxt->userData, 8796 ctxt->input->cur, base); 8797 } 8798 SKIP(base + 3); 8799 ctxt->checkIndex = 0; 8800 ctxt->instate = XML_PARSER_CONTENT; 8801#ifdef DEBUG_PUSH 8802 xmlGenericError(xmlGenericErrorContext, 8803 "PP: entering CONTENT\n"); 8804#endif 8805 } 8806 break; 8807 } 8808 case XML_PARSER_END_TAG: 8809 if (avail < 2) 8810 goto done; 8811 if ((!terminate) && 8812 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8813 goto done; 8814 xmlParseEndTag(ctxt); 8815 if (ctxt->name == NULL) { 8816 ctxt->instate = XML_PARSER_EPILOG; 8817#ifdef DEBUG_PUSH 8818 xmlGenericError(xmlGenericErrorContext, 8819 "PP: entering EPILOG\n"); 8820#endif 8821 } else { 8822 ctxt->instate = XML_PARSER_CONTENT; 8823#ifdef DEBUG_PUSH 8824 xmlGenericError(xmlGenericErrorContext, 8825 "PP: entering CONTENT\n"); 8826#endif 8827 } 8828 break; 8829 case XML_PARSER_DTD: { 8830 /* 8831 * Sorry but progressive parsing of the internal subset 8832 * is not expected to be supported. We first check that 8833 * the full content of the internal subset is available and 8834 * the parsing is launched only at that point. 8835 * Internal subset ends up with "']' S? '>'" in an unescaped 8836 * section and not in a ']]>' sequence which are conditional 8837 * sections (whoever argued to keep that crap in XML deserve 8838 * a place in hell !). 8839 */ 8840 int base, i; 8841 xmlChar *buf; 8842 xmlChar quote = 0; 8843 8844 base = ctxt->input->cur - ctxt->input->base; 8845 if (base < 0) return(0); 8846 if (ctxt->checkIndex > base) 8847 base = ctxt->checkIndex; 8848 buf = ctxt->input->buf->buffer->content; 8849 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8850 base++) { 8851 if (quote != 0) { 8852 if (buf[base] == quote) 8853 quote = 0; 8854 continue; 8855 } 8856 if (buf[base] == '"') { 8857 quote = '"'; 8858 continue; 8859 } 8860 if (buf[base] == '\'') { 8861 quote = '\''; 8862 continue; 8863 } 8864 if (buf[base] == ']') { 8865 if ((unsigned int) base +1 >= 8866 ctxt->input->buf->buffer->use) 8867 break; 8868 if (buf[base + 1] == ']') { 8869 /* conditional crap, skip both ']' ! */ 8870 base++; 8871 continue; 8872 } 8873 for (i = 0; 8874 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8875 i++) { 8876 if (buf[base + i] == '>') 8877 goto found_end_int_subset; 8878 } 8879 break; 8880 } 8881 } 8882 /* 8883 * We didn't found the end of the Internal subset 8884 */ 8885 if (quote == 0) 8886 ctxt->checkIndex = base; 8887#ifdef DEBUG_PUSH 8888 if (next == 0) 8889 xmlGenericError(xmlGenericErrorContext, 8890 "PP: lookup of int subset end filed\n"); 8891#endif 8892 goto done; 8893 8894found_end_int_subset: 8895 xmlParseInternalSubset(ctxt); 8896 ctxt->inSubset = 2; 8897 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8898 (ctxt->sax->externalSubset != NULL)) 8899 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8900 ctxt->extSubSystem, ctxt->extSubURI); 8901 ctxt->inSubset = 0; 8902 ctxt->instate = XML_PARSER_PROLOG; 8903 ctxt->checkIndex = 0; 8904#ifdef DEBUG_PUSH 8905 xmlGenericError(xmlGenericErrorContext, 8906 "PP: entering PROLOG\n"); 8907#endif 8908 break; 8909 } 8910 case XML_PARSER_COMMENT: 8911 xmlGenericError(xmlGenericErrorContext, 8912 "PP: internal error, state == COMMENT\n"); 8913 ctxt->instate = XML_PARSER_CONTENT; 8914#ifdef DEBUG_PUSH 8915 xmlGenericError(xmlGenericErrorContext, 8916 "PP: entering CONTENT\n"); 8917#endif 8918 break; 8919 case XML_PARSER_PI: 8920 xmlGenericError(xmlGenericErrorContext, 8921 "PP: internal error, state == PI\n"); 8922 ctxt->instate = XML_PARSER_CONTENT; 8923#ifdef DEBUG_PUSH 8924 xmlGenericError(xmlGenericErrorContext, 8925 "PP: entering CONTENT\n"); 8926#endif 8927 break; 8928 case XML_PARSER_ENTITY_DECL: 8929 xmlGenericError(xmlGenericErrorContext, 8930 "PP: internal error, state == ENTITY_DECL\n"); 8931 ctxt->instate = XML_PARSER_DTD; 8932#ifdef DEBUG_PUSH 8933 xmlGenericError(xmlGenericErrorContext, 8934 "PP: entering DTD\n"); 8935#endif 8936 break; 8937 case XML_PARSER_ENTITY_VALUE: 8938 xmlGenericError(xmlGenericErrorContext, 8939 "PP: internal error, state == ENTITY_VALUE\n"); 8940 ctxt->instate = XML_PARSER_CONTENT; 8941#ifdef DEBUG_PUSH 8942 xmlGenericError(xmlGenericErrorContext, 8943 "PP: entering DTD\n"); 8944#endif 8945 break; 8946 case XML_PARSER_ATTRIBUTE_VALUE: 8947 xmlGenericError(xmlGenericErrorContext, 8948 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8949 ctxt->instate = XML_PARSER_START_TAG; 8950#ifdef DEBUG_PUSH 8951 xmlGenericError(xmlGenericErrorContext, 8952 "PP: entering START_TAG\n"); 8953#endif 8954 break; 8955 case XML_PARSER_SYSTEM_LITERAL: 8956 xmlGenericError(xmlGenericErrorContext, 8957 "PP: internal error, state == SYSTEM_LITERAL\n"); 8958 ctxt->instate = XML_PARSER_START_TAG; 8959#ifdef DEBUG_PUSH 8960 xmlGenericError(xmlGenericErrorContext, 8961 "PP: entering START_TAG\n"); 8962#endif 8963 break; 8964 case XML_PARSER_PUBLIC_LITERAL: 8965 xmlGenericError(xmlGenericErrorContext, 8966 "PP: internal error, state == PUBLIC_LITERAL\n"); 8967 ctxt->instate = XML_PARSER_START_TAG; 8968#ifdef DEBUG_PUSH 8969 xmlGenericError(xmlGenericErrorContext, 8970 "PP: entering START_TAG\n"); 8971#endif 8972 break; 8973 } 8974 } 8975done: 8976#ifdef DEBUG_PUSH 8977 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8978#endif 8979 return(ret); 8980} 8981 8982/** 8983 * xmlParseChunk: 8984 * @ctxt: an XML parser context 8985 * @chunk: an char array 8986 * @size: the size in byte of the chunk 8987 * @terminate: last chunk indicator 8988 * 8989 * Parse a Chunk of memory 8990 * 8991 * Returns zero if no error, the xmlParserErrors otherwise. 8992 */ 8993int 8994xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8995 int terminate) { 8996 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8997 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8998 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8999 int cur = ctxt->input->cur - ctxt->input->base; 9000 9001 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9002 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9003 ctxt->input->cur = ctxt->input->base + cur; 9004 ctxt->input->end = 9005 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9006#ifdef DEBUG_PUSH 9007 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9008#endif 9009 9010 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 9011 xmlParseTryOrFinish(ctxt, terminate); 9012 } else if (ctxt->instate != XML_PARSER_EOF) { 9013 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 9014 xmlParserInputBufferPtr in = ctxt->input->buf; 9015 if ((in->encoder != NULL) && (in->buffer != NULL) && 9016 (in->raw != NULL)) { 9017 int nbchars; 9018 9019 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 9020 if (nbchars < 0) { 9021 xmlGenericError(xmlGenericErrorContext, 9022 "xmlParseChunk: encoder error\n"); 9023 return(XML_ERR_INVALID_ENCODING); 9024 } 9025 } 9026 } 9027 } 9028 xmlParseTryOrFinish(ctxt, terminate); 9029 if (terminate) { 9030 /* 9031 * Check for termination 9032 */ 9033 int avail = 0; 9034 if (ctxt->input->buf == NULL) 9035 avail = ctxt->input->length - 9036 (ctxt->input->cur - ctxt->input->base); 9037 else 9038 avail = ctxt->input->buf->buffer->use - 9039 (ctxt->input->cur - ctxt->input->base); 9040 9041 if ((ctxt->instate != XML_PARSER_EOF) && 9042 (ctxt->instate != XML_PARSER_EPILOG)) { 9043 ctxt->errNo = XML_ERR_DOCUMENT_END; 9044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9045 ctxt->sax->error(ctxt->userData, 9046 "Extra content at the end of the document\n"); 9047 ctxt->wellFormed = 0; 9048 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9049 } 9050 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 9051 ctxt->errNo = XML_ERR_DOCUMENT_END; 9052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9053 ctxt->sax->error(ctxt->userData, 9054 "Extra content at the end of the document\n"); 9055 ctxt->wellFormed = 0; 9056 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9057 9058 } 9059 if (ctxt->instate != XML_PARSER_EOF) { 9060 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9061 ctxt->sax->endDocument(ctxt->userData); 9062 } 9063 ctxt->instate = XML_PARSER_EOF; 9064 } 9065 return((xmlParserErrors) ctxt->errNo); 9066} 9067 9068/************************************************************************ 9069 * * 9070 * I/O front end functions to the parser * 9071 * * 9072 ************************************************************************/ 9073 9074/** 9075 * xmlStopParser: 9076 * @ctxt: an XML parser context 9077 * 9078 * Blocks further parser processing 9079 */ 9080void 9081xmlStopParser(xmlParserCtxtPtr ctxt) { 9082 ctxt->instate = XML_PARSER_EOF; 9083 if (ctxt->input != NULL) 9084 ctxt->input->cur = BAD_CAST""; 9085} 9086 9087/** 9088 * xmlCreatePushParserCtxt: 9089 * @sax: a SAX handler 9090 * @user_data: The user data returned on SAX callbacks 9091 * @chunk: a pointer to an array of chars 9092 * @size: number of chars in the array 9093 * @filename: an optional file name or URI 9094 * 9095 * Create a parser context for using the XML parser in push mode. 9096 * If @buffer and @size are non-NULL, the data is used to detect 9097 * the encoding. The remaining characters will be parsed so they 9098 * don't need to be fed in again through xmlParseChunk. 9099 * To allow content encoding detection, @size should be >= 4 9100 * The value of @filename is used for fetching external entities 9101 * and error/warning reports. 9102 * 9103 * Returns the new parser context or NULL 9104 */ 9105 9106xmlParserCtxtPtr 9107xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9108 const char *chunk, int size, const char *filename) { 9109 xmlParserCtxtPtr ctxt; 9110 xmlParserInputPtr inputStream; 9111 xmlParserInputBufferPtr buf; 9112 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 9113 9114 /* 9115 * plug some encoding conversion routines 9116 */ 9117 if ((chunk != NULL) && (size >= 4)) 9118 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 9119 9120 buf = xmlAllocParserInputBuffer(enc); 9121 if (buf == NULL) return(NULL); 9122 9123 ctxt = xmlNewParserCtxt(); 9124 if (ctxt == NULL) { 9125 xmlFree(buf); 9126 return(NULL); 9127 } 9128 if (sax != NULL) { 9129 if (ctxt->sax != &xmlDefaultSAXHandler) 9130 xmlFree(ctxt->sax); 9131 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9132 if (ctxt->sax == NULL) { 9133 xmlFree(buf); 9134 xmlFree(ctxt); 9135 return(NULL); 9136 } 9137 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9138 if (user_data != NULL) 9139 ctxt->userData = user_data; 9140 } 9141 if (filename == NULL) { 9142 ctxt->directory = NULL; 9143 } else { 9144 ctxt->directory = xmlParserGetDirectory(filename); 9145 } 9146 9147 inputStream = xmlNewInputStream(ctxt); 9148 if (inputStream == NULL) { 9149 xmlFreeParserCtxt(ctxt); 9150 return(NULL); 9151 } 9152 9153 if (filename == NULL) 9154 inputStream->filename = NULL; 9155 else 9156 inputStream->filename = (char *) 9157 xmlCanonicPath((const xmlChar *) filename); 9158 inputStream->buf = buf; 9159 inputStream->base = inputStream->buf->buffer->content; 9160 inputStream->cur = inputStream->buf->buffer->content; 9161 inputStream->end = 9162 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 9163 9164 inputPush(ctxt, inputStream); 9165 9166 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9167 (ctxt->input->buf != NULL)) { 9168 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 9169 int cur = ctxt->input->cur - ctxt->input->base; 9170 9171 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9172 9173 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9174 ctxt->input->cur = ctxt->input->base + cur; 9175 ctxt->input->end = 9176 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9177#ifdef DEBUG_PUSH 9178 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9179#endif 9180 } 9181 9182 if (enc != XML_CHAR_ENCODING_NONE) { 9183 xmlSwitchEncoding(ctxt, enc); 9184 } 9185 9186 return(ctxt); 9187} 9188 9189/** 9190 * xmlCreateIOParserCtxt: 9191 * @sax: a SAX handler 9192 * @user_data: The user data returned on SAX callbacks 9193 * @ioread: an I/O read function 9194 * @ioclose: an I/O close function 9195 * @ioctx: an I/O handler 9196 * @enc: the charset encoding if known 9197 * 9198 * Create a parser context for using the XML parser with an existing 9199 * I/O stream 9200 * 9201 * Returns the new parser context or NULL 9202 */ 9203xmlParserCtxtPtr 9204xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9205 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 9206 void *ioctx, xmlCharEncoding enc) { 9207 xmlParserCtxtPtr ctxt; 9208 xmlParserInputPtr inputStream; 9209 xmlParserInputBufferPtr buf; 9210 9211 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 9212 if (buf == NULL) return(NULL); 9213 9214 ctxt = xmlNewParserCtxt(); 9215 if (ctxt == NULL) { 9216 xmlFree(buf); 9217 return(NULL); 9218 } 9219 if (sax != NULL) { 9220 if (ctxt->sax != &xmlDefaultSAXHandler) 9221 xmlFree(ctxt->sax); 9222 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9223 if (ctxt->sax == NULL) { 9224 xmlFree(buf); 9225 xmlFree(ctxt); 9226 return(NULL); 9227 } 9228 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9229 if (user_data != NULL) 9230 ctxt->userData = user_data; 9231 } 9232 9233 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 9234 if (inputStream == NULL) { 9235 xmlFreeParserCtxt(ctxt); 9236 return(NULL); 9237 } 9238 inputPush(ctxt, inputStream); 9239 9240 return(ctxt); 9241} 9242 9243/************************************************************************ 9244 * * 9245 * Front ends when parsing a DTD * 9246 * * 9247 ************************************************************************/ 9248 9249/** 9250 * xmlIOParseDTD: 9251 * @sax: the SAX handler block or NULL 9252 * @input: an Input Buffer 9253 * @enc: the charset encoding if known 9254 * 9255 * Load and parse a DTD 9256 * 9257 * Returns the resulting xmlDtdPtr or NULL in case of error. 9258 * @input will be freed at parsing end. 9259 */ 9260 9261xmlDtdPtr 9262xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 9263 xmlCharEncoding enc) { 9264 xmlDtdPtr ret = NULL; 9265 xmlParserCtxtPtr ctxt; 9266 xmlParserInputPtr pinput = NULL; 9267 xmlChar start[4]; 9268 9269 if (input == NULL) 9270 return(NULL); 9271 9272 ctxt = xmlNewParserCtxt(); 9273 if (ctxt == NULL) { 9274 return(NULL); 9275 } 9276 9277 /* 9278 * Set-up the SAX context 9279 */ 9280 if (sax != NULL) { 9281 if (ctxt->sax != NULL) 9282 xmlFree(ctxt->sax); 9283 ctxt->sax = sax; 9284 ctxt->userData = NULL; 9285 } 9286 9287 /* 9288 * generate a parser input from the I/O handler 9289 */ 9290 9291 pinput = xmlNewIOInputStream(ctxt, input, enc); 9292 if (pinput == NULL) { 9293 if (sax != NULL) ctxt->sax = NULL; 9294 xmlFreeParserCtxt(ctxt); 9295 return(NULL); 9296 } 9297 9298 /* 9299 * plug some encoding conversion routines here. 9300 */ 9301 xmlPushInput(ctxt, pinput); 9302 9303 pinput->filename = NULL; 9304 pinput->line = 1; 9305 pinput->col = 1; 9306 pinput->base = ctxt->input->cur; 9307 pinput->cur = ctxt->input->cur; 9308 pinput->free = NULL; 9309 9310 /* 9311 * let's parse that entity knowing it's an external subset. 9312 */ 9313 ctxt->inSubset = 2; 9314 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9315 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9316 BAD_CAST "none", BAD_CAST "none"); 9317 9318 if (enc == XML_CHAR_ENCODING_NONE) { 9319 /* 9320 * Get the 4 first bytes and decode the charset 9321 * if enc != XML_CHAR_ENCODING_NONE 9322 * plug some encoding conversion routines. 9323 */ 9324 start[0] = RAW; 9325 start[1] = NXT(1); 9326 start[2] = NXT(2); 9327 start[3] = NXT(3); 9328 enc = xmlDetectCharEncoding(start, 4); 9329 if (enc != XML_CHAR_ENCODING_NONE) { 9330 xmlSwitchEncoding(ctxt, enc); 9331 } 9332 } 9333 9334 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 9335 9336 if (ctxt->myDoc != NULL) { 9337 if (ctxt->wellFormed) { 9338 ret = ctxt->myDoc->extSubset; 9339 ctxt->myDoc->extSubset = NULL; 9340 } else { 9341 ret = NULL; 9342 } 9343 xmlFreeDoc(ctxt->myDoc); 9344 ctxt->myDoc = NULL; 9345 } 9346 if (sax != NULL) ctxt->sax = NULL; 9347 xmlFreeParserCtxt(ctxt); 9348 9349 return(ret); 9350} 9351 9352/** 9353 * xmlSAXParseDTD: 9354 * @sax: the SAX handler block 9355 * @ExternalID: a NAME* containing the External ID of the DTD 9356 * @SystemID: a NAME* containing the URL to the DTD 9357 * 9358 * Load and parse an external subset. 9359 * 9360 * Returns the resulting xmlDtdPtr or NULL in case of error. 9361 */ 9362 9363xmlDtdPtr 9364xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 9365 const xmlChar *SystemID) { 9366 xmlDtdPtr ret = NULL; 9367 xmlParserCtxtPtr ctxt; 9368 xmlParserInputPtr input = NULL; 9369 xmlCharEncoding enc; 9370 9371 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 9372 9373 ctxt = xmlNewParserCtxt(); 9374 if (ctxt == NULL) { 9375 return(NULL); 9376 } 9377 9378 /* 9379 * Set-up the SAX context 9380 */ 9381 if (sax != NULL) { 9382 if (ctxt->sax != NULL) 9383 xmlFree(ctxt->sax); 9384 ctxt->sax = sax; 9385 ctxt->userData = NULL; 9386 } 9387 9388 /* 9389 * Ask the Entity resolver to load the damn thing 9390 */ 9391 9392 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 9393 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 9394 if (input == NULL) { 9395 if (sax != NULL) ctxt->sax = NULL; 9396 xmlFreeParserCtxt(ctxt); 9397 return(NULL); 9398 } 9399 9400 /* 9401 * plug some encoding conversion routines here. 9402 */ 9403 xmlPushInput(ctxt, input); 9404 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 9405 xmlSwitchEncoding(ctxt, enc); 9406 9407 if (input->filename == NULL) 9408 input->filename = (char *) xmlStrdup(SystemID); 9409 input->line = 1; 9410 input->col = 1; 9411 input->base = ctxt->input->cur; 9412 input->cur = ctxt->input->cur; 9413 input->free = NULL; 9414 9415 /* 9416 * let's parse that entity knowing it's an external subset. 9417 */ 9418 ctxt->inSubset = 2; 9419 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9420 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9421 ExternalID, SystemID); 9422 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 9423 9424 if (ctxt->myDoc != NULL) { 9425 if (ctxt->wellFormed) { 9426 ret = ctxt->myDoc->extSubset; 9427 ctxt->myDoc->extSubset = NULL; 9428 } else { 9429 ret = NULL; 9430 } 9431 xmlFreeDoc(ctxt->myDoc); 9432 ctxt->myDoc = NULL; 9433 } 9434 if (sax != NULL) ctxt->sax = NULL; 9435 xmlFreeParserCtxt(ctxt); 9436 9437 return(ret); 9438} 9439 9440/** 9441 * xmlParseDTD: 9442 * @ExternalID: a NAME* containing the External ID of the DTD 9443 * @SystemID: a NAME* containing the URL to the DTD 9444 * 9445 * Load and parse an external subset. 9446 * 9447 * Returns the resulting xmlDtdPtr or NULL in case of error. 9448 */ 9449 9450xmlDtdPtr 9451xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 9452 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 9453} 9454 9455/************************************************************************ 9456 * * 9457 * Front ends when parsing an Entity * 9458 * * 9459 ************************************************************************/ 9460 9461/** 9462 * xmlParseCtxtExternalEntity: 9463 * @ctx: the existing parsing context 9464 * @URL: the URL for the entity to load 9465 * @ID: the System ID for the entity to load 9466 * @lst: the return value for the set of parsed nodes 9467 * 9468 * Parse an external general entity within an existing parsing context 9469 * An external general parsed entity is well-formed if it matches the 9470 * production labeled extParsedEnt. 9471 * 9472 * [78] extParsedEnt ::= TextDecl? content 9473 * 9474 * Returns 0 if the entity is well formed, -1 in case of args problem and 9475 * the parser error code otherwise 9476 */ 9477 9478int 9479xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 9480 const xmlChar *ID, xmlNodePtr *lst) { 9481 xmlParserCtxtPtr ctxt; 9482 xmlDocPtr newDoc; 9483 xmlSAXHandlerPtr oldsax = NULL; 9484 int ret = 0; 9485 xmlChar start[4]; 9486 xmlCharEncoding enc; 9487 9488 if (ctx->depth > 40) { 9489 return(XML_ERR_ENTITY_LOOP); 9490 } 9491 9492 if (lst != NULL) 9493 *lst = NULL; 9494 if ((URL == NULL) && (ID == NULL)) 9495 return(-1); 9496 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 9497 return(-1); 9498 9499 9500 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9501 if (ctxt == NULL) return(-1); 9502 ctxt->userData = ctxt; 9503 ctxt->_private = ctx->_private; 9504 oldsax = ctxt->sax; 9505 ctxt->sax = ctx->sax; 9506 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9507 if (newDoc == NULL) { 9508 xmlFreeParserCtxt(ctxt); 9509 return(-1); 9510 } 9511 if (ctx->myDoc != NULL) { 9512 newDoc->intSubset = ctx->myDoc->intSubset; 9513 newDoc->extSubset = ctx->myDoc->extSubset; 9514 } 9515 if (ctx->myDoc->URL != NULL) { 9516 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 9517 } 9518 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9519 if (newDoc->children == NULL) { 9520 ctxt->sax = oldsax; 9521 xmlFreeParserCtxt(ctxt); 9522 newDoc->intSubset = NULL; 9523 newDoc->extSubset = NULL; 9524 xmlFreeDoc(newDoc); 9525 return(-1); 9526 } 9527 nodePush(ctxt, newDoc->children); 9528 if (ctx->myDoc == NULL) { 9529 ctxt->myDoc = newDoc; 9530 } else { 9531 ctxt->myDoc = ctx->myDoc; 9532 newDoc->children->doc = ctx->myDoc; 9533 } 9534 9535 /* 9536 * Get the 4 first bytes and decode the charset 9537 * if enc != XML_CHAR_ENCODING_NONE 9538 * plug some encoding conversion routines. 9539 */ 9540 GROW 9541 start[0] = RAW; 9542 start[1] = NXT(1); 9543 start[2] = NXT(2); 9544 start[3] = NXT(3); 9545 enc = xmlDetectCharEncoding(start, 4); 9546 if (enc != XML_CHAR_ENCODING_NONE) { 9547 xmlSwitchEncoding(ctxt, enc); 9548 } 9549 9550 /* 9551 * Parse a possible text declaration first 9552 */ 9553 if ((RAW == '<') && (NXT(1) == '?') && 9554 (NXT(2) == 'x') && (NXT(3) == 'm') && 9555 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9556 xmlParseTextDecl(ctxt); 9557 } 9558 9559 /* 9560 * Doing validity checking on chunk doesn't make sense 9561 */ 9562 ctxt->instate = XML_PARSER_CONTENT; 9563 ctxt->validate = ctx->validate; 9564 ctxt->loadsubset = ctx->loadsubset; 9565 ctxt->depth = ctx->depth + 1; 9566 ctxt->replaceEntities = ctx->replaceEntities; 9567 if (ctxt->validate) { 9568 ctxt->vctxt.error = ctx->vctxt.error; 9569 ctxt->vctxt.warning = ctx->vctxt.warning; 9570 } else { 9571 ctxt->vctxt.error = NULL; 9572 ctxt->vctxt.warning = NULL; 9573 } 9574 ctxt->vctxt.nodeTab = NULL; 9575 ctxt->vctxt.nodeNr = 0; 9576 ctxt->vctxt.nodeMax = 0; 9577 ctxt->vctxt.node = NULL; 9578 9579 xmlParseContent(ctxt); 9580 9581 if ((RAW == '<') && (NXT(1) == '/')) { 9582 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9584 ctxt->sax->error(ctxt->userData, 9585 "chunk is not well balanced\n"); 9586 ctxt->wellFormed = 0; 9587 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9588 } else if (RAW != 0) { 9589 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9591 ctxt->sax->error(ctxt->userData, 9592 "extra content at the end of well balanced chunk\n"); 9593 ctxt->wellFormed = 0; 9594 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9595 } 9596 if (ctxt->node != newDoc->children) { 9597 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9599 ctxt->sax->error(ctxt->userData, 9600 "chunk is not well balanced\n"); 9601 ctxt->wellFormed = 0; 9602 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9603 } 9604 9605 if (!ctxt->wellFormed) { 9606 if (ctxt->errNo == 0) 9607 ret = 1; 9608 else 9609 ret = ctxt->errNo; 9610 } else { 9611 if (lst != NULL) { 9612 xmlNodePtr cur; 9613 9614 /* 9615 * Return the newly created nodeset after unlinking it from 9616 * they pseudo parent. 9617 */ 9618 cur = newDoc->children->children; 9619 *lst = cur; 9620 while (cur != NULL) { 9621 cur->parent = NULL; 9622 cur = cur->next; 9623 } 9624 newDoc->children->children = NULL; 9625 } 9626 ret = 0; 9627 } 9628 ctxt->sax = oldsax; 9629 xmlFreeParserCtxt(ctxt); 9630 newDoc->intSubset = NULL; 9631 newDoc->extSubset = NULL; 9632 xmlFreeDoc(newDoc); 9633 9634 return(ret); 9635} 9636 9637/** 9638 * xmlParseExternalEntityPrivate: 9639 * @doc: the document the chunk pertains to 9640 * @oldctxt: the previous parser context if available 9641 * @sax: the SAX handler bloc (possibly NULL) 9642 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9643 * @depth: Used for loop detection, use 0 9644 * @URL: the URL for the entity to load 9645 * @ID: the System ID for the entity to load 9646 * @list: the return value for the set of parsed nodes 9647 * 9648 * Private version of xmlParseExternalEntity() 9649 * 9650 * Returns 0 if the entity is well formed, -1 in case of args problem and 9651 * the parser error code otherwise 9652 */ 9653 9654static int 9655xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 9656 xmlSAXHandlerPtr sax, 9657 void *user_data, int depth, const xmlChar *URL, 9658 const xmlChar *ID, xmlNodePtr *list) { 9659 xmlParserCtxtPtr ctxt; 9660 xmlDocPtr newDoc; 9661 xmlSAXHandlerPtr oldsax = NULL; 9662 int ret = 0; 9663 xmlChar start[4]; 9664 xmlCharEncoding enc; 9665 9666 if (depth > 40) { 9667 return(XML_ERR_ENTITY_LOOP); 9668 } 9669 9670 9671 9672 if (list != NULL) 9673 *list = NULL; 9674 if ((URL == NULL) && (ID == NULL)) 9675 return(-1); 9676 if (doc == NULL) /* @@ relax but check for dereferences */ 9677 return(-1); 9678 9679 9680 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9681 if (ctxt == NULL) return(-1); 9682 ctxt->userData = ctxt; 9683 if (oldctxt != NULL) { 9684 ctxt->_private = oldctxt->_private; 9685 ctxt->loadsubset = oldctxt->loadsubset; 9686 ctxt->validate = oldctxt->validate; 9687 ctxt->external = oldctxt->external; 9688 ctxt->record_info = oldctxt->record_info; 9689 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 9690 ctxt->node_seq.length = oldctxt->node_seq.length; 9691 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 9692 } else { 9693 /* 9694 * Doing validity checking on chunk without context 9695 * doesn't make sense 9696 */ 9697 ctxt->_private = NULL; 9698 ctxt->validate = 0; 9699 ctxt->external = 2; 9700 ctxt->loadsubset = 0; 9701 } 9702 if (sax != NULL) { 9703 oldsax = ctxt->sax; 9704 ctxt->sax = sax; 9705 if (user_data != NULL) 9706 ctxt->userData = user_data; 9707 } 9708 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9709 if (newDoc == NULL) { 9710 ctxt->node_seq.maximum = 0; 9711 ctxt->node_seq.length = 0; 9712 ctxt->node_seq.buffer = NULL; 9713 xmlFreeParserCtxt(ctxt); 9714 return(-1); 9715 } 9716 if (doc != NULL) { 9717 newDoc->intSubset = doc->intSubset; 9718 newDoc->extSubset = doc->extSubset; 9719 } 9720 if (doc->URL != NULL) { 9721 newDoc->URL = xmlStrdup(doc->URL); 9722 } 9723 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9724 if (newDoc->children == NULL) { 9725 if (sax != NULL) 9726 ctxt->sax = oldsax; 9727 ctxt->node_seq.maximum = 0; 9728 ctxt->node_seq.length = 0; 9729 ctxt->node_seq.buffer = NULL; 9730 xmlFreeParserCtxt(ctxt); 9731 newDoc->intSubset = NULL; 9732 newDoc->extSubset = NULL; 9733 xmlFreeDoc(newDoc); 9734 return(-1); 9735 } 9736 nodePush(ctxt, newDoc->children); 9737 if (doc == NULL) { 9738 ctxt->myDoc = newDoc; 9739 } else { 9740 ctxt->myDoc = doc; 9741 newDoc->children->doc = doc; 9742 } 9743 9744 /* 9745 * Get the 4 first bytes and decode the charset 9746 * if enc != XML_CHAR_ENCODING_NONE 9747 * plug some encoding conversion routines. 9748 */ 9749 GROW; 9750 start[0] = RAW; 9751 start[1] = NXT(1); 9752 start[2] = NXT(2); 9753 start[3] = NXT(3); 9754 enc = xmlDetectCharEncoding(start, 4); 9755 if (enc != XML_CHAR_ENCODING_NONE) { 9756 xmlSwitchEncoding(ctxt, enc); 9757 } 9758 9759 /* 9760 * Parse a possible text declaration first 9761 */ 9762 if ((RAW == '<') && (NXT(1) == '?') && 9763 (NXT(2) == 'x') && (NXT(3) == 'm') && 9764 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9765 xmlParseTextDecl(ctxt); 9766 } 9767 9768 ctxt->instate = XML_PARSER_CONTENT; 9769 ctxt->depth = depth; 9770 9771 xmlParseContent(ctxt); 9772 9773 if ((RAW == '<') && (NXT(1) == '/')) { 9774 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9776 ctxt->sax->error(ctxt->userData, 9777 "chunk is not well balanced\n"); 9778 ctxt->wellFormed = 0; 9779 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9780 } else if (RAW != 0) { 9781 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9783 ctxt->sax->error(ctxt->userData, 9784 "extra content at the end of well balanced chunk\n"); 9785 ctxt->wellFormed = 0; 9786 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9787 } 9788 if (ctxt->node != newDoc->children) { 9789 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9791 ctxt->sax->error(ctxt->userData, 9792 "chunk is not well balanced\n"); 9793 ctxt->wellFormed = 0; 9794 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9795 } 9796 9797 if (!ctxt->wellFormed) { 9798 if (ctxt->errNo == 0) 9799 ret = 1; 9800 else 9801 ret = ctxt->errNo; 9802 } else { 9803 if (list != NULL) { 9804 xmlNodePtr cur; 9805 9806 /* 9807 * Return the newly created nodeset after unlinking it from 9808 * they pseudo parent. 9809 */ 9810 cur = newDoc->children->children; 9811 *list = cur; 9812 while (cur != NULL) { 9813 cur->parent = NULL; 9814 cur = cur->next; 9815 } 9816 newDoc->children->children = NULL; 9817 } 9818 ret = 0; 9819 } 9820 if (sax != NULL) 9821 ctxt->sax = oldsax; 9822 ctxt->node_seq.maximum = 0; 9823 ctxt->node_seq.length = 0; 9824 ctxt->node_seq.buffer = NULL; 9825 xmlFreeParserCtxt(ctxt); 9826 newDoc->intSubset = NULL; 9827 newDoc->extSubset = NULL; 9828 xmlFreeDoc(newDoc); 9829 9830 return(ret); 9831} 9832 9833/** 9834 * xmlParseExternalEntity: 9835 * @doc: the document the chunk pertains to 9836 * @sax: the SAX handler bloc (possibly NULL) 9837 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9838 * @depth: Used for loop detection, use 0 9839 * @URL: the URL for the entity to load 9840 * @ID: the System ID for the entity to load 9841 * @lst: the return value for the set of parsed nodes 9842 * 9843 * Parse an external general entity 9844 * An external general parsed entity is well-formed if it matches the 9845 * production labeled extParsedEnt. 9846 * 9847 * [78] extParsedEnt ::= TextDecl? content 9848 * 9849 * Returns 0 if the entity is well formed, -1 in case of args problem and 9850 * the parser error code otherwise 9851 */ 9852 9853int 9854xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 9855 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 9856 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 9857 ID, lst)); 9858} 9859 9860/** 9861 * xmlParseBalancedChunkMemory: 9862 * @doc: the document the chunk pertains to 9863 * @sax: the SAX handler bloc (possibly NULL) 9864 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9865 * @depth: Used for loop detection, use 0 9866 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9867 * @lst: the return value for the set of parsed nodes 9868 * 9869 * Parse a well-balanced chunk of an XML document 9870 * called by the parser 9871 * The allowed sequence for the Well Balanced Chunk is the one defined by 9872 * the content production in the XML grammar: 9873 * 9874 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9875 * 9876 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9877 * the parser error code otherwise 9878 */ 9879 9880int 9881xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9882 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 9883 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 9884 depth, string, lst, 0 ); 9885} 9886 9887/** 9888 * xmlParseBalancedChunkMemoryInternal: 9889 * @oldctxt: the existing parsing context 9890 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9891 * @user_data: the user data field for the parser context 9892 * @lst: the return value for the set of parsed nodes 9893 * 9894 * 9895 * Parse a well-balanced chunk of an XML document 9896 * called by the parser 9897 * The allowed sequence for the Well Balanced Chunk is the one defined by 9898 * the content production in the XML grammar: 9899 * 9900 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9901 * 9902 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9903 * the parser error code otherwise 9904 * 9905 * In case recover is set to 1, the nodelist will not be empty even if 9906 * the parsed chunk is not well balanced. 9907 */ 9908static int 9909xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 9910 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 9911 xmlParserCtxtPtr ctxt; 9912 xmlDocPtr newDoc = NULL; 9913 xmlSAXHandlerPtr oldsax = NULL; 9914 xmlNodePtr content = NULL; 9915 int size; 9916 int ret = 0; 9917 9918 if (oldctxt->depth > 40) { 9919 return(XML_ERR_ENTITY_LOOP); 9920 } 9921 9922 9923 if (lst != NULL) 9924 *lst = NULL; 9925 if (string == NULL) 9926 return(-1); 9927 9928 size = xmlStrlen(string); 9929 9930 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9931 if (ctxt == NULL) return(-1); 9932 if (user_data != NULL) 9933 ctxt->userData = user_data; 9934 else 9935 ctxt->userData = ctxt; 9936 9937 oldsax = ctxt->sax; 9938 ctxt->sax = oldctxt->sax; 9939 ctxt->_private = oldctxt->_private; 9940 if (oldctxt->myDoc == NULL) { 9941 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9942 if (newDoc == NULL) { 9943 ctxt->sax = oldsax; 9944 xmlFreeParserCtxt(ctxt); 9945 return(-1); 9946 } 9947 ctxt->myDoc = newDoc; 9948 } else { 9949 ctxt->myDoc = oldctxt->myDoc; 9950 content = ctxt->myDoc->children; 9951 } 9952 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL, 9953 BAD_CAST "pseudoroot", NULL); 9954 if (ctxt->myDoc->children == NULL) { 9955 ctxt->sax = oldsax; 9956 xmlFreeParserCtxt(ctxt); 9957 if (newDoc != NULL) 9958 xmlFreeDoc(newDoc); 9959 return(-1); 9960 } 9961 nodePush(ctxt, ctxt->myDoc->children); 9962 ctxt->instate = XML_PARSER_CONTENT; 9963 ctxt->depth = oldctxt->depth + 1; 9964 9965 ctxt->validate = 0; 9966 ctxt->loadsubset = oldctxt->loadsubset; 9967 9968 xmlParseContent(ctxt); 9969 if ((RAW == '<') && (NXT(1) == '/')) { 9970 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9972 ctxt->sax->error(ctxt->userData, 9973 "chunk is not well balanced\n"); 9974 ctxt->wellFormed = 0; 9975 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9976 } else if (RAW != 0) { 9977 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9979 ctxt->sax->error(ctxt->userData, 9980 "extra content at the end of well balanced chunk\n"); 9981 ctxt->wellFormed = 0; 9982 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9983 } 9984 if (ctxt->node != ctxt->myDoc->children) { 9985 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9987 ctxt->sax->error(ctxt->userData, 9988 "chunk is not well balanced\n"); 9989 ctxt->wellFormed = 0; 9990 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 9991 } 9992 9993 if (!ctxt->wellFormed) { 9994 if (ctxt->errNo == 0) 9995 ret = 1; 9996 else 9997 ret = ctxt->errNo; 9998 } else { 9999 ret = 0; 10000 } 10001 10002 if ((lst != NULL) && (ret == 0)) { 10003 xmlNodePtr cur; 10004 10005 /* 10006 * Return the newly created nodeset after unlinking it from 10007 * they pseudo parent. 10008 */ 10009 cur = ctxt->myDoc->children->children; 10010 *lst = cur; 10011 while (cur != NULL) { 10012 if (oldctxt->validate && oldctxt->wellFormed && 10013 oldctxt->myDoc && oldctxt->myDoc->intSubset) { 10014 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 10015 oldctxt->myDoc, cur); 10016 } 10017 cur->parent = NULL; 10018 cur = cur->next; 10019 } 10020 ctxt->myDoc->children->children = NULL; 10021 } 10022 if (ctxt->myDoc != NULL) { 10023 xmlFreeNode(ctxt->myDoc->children); 10024 ctxt->myDoc->children = content; 10025 } 10026 10027 ctxt->sax = oldsax; 10028 xmlFreeParserCtxt(ctxt); 10029 if (newDoc != NULL) 10030 xmlFreeDoc(newDoc); 10031 10032 return(ret); 10033} 10034 10035/** 10036 * xmlParseBalancedChunkMemoryRecover: 10037 * @doc: the document the chunk pertains to 10038 * @sax: the SAX handler bloc (possibly NULL) 10039 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10040 * @depth: Used for loop detection, use 0 10041 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10042 * @lst: the return value for the set of parsed nodes 10043 * @recover: return nodes even if the data is broken (use 0) 10044 * 10045 * 10046 * Parse a well-balanced chunk of an XML document 10047 * called by the parser 10048 * The allowed sequence for the Well Balanced Chunk is the one defined by 10049 * the content production in the XML grammar: 10050 * 10051 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10052 * 10053 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10054 * the parser error code otherwise 10055 * 10056 * In case recover is set to 1, the nodelist will not be empty even if 10057 * the parsed chunk is not well balanced. 10058 */ 10059int 10060xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10061 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 10062 int recover) { 10063 xmlParserCtxtPtr ctxt; 10064 xmlDocPtr newDoc; 10065 xmlSAXHandlerPtr oldsax = NULL; 10066 xmlNodePtr content; 10067 int size; 10068 int ret = 0; 10069 10070 if (depth > 40) { 10071 return(XML_ERR_ENTITY_LOOP); 10072 } 10073 10074 10075 if (lst != NULL) 10076 *lst = NULL; 10077 if (string == NULL) 10078 return(-1); 10079 10080 size = xmlStrlen(string); 10081 10082 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10083 if (ctxt == NULL) return(-1); 10084 ctxt->userData = ctxt; 10085 if (sax != NULL) { 10086 oldsax = ctxt->sax; 10087 ctxt->sax = sax; 10088 if (user_data != NULL) 10089 ctxt->userData = user_data; 10090 } 10091 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10092 if (newDoc == NULL) { 10093 xmlFreeParserCtxt(ctxt); 10094 return(-1); 10095 } 10096 if (doc != NULL) { 10097 newDoc->intSubset = doc->intSubset; 10098 newDoc->extSubset = doc->extSubset; 10099 } 10100 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10101 if (newDoc->children == NULL) { 10102 if (sax != NULL) 10103 ctxt->sax = oldsax; 10104 xmlFreeParserCtxt(ctxt); 10105 newDoc->intSubset = NULL; 10106 newDoc->extSubset = NULL; 10107 xmlFreeDoc(newDoc); 10108 return(-1); 10109 } 10110 nodePush(ctxt, newDoc->children); 10111 if (doc == NULL) { 10112 ctxt->myDoc = newDoc; 10113 } else { 10114 ctxt->myDoc = newDoc; 10115 newDoc->children->doc = doc; 10116 } 10117 ctxt->instate = XML_PARSER_CONTENT; 10118 ctxt->depth = depth; 10119 10120 /* 10121 * Doing validity checking on chunk doesn't make sense 10122 */ 10123 ctxt->validate = 0; 10124 ctxt->loadsubset = 0; 10125 10126 if ( doc != NULL ){ 10127 content = doc->children; 10128 doc->children = NULL; 10129 xmlParseContent(ctxt); 10130 doc->children = content; 10131 } 10132 else { 10133 xmlParseContent(ctxt); 10134 } 10135 if ((RAW == '<') && (NXT(1) == '/')) { 10136 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 10137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 10138 ctxt->sax->error(ctxt->userData, 10139 "chunk is not well balanced\n"); 10140 ctxt->wellFormed = 0; 10141 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 10142 } else if (RAW != 0) { 10143 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 10144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 10145 ctxt->sax->error(ctxt->userData, 10146 "extra content at the end of well balanced chunk\n"); 10147 ctxt->wellFormed = 0; 10148 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 10149 } 10150 if (ctxt->node != newDoc->children) { 10151 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 10152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 10153 ctxt->sax->error(ctxt->userData, 10154 "chunk is not well balanced\n"); 10155 ctxt->wellFormed = 0; 10156 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 10157 } 10158 10159 if (!ctxt->wellFormed) { 10160 if (ctxt->errNo == 0) 10161 ret = 1; 10162 else 10163 ret = ctxt->errNo; 10164 } else { 10165 ret = 0; 10166 } 10167 10168 if (lst != NULL && (ret == 0 || recover == 1)) { 10169 xmlNodePtr cur; 10170 10171 /* 10172 * Return the newly created nodeset after unlinking it from 10173 * they pseudo parent. 10174 */ 10175 cur = newDoc->children->children; 10176 *lst = cur; 10177 while (cur != NULL) { 10178 cur->parent = NULL; 10179 cur = cur->next; 10180 } 10181 newDoc->children->children = NULL; 10182 } 10183 10184 if (sax != NULL) 10185 ctxt->sax = oldsax; 10186 xmlFreeParserCtxt(ctxt); 10187 newDoc->intSubset = NULL; 10188 newDoc->extSubset = NULL; 10189 xmlFreeDoc(newDoc); 10190 10191 return(ret); 10192} 10193 10194/** 10195 * xmlSAXParseEntity: 10196 * @sax: the SAX handler block 10197 * @filename: the filename 10198 * 10199 * parse an XML external entity out of context and build a tree. 10200 * It use the given SAX function block to handle the parsing callback. 10201 * If sax is NULL, fallback to the default DOM tree building routines. 10202 * 10203 * [78] extParsedEnt ::= TextDecl? content 10204 * 10205 * This correspond to a "Well Balanced" chunk 10206 * 10207 * Returns the resulting document tree 10208 */ 10209 10210xmlDocPtr 10211xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 10212 xmlDocPtr ret; 10213 xmlParserCtxtPtr ctxt; 10214 char *directory = NULL; 10215 10216 ctxt = xmlCreateFileParserCtxt(filename); 10217 if (ctxt == NULL) { 10218 return(NULL); 10219 } 10220 if (sax != NULL) { 10221 if (ctxt->sax != NULL) 10222 xmlFree(ctxt->sax); 10223 ctxt->sax = sax; 10224 ctxt->userData = NULL; 10225 } 10226 10227 if ((ctxt->directory == NULL) && (directory == NULL)) 10228 directory = xmlParserGetDirectory(filename); 10229 10230 xmlParseExtParsedEnt(ctxt); 10231 10232 if (ctxt->wellFormed) 10233 ret = ctxt->myDoc; 10234 else { 10235 ret = NULL; 10236 xmlFreeDoc(ctxt->myDoc); 10237 ctxt->myDoc = NULL; 10238 } 10239 if (sax != NULL) 10240 ctxt->sax = NULL; 10241 xmlFreeParserCtxt(ctxt); 10242 10243 return(ret); 10244} 10245 10246/** 10247 * xmlParseEntity: 10248 * @filename: the filename 10249 * 10250 * parse an XML external entity out of context and build a tree. 10251 * 10252 * [78] extParsedEnt ::= TextDecl? content 10253 * 10254 * This correspond to a "Well Balanced" chunk 10255 * 10256 * Returns the resulting document tree 10257 */ 10258 10259xmlDocPtr 10260xmlParseEntity(const char *filename) { 10261 return(xmlSAXParseEntity(NULL, filename)); 10262} 10263 10264/** 10265 * xmlCreateEntityParserCtxt: 10266 * @URL: the entity URL 10267 * @ID: the entity PUBLIC ID 10268 * @base: a possible base for the target URI 10269 * 10270 * Create a parser context for an external entity 10271 * Automatic support for ZLIB/Compress compressed document is provided 10272 * by default if found at compile-time. 10273 * 10274 * Returns the new parser context or NULL 10275 */ 10276xmlParserCtxtPtr 10277xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 10278 const xmlChar *base) { 10279 xmlParserCtxtPtr ctxt; 10280 xmlParserInputPtr inputStream; 10281 char *directory = NULL; 10282 xmlChar *uri; 10283 10284 ctxt = xmlNewParserCtxt(); 10285 if (ctxt == NULL) { 10286 return(NULL); 10287 } 10288 10289 uri = xmlBuildURI(URL, base); 10290 10291 if (uri == NULL) { 10292 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 10293 if (inputStream == NULL) { 10294 xmlFreeParserCtxt(ctxt); 10295 return(NULL); 10296 } 10297 10298 inputPush(ctxt, inputStream); 10299 10300 if ((ctxt->directory == NULL) && (directory == NULL)) 10301 directory = xmlParserGetDirectory((char *)URL); 10302 if ((ctxt->directory == NULL) && (directory != NULL)) 10303 ctxt->directory = directory; 10304 } else { 10305 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 10306 if (inputStream == NULL) { 10307 xmlFree(uri); 10308 xmlFreeParserCtxt(ctxt); 10309 return(NULL); 10310 } 10311 10312 inputPush(ctxt, inputStream); 10313 10314 if ((ctxt->directory == NULL) && (directory == NULL)) 10315 directory = xmlParserGetDirectory((char *)uri); 10316 if ((ctxt->directory == NULL) && (directory != NULL)) 10317 ctxt->directory = directory; 10318 xmlFree(uri); 10319 } 10320 10321 return(ctxt); 10322} 10323 10324/************************************************************************ 10325 * * 10326 * Front ends when parsing from a file * 10327 * * 10328 ************************************************************************/ 10329 10330/** 10331 * xmlCreateFileParserCtxt: 10332 * @filename: the filename 10333 * 10334 * Create a parser context for a file content. 10335 * Automatic support for ZLIB/Compress compressed document is provided 10336 * by default if found at compile-time. 10337 * 10338 * Returns the new parser context or NULL 10339 */ 10340xmlParserCtxtPtr 10341xmlCreateFileParserCtxt(const char *filename) 10342{ 10343 xmlParserCtxtPtr ctxt; 10344 xmlParserInputPtr inputStream; 10345 char *directory = NULL; 10346 10347 ctxt = xmlNewParserCtxt(); 10348 if (ctxt == NULL) { 10349 if (xmlDefaultSAXHandler.error != NULL) { 10350 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 10351 } 10352 return(NULL); 10353 } 10354 10355 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 10356 if (inputStream == NULL) { 10357 xmlFreeParserCtxt(ctxt); 10358 return(NULL); 10359 } 10360 10361 inputPush(ctxt, inputStream); 10362 if ((ctxt->directory == NULL) && (directory == NULL)) 10363 directory = xmlParserGetDirectory(filename); 10364 if ((ctxt->directory == NULL) && (directory != NULL)) 10365 ctxt->directory = directory; 10366 10367 return(ctxt); 10368} 10369 10370/** 10371 * xmlSAXParseFileWithData: 10372 * @sax: the SAX handler block 10373 * @filename: the filename 10374 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10375 * documents 10376 * @data: the userdata 10377 * 10378 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10379 * compressed document is provided by default if found at compile-time. 10380 * It use the given SAX function block to handle the parsing callback. 10381 * If sax is NULL, fallback to the default DOM tree building routines. 10382 * 10383 * User data (void *) is stored within the parser context in the 10384 * context's _private member, so it is available nearly everywhere in libxml 10385 * 10386 * Returns the resulting document tree 10387 */ 10388 10389xmlDocPtr 10390xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 10391 int recovery, void *data) { 10392 xmlDocPtr ret; 10393 xmlParserCtxtPtr ctxt; 10394 char *directory = NULL; 10395 10396 xmlInitParser(); 10397 10398 ctxt = xmlCreateFileParserCtxt(filename); 10399 if (ctxt == NULL) { 10400 return(NULL); 10401 } 10402 if (sax != NULL) { 10403 if (ctxt->sax != NULL) 10404 xmlFree(ctxt->sax); 10405 ctxt->sax = sax; 10406 } 10407 if (data!=NULL) { 10408 ctxt->_private=data; 10409 } 10410 10411 if ((ctxt->directory == NULL) && (directory == NULL)) 10412 directory = xmlParserGetDirectory(filename); 10413 if ((ctxt->directory == NULL) && (directory != NULL)) 10414 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 10415 10416 ctxt->recovery = recovery; 10417 10418 xmlParseDocument(ctxt); 10419 10420 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10421 else { 10422 ret = NULL; 10423 xmlFreeDoc(ctxt->myDoc); 10424 ctxt->myDoc = NULL; 10425 } 10426 if (sax != NULL) 10427 ctxt->sax = NULL; 10428 xmlFreeParserCtxt(ctxt); 10429 10430 return(ret); 10431} 10432 10433/** 10434 * xmlSAXParseFile: 10435 * @sax: the SAX handler block 10436 * @filename: the filename 10437 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10438 * documents 10439 * 10440 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10441 * compressed document is provided by default if found at compile-time. 10442 * It use the given SAX function block to handle the parsing callback. 10443 * If sax is NULL, fallback to the default DOM tree building routines. 10444 * 10445 * Returns the resulting document tree 10446 */ 10447 10448xmlDocPtr 10449xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 10450 int recovery) { 10451 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 10452} 10453 10454/** 10455 * xmlRecoverDoc: 10456 * @cur: a pointer to an array of xmlChar 10457 * 10458 * parse an XML in-memory document and build a tree. 10459 * In the case the document is not Well Formed, a tree is built anyway 10460 * 10461 * Returns the resulting document tree 10462 */ 10463 10464xmlDocPtr 10465xmlRecoverDoc(xmlChar *cur) { 10466 return(xmlSAXParseDoc(NULL, cur, 1)); 10467} 10468 10469/** 10470 * xmlParseFile: 10471 * @filename: the filename 10472 * 10473 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10474 * compressed document is provided by default if found at compile-time. 10475 * 10476 * Returns the resulting document tree if the file was wellformed, 10477 * NULL otherwise. 10478 */ 10479 10480xmlDocPtr 10481xmlParseFile(const char *filename) { 10482 return(xmlSAXParseFile(NULL, filename, 0)); 10483} 10484 10485/** 10486 * xmlRecoverFile: 10487 * @filename: the filename 10488 * 10489 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10490 * compressed document is provided by default if found at compile-time. 10491 * In the case the document is not Well Formed, a tree is built anyway 10492 * 10493 * Returns the resulting document tree 10494 */ 10495 10496xmlDocPtr 10497xmlRecoverFile(const char *filename) { 10498 return(xmlSAXParseFile(NULL, filename, 1)); 10499} 10500 10501 10502/** 10503 * xmlSetupParserForBuffer: 10504 * @ctxt: an XML parser context 10505 * @buffer: a xmlChar * buffer 10506 * @filename: a file name 10507 * 10508 * Setup the parser context to parse a new buffer; Clears any prior 10509 * contents from the parser context. The buffer parameter must not be 10510 * NULL, but the filename parameter can be 10511 */ 10512void 10513xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 10514 const char* filename) 10515{ 10516 xmlParserInputPtr input; 10517 10518 input = xmlNewInputStream(ctxt); 10519 if (input == NULL) { 10520 xmlGenericError(xmlGenericErrorContext, 10521 "malloc"); 10522 xmlFree(ctxt); 10523 return; 10524 } 10525 10526 xmlClearParserCtxt(ctxt); 10527 if (filename != NULL) 10528 input->filename = xmlMemStrdup(filename); 10529 input->base = buffer; 10530 input->cur = buffer; 10531 input->end = &buffer[xmlStrlen(buffer)]; 10532 inputPush(ctxt, input); 10533} 10534 10535/** 10536 * xmlSAXUserParseFile: 10537 * @sax: a SAX handler 10538 * @user_data: The user data returned on SAX callbacks 10539 * @filename: a file name 10540 * 10541 * parse an XML file and call the given SAX handler routines. 10542 * Automatic support for ZLIB/Compress compressed document is provided 10543 * 10544 * Returns 0 in case of success or a error number otherwise 10545 */ 10546int 10547xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 10548 const char *filename) { 10549 int ret = 0; 10550 xmlParserCtxtPtr ctxt; 10551 10552 ctxt = xmlCreateFileParserCtxt(filename); 10553 if (ctxt == NULL) return -1; 10554 if (ctxt->sax != &xmlDefaultSAXHandler) 10555 xmlFree(ctxt->sax); 10556 ctxt->sax = sax; 10557 if (user_data != NULL) 10558 ctxt->userData = user_data; 10559 10560 xmlParseDocument(ctxt); 10561 10562 if (ctxt->wellFormed) 10563 ret = 0; 10564 else { 10565 if (ctxt->errNo != 0) 10566 ret = ctxt->errNo; 10567 else 10568 ret = -1; 10569 } 10570 if (sax != NULL) 10571 ctxt->sax = NULL; 10572 xmlFreeParserCtxt(ctxt); 10573 10574 return ret; 10575} 10576 10577/************************************************************************ 10578 * * 10579 * Front ends when parsing from memory * 10580 * * 10581 ************************************************************************/ 10582 10583/** 10584 * xmlCreateMemoryParserCtxt: 10585 * @buffer: a pointer to a char array 10586 * @size: the size of the array 10587 * 10588 * Create a parser context for an XML in-memory document. 10589 * 10590 * Returns the new parser context or NULL 10591 */ 10592xmlParserCtxtPtr 10593xmlCreateMemoryParserCtxt(const char *buffer, int size) { 10594 xmlParserCtxtPtr ctxt; 10595 xmlParserInputPtr input; 10596 xmlParserInputBufferPtr buf; 10597 10598 if (buffer == NULL) 10599 return(NULL); 10600 if (size <= 0) 10601 return(NULL); 10602 10603 ctxt = xmlNewParserCtxt(); 10604 if (ctxt == NULL) 10605 return(NULL); 10606 10607 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 10608 if (buf == NULL) { 10609 xmlFreeParserCtxt(ctxt); 10610 return(NULL); 10611 } 10612 10613 input = xmlNewInputStream(ctxt); 10614 if (input == NULL) { 10615 xmlFreeParserInputBuffer(buf); 10616 xmlFreeParserCtxt(ctxt); 10617 return(NULL); 10618 } 10619 10620 input->filename = NULL; 10621 input->buf = buf; 10622 input->base = input->buf->buffer->content; 10623 input->cur = input->buf->buffer->content; 10624 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 10625 10626 inputPush(ctxt, input); 10627 return(ctxt); 10628} 10629 10630/** 10631 * xmlSAXParseMemoryWithData: 10632 * @sax: the SAX handler block 10633 * @buffer: an pointer to a char array 10634 * @size: the size of the array 10635 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10636 * documents 10637 * @data: the userdata 10638 * 10639 * parse an XML in-memory block and use the given SAX function block 10640 * to handle the parsing callback. If sax is NULL, fallback to the default 10641 * DOM tree building routines. 10642 * 10643 * User data (void *) is stored within the parser context in the 10644 * context's _private member, so it is available nearly everywhere in libxml 10645 * 10646 * Returns the resulting document tree 10647 */ 10648 10649xmlDocPtr 10650xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 10651 int size, int recovery, void *data) { 10652 xmlDocPtr ret; 10653 xmlParserCtxtPtr ctxt; 10654 10655 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10656 if (ctxt == NULL) return(NULL); 10657 if (sax != NULL) { 10658 if (ctxt->sax != NULL) 10659 xmlFree(ctxt->sax); 10660 ctxt->sax = sax; 10661 } 10662 if (data!=NULL) { 10663 ctxt->_private=data; 10664 } 10665 10666 xmlParseDocument(ctxt); 10667 10668 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10669 else { 10670 ret = NULL; 10671 xmlFreeDoc(ctxt->myDoc); 10672 ctxt->myDoc = NULL; 10673 } 10674 if (sax != NULL) 10675 ctxt->sax = NULL; 10676 xmlFreeParserCtxt(ctxt); 10677 10678 return(ret); 10679} 10680 10681/** 10682 * xmlSAXParseMemory: 10683 * @sax: the SAX handler block 10684 * @buffer: an pointer to a char array 10685 * @size: the size of the array 10686 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 10687 * documents 10688 * 10689 * parse an XML in-memory block and use the given SAX function block 10690 * to handle the parsing callback. If sax is NULL, fallback to the default 10691 * DOM tree building routines. 10692 * 10693 * Returns the resulting document tree 10694 */ 10695xmlDocPtr 10696xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 10697 int size, int recovery) { 10698 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 10699} 10700 10701/** 10702 * xmlParseMemory: 10703 * @buffer: an pointer to a char array 10704 * @size: the size of the array 10705 * 10706 * parse an XML in-memory block and build a tree. 10707 * 10708 * Returns the resulting document tree 10709 */ 10710 10711xmlDocPtr xmlParseMemory(const char *buffer, int size) { 10712 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 10713} 10714 10715/** 10716 * xmlRecoverMemory: 10717 * @buffer: an pointer to a char array 10718 * @size: the size of the array 10719 * 10720 * parse an XML in-memory block and build a tree. 10721 * In the case the document is not Well Formed, a tree is built anyway 10722 * 10723 * Returns the resulting document tree 10724 */ 10725 10726xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 10727 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 10728} 10729 10730/** 10731 * xmlSAXUserParseMemory: 10732 * @sax: a SAX handler 10733 * @user_data: The user data returned on SAX callbacks 10734 * @buffer: an in-memory XML document input 10735 * @size: the length of the XML document in bytes 10736 * 10737 * A better SAX parsing routine. 10738 * parse an XML in-memory buffer and call the given SAX handler routines. 10739 * 10740 * Returns 0 in case of success or a error number otherwise 10741 */ 10742int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 10743 const char *buffer, int size) { 10744 int ret = 0; 10745 xmlParserCtxtPtr ctxt; 10746 xmlSAXHandlerPtr oldsax = NULL; 10747 10748 if (sax == NULL) return -1; 10749 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10750 if (ctxt == NULL) return -1; 10751 oldsax = ctxt->sax; 10752 ctxt->sax = sax; 10753 if (user_data != NULL) 10754 ctxt->userData = user_data; 10755 10756 xmlParseDocument(ctxt); 10757 10758 if (ctxt->wellFormed) 10759 ret = 0; 10760 else { 10761 if (ctxt->errNo != 0) 10762 ret = ctxt->errNo; 10763 else 10764 ret = -1; 10765 } 10766 ctxt->sax = oldsax; 10767 xmlFreeParserCtxt(ctxt); 10768 10769 return ret; 10770} 10771 10772/** 10773 * xmlCreateDocParserCtxt: 10774 * @cur: a pointer to an array of xmlChar 10775 * 10776 * Creates a parser context for an XML in-memory document. 10777 * 10778 * Returns the new parser context or NULL 10779 */ 10780xmlParserCtxtPtr 10781xmlCreateDocParserCtxt(xmlChar *cur) { 10782 int len; 10783 10784 if (cur == NULL) 10785 return(NULL); 10786 len = xmlStrlen(cur); 10787 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 10788} 10789 10790/** 10791 * xmlSAXParseDoc: 10792 * @sax: the SAX handler block 10793 * @cur: a pointer to an array of xmlChar 10794 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10795 * documents 10796 * 10797 * parse an XML in-memory document and build a tree. 10798 * It use the given SAX function block to handle the parsing callback. 10799 * If sax is NULL, fallback to the default DOM tree building routines. 10800 * 10801 * Returns the resulting document tree 10802 */ 10803 10804xmlDocPtr 10805xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 10806 xmlDocPtr ret; 10807 xmlParserCtxtPtr ctxt; 10808 10809 if (cur == NULL) return(NULL); 10810 10811 10812 ctxt = xmlCreateDocParserCtxt(cur); 10813 if (ctxt == NULL) return(NULL); 10814 if (sax != NULL) { 10815 ctxt->sax = sax; 10816 ctxt->userData = NULL; 10817 } 10818 10819 xmlParseDocument(ctxt); 10820 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10821 else { 10822 ret = NULL; 10823 xmlFreeDoc(ctxt->myDoc); 10824 ctxt->myDoc = NULL; 10825 } 10826 if (sax != NULL) 10827 ctxt->sax = NULL; 10828 xmlFreeParserCtxt(ctxt); 10829 10830 return(ret); 10831} 10832 10833/** 10834 * xmlParseDoc: 10835 * @cur: a pointer to an array of xmlChar 10836 * 10837 * parse an XML in-memory document and build a tree. 10838 * 10839 * Returns the resulting document tree 10840 */ 10841 10842xmlDocPtr 10843xmlParseDoc(xmlChar *cur) { 10844 return(xmlSAXParseDoc(NULL, cur, 0)); 10845} 10846 10847/************************************************************************ 10848 * * 10849 * Specific function to keep track of entities references * 10850 * and used by the XSLT debugger * 10851 * * 10852 ************************************************************************/ 10853 10854static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 10855 10856/** 10857 * xmlAddEntityReference: 10858 * @ent : A valid entity 10859 * @firstNode : A valid first node for children of entity 10860 * @lastNode : A valid last node of children entity 10861 * 10862 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 10863 */ 10864static void 10865xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 10866 xmlNodePtr lastNode) 10867{ 10868 if (xmlEntityRefFunc != NULL) { 10869 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 10870 } 10871} 10872 10873 10874/** 10875 * xmlSetEntityReferenceFunc: 10876 * @func: A valid function 10877 * 10878 * Set the function to call call back when a xml reference has been made 10879 */ 10880void 10881xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 10882{ 10883 xmlEntityRefFunc = func; 10884} 10885 10886/************************************************************************ 10887 * * 10888 * Miscellaneous * 10889 * * 10890 ************************************************************************/ 10891 10892#ifdef LIBXML_XPATH_ENABLED 10893#include <libxml/xpath.h> 10894#endif 10895 10896extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 10897static int xmlParserInitialized = 0; 10898 10899/** 10900 * xmlInitParser: 10901 * 10902 * Initialization function for the XML parser. 10903 * This is not reentrant. Call once before processing in case of 10904 * use in multithreaded programs. 10905 */ 10906 10907void 10908xmlInitParser(void) { 10909 if (xmlParserInitialized != 0) 10910 return; 10911 10912 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 10913 (xmlGenericError == NULL)) 10914 initGenericErrorDefaultFunc(NULL); 10915 xmlInitThreads(); 10916 xmlInitMemory(); 10917 xmlInitCharEncodingHandlers(); 10918 xmlInitializePredefinedEntities(); 10919 xmlDefaultSAXHandlerInit(); 10920 xmlRegisterDefaultInputCallbacks(); 10921 xmlRegisterDefaultOutputCallbacks(); 10922#ifdef LIBXML_HTML_ENABLED 10923 htmlInitAutoClose(); 10924 htmlDefaultSAXHandlerInit(); 10925#endif 10926#ifdef LIBXML_XPATH_ENABLED 10927 xmlXPathInit(); 10928#endif 10929 xmlParserInitialized = 1; 10930} 10931 10932/** 10933 * xmlCleanupParser: 10934 * 10935 * Cleanup function for the XML parser. It tries to reclaim all 10936 * parsing related global memory allocated for the parser processing. 10937 * It doesn't deallocate any document related memory. Calling this 10938 * function should not prevent reusing the parser. 10939 * One should call xmlCleanupParser() only when the process has 10940 * finished using the library or XML document built with it. 10941 */ 10942 10943void 10944xmlCleanupParser(void) { 10945 xmlCleanupCharEncodingHandlers(); 10946 xmlCleanupPredefinedEntities(); 10947#ifdef LIBXML_CATALOG_ENABLED 10948 xmlCatalogCleanup(); 10949#endif 10950 xmlCleanupThreads(); 10951 xmlParserInitialized = 0; 10952} 10953