parser.c revision 8606bbbc0a04293afd7541033d6a83c4943a6f02
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <libxml/xmlmemory.h> 45#include <libxml/threads.h> 46#include <libxml/globals.h> 47#include <libxml/tree.h> 48#include <libxml/parser.h> 49#include <libxml/parserInternals.h> 50#include <libxml/valid.h> 51#include <libxml/entities.h> 52#include <libxml/xmlerror.h> 53#include <libxml/encoding.h> 54#include <libxml/xmlIO.h> 55#include <libxml/uri.h> 56#ifdef LIBXML_CATALOG_ENABLED 57#include <libxml/catalog.h> 58#endif 59 60#ifdef HAVE_CTYPE_H 61#include <ctype.h> 62#endif 63#ifdef HAVE_STDLIB_H 64#include <stdlib.h> 65#endif 66#ifdef HAVE_SYS_STAT_H 67#include <sys/stat.h> 68#endif 69#ifdef HAVE_FCNTL_H 70#include <fcntl.h> 71#endif 72#ifdef HAVE_UNISTD_H 73#include <unistd.h> 74#endif 75#ifdef HAVE_ZLIB_H 76#include <zlib.h> 77#endif 78 79 80#define XML_PARSER_BIG_BUFFER_SIZE 300 81#define XML_PARSER_BUFFER_SIZE 100 82 83#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 84 85/* 86 * List of XML prefixed PI allowed by W3C specs 87 */ 88 89static const char *xmlW3CPIs[] = { 90 "xml-stylesheet", 91 NULL 92}; 93 94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 95xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 96 const xmlChar **str); 97 98static int 99xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 100 xmlSAXHandlerPtr sax, 101 void *user_data, int depth, const xmlChar *URL, 102 const xmlChar *ID, xmlNodePtr *list); 103 104static void 105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 106 xmlNodePtr lastNode); 107 108/************************************************************************ 109 * * 110 * Parser stacks related functions and macros * 111 * * 112 ************************************************************************/ 
113 114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 115 const xmlChar ** str); 116 117/* 118 * Generic function for accessing stacks in the Parser Context 119 */ 120 121#define PUSH_AND_POP(scope, type, name) \ 122scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 123 if (ctxt->name##Nr >= ctxt->name##Max) { \ 124 ctxt->name##Max *= 2; \ 125 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 126 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 127 if (ctxt->name##Tab == NULL) { \ 128 xmlGenericError(xmlGenericErrorContext, \ 129 "realloc failed !\n"); \ 130 return(0); \ 131 } \ 132 } \ 133 ctxt->name##Tab[ctxt->name##Nr] = value; \ 134 ctxt->name = value; \ 135 return(ctxt->name##Nr++); \ 136} \ 137scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 138 type ret; \ 139 if (ctxt->name##Nr <= 0) return(0); \ 140 ctxt->name##Nr--; \ 141 if (ctxt->name##Nr > 0) \ 142 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 143 else \ 144 ctxt->name = NULL; \ 145 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 146 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 147 return(ret); \ 148} \ 149 150/** 151 * inputPop: 152 * @ctxt: an XML parser context 153 * 154 * Pops the top parser input from the input stack 155 * 156 * Returns the input just removed 157 */ 158/** 159 * inputPush: 160 * @ctxt: an XML parser context 161 * @value: the parser input 162 * 163 * Pushes a new parser input on top of the input stack 164 * 165 * Returns 0 in case of error, the index in the stack otherwise 166 */ 167/** 168 * namePop: 169 * @ctxt: an XML parser context 170 * 171 * Pops the top element name from the name stack 172 * 173 * Returns the name just removed 174 */ 175/** 176 * namePush: 177 * @ctxt: an XML parser context 178 * @value: the element name 179 * 180 * Pushes a new element name on top of the name stack 181 * 182 * Returns 0 in case of error, the index in the stack otherwise 183 */ 184/** 185 * nodePop: 186 * @ctxt: an XML parser context 187 * 188 * Pops the top element node 
from the node stack 189 * 190 * Returns the node just removed 191 */ 192/** 193 * nodePush: 194 * @ctxt: an XML parser context 195 * @value: the element node 196 * 197 * Pushes a new element node on top of the node stack 198 * 199 * Returns 0 in case of error, the index in the stack otherwise 200 */ 201/* 202 * Those macros actually generate the functions 203 */ 204PUSH_AND_POP(extern, xmlParserInputPtr, input) 205PUSH_AND_POP(extern, xmlNodePtr, node) 206PUSH_AND_POP(extern, xmlChar*, name) 207 208static int spacePush(xmlParserCtxtPtr ctxt, int val) { 209 if (ctxt->spaceNr >= ctxt->spaceMax) { 210 ctxt->spaceMax *= 2; 211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 213 if (ctxt->spaceTab == NULL) { 214 xmlGenericError(xmlGenericErrorContext, 215 "realloc failed !\n"); 216 return(0); 217 } 218 } 219 ctxt->spaceTab[ctxt->spaceNr] = val; 220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 221 return(ctxt->spaceNr++); 222} 223 224static int spacePop(xmlParserCtxtPtr ctxt) { 225 int ret; 226 if (ctxt->spaceNr <= 0) return(0); 227 ctxt->spaceNr--; 228 if (ctxt->spaceNr > 0) 229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 230 else 231 ctxt->space = NULL; 232 ret = ctxt->spaceTab[ctxt->spaceNr]; 233 ctxt->spaceTab[ctxt->spaceNr] = -1; 234 return(ret); 235} 236 237/* 238 * Macros for accessing the content. Those should be used only by the parser, 239 * and not exported. 240 * 241 * Dirty macros, i.e. one often need to make assumption on the context to 242 * use them 243 * 244 * CUR_PTR return the current pointer to the xmlChar to be parsed. 245 * To be used with extreme caution since operations consuming 246 * characters may move the input buffer to a different location ! 247 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 248 * This should be used internally by the parser 249 * only to compare to ASCII values otherwise it would break when 250 * running with UTF-8 encoding. 
251 * RAW same as CUR but in the input buffer, bypass any token 252 * extraction that may have been done 253 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 254 * to compare on ASCII based substring. 255 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 256 * strings within the parser. 257 * 258 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 259 * 260 * NEXT Skip to the next character, this does the proper decoding 261 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 262 * NEXTL(l) Skip l xmlChar in the input buffer 263 * CUR_CHAR(l) returns the current unicode character (int), set l 264 * to the number of xmlChars used for the encoding [0-5]. 265 * CUR_SCHAR same but operate on a string instead of the context 266 * COPY_BUF copy the current unicode char to the target buffer, increment 267 * the index 268 * GROW, SHRINK handling of input buffers 269 */ 270 271#define RAW (*ctxt->input->cur) 272#define CUR (*ctxt->input->cur) 273#define NXT(val) ctxt->input->cur[(val)] 274#define CUR_PTR ctxt->input->cur 275 276#define SKIP(val) do { \ 277 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 278 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 279 if ((*ctxt->input->cur == 0) && \ 280 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 281 xmlPopInput(ctxt); \ 282 } while (0) 283 284#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \ 285 xmlSHRINK (ctxt); 286 287static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 288 xmlParserInputShrink(ctxt->input); 289 if ((*ctxt->input->cur == 0) && 290 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 291 xmlPopInput(ctxt); 292 } 293 294#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \ 295 xmlGROW (ctxt); 296 297static void xmlGROW (xmlParserCtxtPtr ctxt) { 298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 299 if ((*ctxt->input->cur == 0) && 300 (xmlParserInputGrow(ctxt->input, 
INPUT_CHUNK) <= 0)) 301 xmlPopInput(ctxt); 302 } 303 304#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 305 306#define NEXT xmlNextChar(ctxt) 307 308#define NEXT1 { \ 309 ctxt->input->cur++; \ 310 ctxt->nbChars++; \ 311 if (*ctxt->input->cur == 0) \ 312 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 313 } 314 315#define NEXTL(l) do { \ 316 if (*(ctxt->input->cur) == '\n') { \ 317 ctxt->input->line++; ctxt->input->col = 1; \ 318 } else ctxt->input->col++; \ 319 ctxt->input->cur += l; \ 320 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 321 } while (0) 322 323#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 324#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 325 326#define COPY_BUF(l,b,i,v) \ 327 if (l == 1) b[i++] = (xmlChar) v; \ 328 else i += xmlCopyCharMultiByte(&b[i],v) 329 330/** 331 * xmlSkipBlankChars: 332 * @ctxt: the XML parser context 333 * 334 * skip all blanks character found at that point in the input streams. 335 * It pops up finished entities in the process if allowable at that point. 336 * 337 * Returns the number of space chars skipped 338 */ 339 340int 341xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 342 int res = 0; 343 344 /* 345 * It's Okay to use CUR/NEXT here since all the blanks are on 346 * the ASCII range. 
347 */ 348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 349 const xmlChar *cur; 350 /* 351 * if we are in the document content, go really fast 352 */ 353 cur = ctxt->input->cur; 354 while (IS_BLANK(*cur)) { 355 if (*cur == '\n') { 356 ctxt->input->line++; ctxt->input->col = 1; 357 } 358 cur++; 359 res++; 360 if (*cur == 0) { 361 ctxt->input->cur = cur; 362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 363 cur = ctxt->input->cur; 364 } 365 } 366 ctxt->input->cur = cur; 367 } else { 368 int cur; 369 do { 370 cur = CUR; 371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 372 NEXT; 373 cur = CUR; 374 res++; 375 } 376 while ((cur == 0) && (ctxt->inputNr > 1) && 377 (ctxt->instate != XML_PARSER_COMMENT)) { 378 xmlPopInput(ctxt); 379 cur = CUR; 380 } 381 /* 382 * Need to handle support of entities branching here 383 */ 384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 386 } 387 return(res); 388} 389 390/************************************************************************ 391 * * 392 * Commodity functions to handle entities * 393 * * 394 ************************************************************************/ 395 396/** 397 * xmlPopInput: 398 * @ctxt: an XML parser context 399 * 400 * xmlPopInput: the current input pointed by ctxt->input came to an end 401 * pop it and return the next char. 
402 * 403 * Returns the current xmlChar in the parser context 404 */ 405xmlChar 406xmlPopInput(xmlParserCtxtPtr ctxt) { 407 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 408 if (xmlParserDebugEntities) 409 xmlGenericError(xmlGenericErrorContext, 410 "Popping input %d\n", ctxt->inputNr); 411 xmlFreeInputStream(inputPop(ctxt)); 412 if ((*ctxt->input->cur == 0) && 413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 414 return(xmlPopInput(ctxt)); 415 return(CUR); 416} 417 418/** 419 * xmlPushInput: 420 * @ctxt: an XML parser context 421 * @input: an XML parser input fragment (entity, XML fragment ...). 422 * 423 * xmlPushInput: switch to a new input stream which is stacked on top 424 * of the previous one(s). 425 */ 426void 427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 428 if (input == NULL) return; 429 430 if (xmlParserDebugEntities) { 431 if ((ctxt->input != NULL) && (ctxt->input->filename)) 432 xmlGenericError(xmlGenericErrorContext, 433 "%s(%d): ", ctxt->input->filename, 434 ctxt->input->line); 435 xmlGenericError(xmlGenericErrorContext, 436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 437 } 438 inputPush(ctxt, input); 439 GROW; 440} 441 442/** 443 * xmlParseCharRef: 444 * @ctxt: an XML parser context 445 * 446 * parse Reference declarations 447 * 448 * [66] CharRef ::= '&#' [0-9]+ ';' | 449 * '&#x' [0-9a-fA-F]+ ';' 450 * 451 * [ WFC: Legal Character ] 452 * Characters referred to using character references must match the 453 * production for Char. 
454 * 455 * Returns the value parsed (as an int), 0 in case of error 456 */ 457int 458xmlParseCharRef(xmlParserCtxtPtr ctxt) { 459 unsigned int val = 0; 460 int count = 0; 461 462 /* 463 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 464 */ 465 if ((RAW == '&') && (NXT(1) == '#') && 466 (NXT(2) == 'x')) { 467 SKIP(3); 468 GROW; 469 while (RAW != ';') { /* loop blocked by count */ 470 if (count++ > 20) { 471 count = 0; 472 GROW; 473 } 474 if ((RAW >= '0') && (RAW <= '9')) 475 val = val * 16 + (CUR - '0'); 476 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 477 val = val * 16 + (CUR - 'a') + 10; 478 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 479 val = val * 16 + (CUR - 'A') + 10; 480 else { 481 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 483 ctxt->sax->error(ctxt->userData, 484 "xmlParseCharRef: invalid hexadecimal value\n"); 485 ctxt->wellFormed = 0; 486 ctxt->disableSAX = 1; 487 val = 0; 488 break; 489 } 490 NEXT; 491 count++; 492 } 493 if (RAW == ';') { 494 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 495 ctxt->nbChars ++; 496 ctxt->input->cur++; 497 } 498 } else if ((RAW == '&') && (NXT(1) == '#')) { 499 SKIP(2); 500 GROW; 501 while (RAW != ';') { /* loop blocked by count */ 502 if (count++ > 20) { 503 count = 0; 504 GROW; 505 } 506 if ((RAW >= '0') && (RAW <= '9')) 507 val = val * 10 + (CUR - '0'); 508 else { 509 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 511 ctxt->sax->error(ctxt->userData, 512 "xmlParseCharRef: invalid decimal value\n"); 513 ctxt->wellFormed = 0; 514 ctxt->disableSAX = 1; 515 val = 0; 516 break; 517 } 518 NEXT; 519 count++; 520 } 521 if (RAW == ';') { 522 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 523 ctxt->nbChars ++; 524 ctxt->input->cur++; 525 } 526 } else { 527 ctxt->errNo = XML_ERR_INVALID_CHARREF; 528 if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) 529 ctxt->sax->error(ctxt->userData, 530 "xmlParseCharRef: invalid value\n"); 531 ctxt->wellFormed = 0; 532 ctxt->disableSAX = 1; 533 } 534 535 /* 536 * [ WFC: Legal Character ] 537 * Characters referred to using character references must match the 538 * production for Char. 539 */ 540 if (IS_CHAR(val)) { 541 return(val); 542 } else { 543 ctxt->errNo = XML_ERR_INVALID_CHAR; 544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 545 ctxt->sax->error(ctxt->userData, 546 "xmlParseCharRef: invalid xmlChar value %d\n", 547 val); 548 ctxt->wellFormed = 0; 549 ctxt->disableSAX = 1; 550 } 551 return(0); 552} 553 554/** 555 * xmlParseStringCharRef: 556 * @ctxt: an XML parser context 557 * @str: a pointer to an index in the string 558 * 559 * parse Reference declarations, variant parsing from a string rather 560 * than an an input flow. 561 * 562 * [66] CharRef ::= '&#' [0-9]+ ';' | 563 * '&#x' [0-9a-fA-F]+ ';' 564 * 565 * [ WFC: Legal Character ] 566 * Characters referred to using character references must match the 567 * production for Char. 
568 * 569 * Returns the value parsed (as an int), 0 in case of error, str will be 570 * updated to the current value of the index 571 */ 572static int 573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 574 const xmlChar *ptr; 575 xmlChar cur; 576 int val = 0; 577 578 if ((str == NULL) || (*str == NULL)) return(0); 579 ptr = *str; 580 cur = *ptr; 581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 582 ptr += 3; 583 cur = *ptr; 584 while (cur != ';') { /* Non input consuming loop */ 585 if ((cur >= '0') && (cur <= '9')) 586 val = val * 16 + (cur - '0'); 587 else if ((cur >= 'a') && (cur <= 'f')) 588 val = val * 16 + (cur - 'a') + 10; 589 else if ((cur >= 'A') && (cur <= 'F')) 590 val = val * 16 + (cur - 'A') + 10; 591 else { 592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 594 ctxt->sax->error(ctxt->userData, 595 "xmlParseStringCharRef: invalid hexadecimal value\n"); 596 ctxt->wellFormed = 0; 597 ctxt->disableSAX = 1; 598 val = 0; 599 break; 600 } 601 ptr++; 602 cur = *ptr; 603 } 604 if (cur == ';') 605 ptr++; 606 } else if ((cur == '&') && (ptr[1] == '#')){ 607 ptr += 2; 608 cur = *ptr; 609 while (cur != ';') { /* Non input consuming loops */ 610 if ((cur >= '0') && (cur <= '9')) 611 val = val * 10 + (cur - '0'); 612 else { 613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 615 ctxt->sax->error(ctxt->userData, 616 "xmlParseStringCharRef: invalid decimal value\n"); 617 ctxt->wellFormed = 0; 618 ctxt->disableSAX = 1; 619 val = 0; 620 break; 621 } 622 ptr++; 623 cur = *ptr; 624 } 625 if (cur == ';') 626 ptr++; 627 } else { 628 ctxt->errNo = XML_ERR_INVALID_CHARREF; 629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 630 ctxt->sax->error(ctxt->userData, 631 "xmlParseStringCharRef: invalid value\n"); 632 ctxt->wellFormed = 0; 633 ctxt->disableSAX = 1; 634 return(0); 635 } 636 *str = ptr; 637 638 /* 639 * [ WFC: Legal 
Character ] 640 * Characters referred to using character references must match the 641 * production for Char. 642 */ 643 if (IS_CHAR(val)) { 644 return(val); 645 } else { 646 ctxt->errNo = XML_ERR_INVALID_CHAR; 647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 648 ctxt->sax->error(ctxt->userData, 649 "xmlParseStringCharRef: invalid xmlChar value %d\n", val); 650 ctxt->wellFormed = 0; 651 ctxt->disableSAX = 1; 652 } 653 return(0); 654} 655 656/** 657 * xmlNewBlanksWrapperInputStream: 658 * @ctxt: an XML parser context 659 * @entity: an Entity pointer 660 * 661 * Create a new input stream for wrapping 662 * blanks around a PEReference 663 * 664 * Returns the new input stream or NULL 665 */ 666 667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 668 669static xmlParserInputPtr 670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 671 xmlParserInputPtr input; 672 xmlChar *buffer; 673 size_t length; 674 if (entity == NULL) { 675 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 677 ctxt->sax->error(ctxt->userData, 678 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n"); 679 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 680 return(NULL); 681 } 682 if (xmlParserDebugEntities) 683 xmlGenericError(xmlGenericErrorContext, 684 "new blanks wrapper for entity: %s\n", entity->name); 685 input = xmlNewInputStream(ctxt); 686 if (input == NULL) { 687 return(NULL); 688 } 689 length = xmlStrlen(entity->name) + 5; 690 buffer = xmlMalloc(length); 691 if (buffer == NULL) { 692 return(NULL); 693 } 694 buffer [0] = ' '; 695 buffer [1] = '%'; 696 buffer [length-3] = ';'; 697 buffer [length-2] = ' '; 698 buffer [length-1] = 0; 699 memcpy(buffer + 2, entity->name, length - 5); 700 input->free = deallocblankswrapper; 701 input->base = buffer; 702 input->cur = buffer; 703 input->length = length; 704 input->end = &buffer[length]; 705 return(input); 706} 707 708/** 709 * xmlParserHandlePEReference: 
710 * @ctxt: the parser context 711 * 712 * [69] PEReference ::= '%' Name ';' 713 * 714 * [ WFC: No Recursion ] 715 * A parsed entity must not contain a recursive 716 * reference to itself, either directly or indirectly. 717 * 718 * [ WFC: Entity Declared ] 719 * In a document without any DTD, a document with only an internal DTD 720 * subset which contains no parameter entity references, or a document 721 * with "standalone='yes'", ... ... The declaration of a parameter 722 * entity must precede any reference to it... 723 * 724 * [ VC: Entity Declared ] 725 * In a document with an external subset or external parameter entities 726 * with "standalone='no'", ... ... The declaration of a parameter entity 727 * must precede any reference to it... 728 * 729 * [ WFC: In DTD ] 730 * Parameter-entity references may only appear in the DTD. 731 * NOTE: misleading but this is handled. 732 * 733 * A PEReference may have been detected in the current input stream 734 * the handling is done accordingly to 735 * http://www.w3.org/TR/REC-xml#entproc 736 * i.e. 
737 * - Included in literal in entity values 738 * - Included as Parameter Entity reference within DTDs 739 */ 740void 741xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 742 xmlChar *name; 743 xmlEntityPtr entity = NULL; 744 xmlParserInputPtr input; 745 746 if (RAW != '%') return; 747 switch(ctxt->instate) { 748 case XML_PARSER_CDATA_SECTION: 749 return; 750 case XML_PARSER_COMMENT: 751 return; 752 case XML_PARSER_START_TAG: 753 return; 754 case XML_PARSER_END_TAG: 755 return; 756 case XML_PARSER_EOF: 757 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 759 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 760 ctxt->wellFormed = 0; 761 ctxt->disableSAX = 1; 762 return; 763 case XML_PARSER_PROLOG: 764 case XML_PARSER_START: 765 case XML_PARSER_MISC: 766 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 768 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 769 ctxt->wellFormed = 0; 770 ctxt->disableSAX = 1; 771 return; 772 case XML_PARSER_ENTITY_DECL: 773 case XML_PARSER_CONTENT: 774 case XML_PARSER_ATTRIBUTE_VALUE: 775 case XML_PARSER_PI: 776 case XML_PARSER_SYSTEM_LITERAL: 777 case XML_PARSER_PUBLIC_LITERAL: 778 /* we just ignore it there */ 779 return; 780 case XML_PARSER_EPILOG: 781 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 783 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 784 ctxt->wellFormed = 0; 785 ctxt->disableSAX = 1; 786 return; 787 case XML_PARSER_ENTITY_VALUE: 788 /* 789 * NOTE: in the case of entity values, we don't do the 790 * substitution here since we need the literal 791 * entity value to be able to save the internal 792 * subset of the document. 
793 * This will be handled by xmlStringDecodeEntities 794 */ 795 return; 796 case XML_PARSER_DTD: 797 /* 798 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 799 * In the internal DTD subset, parameter-entity references 800 * can occur only where markup declarations can occur, not 801 * within markup declarations. 802 * In that case this is handled in xmlParseMarkupDecl 803 */ 804 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 805 return; 806 if (IS_BLANK(NXT(1)) || NXT(1) == 0) 807 return; 808 break; 809 case XML_PARSER_IGNORE: 810 return; 811 } 812 813 NEXT; 814 name = xmlParseName(ctxt); 815 if (xmlParserDebugEntities) 816 xmlGenericError(xmlGenericErrorContext, 817 "PEReference: %s\n", name); 818 if (name == NULL) { 819 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 821 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n"); 822 ctxt->wellFormed = 0; 823 ctxt->disableSAX = 1; 824 } else { 825 if (RAW == ';') { 826 NEXT; 827 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 828 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 829 if (entity == NULL) { 830 831 /* 832 * [ WFC: Entity Declared ] 833 * In a document without any DTD, a document with only an 834 * internal DTD subset which contains no parameter entity 835 * references, or a document with "standalone='yes'", ... 836 * ... The declaration of a parameter entity must precede 837 * any reference to it... 838 */ 839 if ((ctxt->standalone == 1) || 840 ((ctxt->hasExternalSubset == 0) && 841 (ctxt->hasPErefs == 0))) { 842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 843 ctxt->sax->error(ctxt->userData, 844 "PEReference: %%%s; not found\n", name); 845 ctxt->wellFormed = 0; 846 ctxt->disableSAX = 1; 847 } else { 848 /* 849 * [ VC: Entity Declared ] 850 * In a document with an external subset or external 851 * parameter entities with "standalone='no'", ... 852 * ... 
The declaration of a parameter entity must precede 853 * any reference to it... 854 */ 855 if ((!ctxt->disableSAX) && 856 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 857 ctxt->vctxt.error(ctxt->vctxt.userData, 858 "PEReference: %%%s; not found\n", name); 859 } else if ((!ctxt->disableSAX) && 860 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 861 ctxt->sax->warning(ctxt->userData, 862 "PEReference: %%%s; not found\n", name); 863 ctxt->valid = 0; 864 } 865 } else if (ctxt->input->free != deallocblankswrapper) { 866 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 867 xmlPushInput(ctxt, input); 868 } else { 869 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 870 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 871 xmlChar start[4]; 872 xmlCharEncoding enc; 873 874 /* 875 * handle the extra spaces added before and after 876 * c.f. http://www.w3.org/TR/REC-xml#as-PE 877 * this is done independently. 878 */ 879 input = xmlNewEntityInputStream(ctxt, entity); 880 xmlPushInput(ctxt, input); 881 882 /* 883 * Get the 4 first bytes and decode the charset 884 * if enc != XML_CHAR_ENCODING_NONE 885 * plug some encoding conversion routines. 
886 */ 887 GROW 888 if (entity->length >= 4) { 889 start[0] = RAW; 890 start[1] = NXT(1); 891 start[2] = NXT(2); 892 start[3] = NXT(3); 893 enc = xmlDetectCharEncoding(start, 4); 894 if (enc != XML_CHAR_ENCODING_NONE) { 895 xmlSwitchEncoding(ctxt, enc); 896 } 897 } 898 899 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 900 (RAW == '<') && (NXT(1) == '?') && 901 (NXT(2) == 'x') && (NXT(3) == 'm') && 902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 903 xmlParseTextDecl(ctxt); 904 } 905 } else { 906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 907 ctxt->sax->error(ctxt->userData, 908 "xmlParserHandlePEReference: %s is not a parameter entity\n", 909 name); 910 ctxt->wellFormed = 0; 911 ctxt->disableSAX = 1; 912 } 913 } 914 } else { 915 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 917 ctxt->sax->error(ctxt->userData, 918 "xmlParserHandlePEReference: expecting ';'\n"); 919 ctxt->wellFormed = 0; 920 ctxt->disableSAX = 1; 921 } 922 xmlFree(name); 923 } 924} 925 926/* 927 * Macro used to grow the current buffer. 928 */ 929#define growBuffer(buffer) { \ 930 buffer##_size *= 2; \ 931 buffer = (xmlChar *) \ 932 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 933 if (buffer == NULL) { \ 934 xmlGenericError(xmlGenericErrorContext, "realloc failed"); \ 935 return(NULL); \ 936 } \ 937} 938 939/** 940 * xmlStringDecodeEntities: 941 * @ctxt: the parser context 942 * @str: the input string 943 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 944 * @end: an end marker xmlChar, 0 if none 945 * @end2: an end marker xmlChar, 0 if none 946 * @end3: an end marker xmlChar, 0 if none 947 * 948 * Takes a entity string content and process to do the adequate substitutions. 949 * 950 * [67] Reference ::= EntityRef | CharRef 951 * 952 * [69] PEReference ::= '%' Name ';' 953 * 954 * Returns A newly allocated string with the substitution done. The caller 955 * must deallocate it ! 
956 */ 957xmlChar * 958xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 959 xmlChar end, xmlChar end2, xmlChar end3) { 960 xmlChar *buffer = NULL; 961 int buffer_size = 0; 962 963 xmlChar *current = NULL; 964 xmlEntityPtr ent; 965 int c,l; 966 int nbchars = 0; 967 968 if (str == NULL) 969 return(NULL); 970 971 if (ctxt->depth > 40) { 972 ctxt->errNo = XML_ERR_ENTITY_LOOP; 973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 974 ctxt->sax->error(ctxt->userData, 975 "Detected entity reference loop\n"); 976 ctxt->wellFormed = 0; 977 ctxt->disableSAX = 1; 978 return(NULL); 979 } 980 981 /* 982 * allocate a translation buffer. 983 */ 984 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 985 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 986 if (buffer == NULL) { 987 xmlGenericError(xmlGenericErrorContext, 988 "xmlStringDecodeEntities: malloc failed"); 989 return(NULL); 990 } 991 992 /* 993 * OK loop until we reach one of the ending char or a size limit. 994 * we are operating on already parsed values. 
995 */ 996 c = CUR_SCHAR(str, l); 997 while ((c != 0) && (c != end) && /* non input consuming loop */ 998 (c != end2) && (c != end3)) { 999 1000 if (c == 0) break; 1001 if ((c == '&') && (str[1] == '#')) { 1002 int val = xmlParseStringCharRef(ctxt, &str); 1003 if (val != 0) { 1004 COPY_BUF(0,buffer,nbchars,val); 1005 } 1006 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1007 if (xmlParserDebugEntities) 1008 xmlGenericError(xmlGenericErrorContext, 1009 "String decoding Entity Reference: %.30s\n", 1010 str); 1011 ent = xmlParseStringEntityRef(ctxt, &str); 1012 if ((ent != NULL) && 1013 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1014 if (ent->content != NULL) { 1015 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1016 } else { 1017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1018 ctxt->sax->error(ctxt->userData, 1019 "internal error entity has no content\n"); 1020 } 1021 } else if ((ent != NULL) && (ent->content != NULL)) { 1022 xmlChar *rep; 1023 1024 ctxt->depth++; 1025 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1026 0, 0, 0); 1027 ctxt->depth--; 1028 if (rep != NULL) { 1029 current = rep; 1030 while (*current != 0) { /* non input consuming loop */ 1031 buffer[nbchars++] = *current++; 1032 if (nbchars > 1033 buffer_size - XML_PARSER_BUFFER_SIZE) { 1034 growBuffer(buffer); 1035 } 1036 } 1037 xmlFree(rep); 1038 } 1039 } else if (ent != NULL) { 1040 int i = xmlStrlen(ent->name); 1041 const xmlChar *cur = ent->name; 1042 1043 buffer[nbchars++] = '&'; 1044 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1045 growBuffer(buffer); 1046 } 1047 for (;i > 0;i--) 1048 buffer[nbchars++] = *cur++; 1049 buffer[nbchars++] = ';'; 1050 } 1051 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1052 if (xmlParserDebugEntities) 1053 xmlGenericError(xmlGenericErrorContext, 1054 "String decoding PE Reference: %.30s\n", str); 1055 ent = xmlParseStringPEReference(ctxt, &str); 1056 if (ent != NULL) { 1057 xmlChar *rep; 1058 1059 
ctxt->depth++; 1060 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1061 0, 0, 0); 1062 ctxt->depth--; 1063 if (rep != NULL) { 1064 current = rep; 1065 while (*current != 0) { /* non input consuming loop */ 1066 buffer[nbchars++] = *current++; 1067 if (nbchars > 1068 buffer_size - XML_PARSER_BUFFER_SIZE) { 1069 growBuffer(buffer); 1070 } 1071 } 1072 xmlFree(rep); 1073 } 1074 } 1075 } else { 1076 COPY_BUF(l,buffer,nbchars,c); 1077 str += l; 1078 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1079 growBuffer(buffer); 1080 } 1081 } 1082 c = CUR_SCHAR(str, l); 1083 } 1084 buffer[nbchars++] = 0; 1085 return(buffer); 1086} 1087 1088 1089/************************************************************************ 1090 * * 1091 * Commodity functions to handle xmlChars * 1092 * * 1093 ************************************************************************/ 1094 1095/** 1096 * xmlStrndup: 1097 * @cur: the input xmlChar * 1098 * @len: the len of @cur 1099 * 1100 * a strndup for array of xmlChar's 1101 * 1102 * Returns a new xmlChar * or NULL 1103 */ 1104xmlChar * 1105xmlStrndup(const xmlChar *cur, int len) { 1106 xmlChar *ret; 1107 1108 if ((cur == NULL) || (len < 0)) return(NULL); 1109 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1110 if (ret == NULL) { 1111 xmlGenericError(xmlGenericErrorContext, 1112 "malloc of %ld byte failed\n", 1113 (len + 1) * (long)sizeof(xmlChar)); 1114 return(NULL); 1115 } 1116 memcpy(ret, cur, len * sizeof(xmlChar)); 1117 ret[len] = 0; 1118 return(ret); 1119} 1120 1121/** 1122 * xmlStrdup: 1123 * @cur: the input xmlChar * 1124 * 1125 * a strdup for array of xmlChar's. Since they are supposed to be 1126 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1127 * a termination mark of '0'. 
1128 * 1129 * Returns a new xmlChar * or NULL 1130 */ 1131xmlChar * 1132xmlStrdup(const xmlChar *cur) { 1133 const xmlChar *p = cur; 1134 1135 if (cur == NULL) return(NULL); 1136 while (*p != 0) p++; /* non input consuming */ 1137 return(xmlStrndup(cur, p - cur)); 1138} 1139 1140/** 1141 * xmlCharStrndup: 1142 * @cur: the input char * 1143 * @len: the len of @cur 1144 * 1145 * a strndup for char's to xmlChar's 1146 * 1147 * Returns a new xmlChar * or NULL 1148 */ 1149 1150xmlChar * 1151xmlCharStrndup(const char *cur, int len) { 1152 int i; 1153 xmlChar *ret; 1154 1155 if ((cur == NULL) || (len < 0)) return(NULL); 1156 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1157 if (ret == NULL) { 1158 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1159 (len + 1) * (long)sizeof(xmlChar)); 1160 return(NULL); 1161 } 1162 for (i = 0;i < len;i++) 1163 ret[i] = (xmlChar) cur[i]; 1164 ret[len] = 0; 1165 return(ret); 1166} 1167 1168/** 1169 * xmlCharStrdup: 1170 * @cur: the input char * 1171 * @len: the len of @cur 1172 * 1173 * a strdup for char's to xmlChar's 1174 * 1175 * Returns a new xmlChar * or NULL 1176 */ 1177 1178xmlChar * 1179xmlCharStrdup(const char *cur) { 1180 const char *p = cur; 1181 1182 if (cur == NULL) return(NULL); 1183 while (*p != '\0') p++; /* non input consuming */ 1184 return(xmlCharStrndup(cur, p - cur)); 1185} 1186 1187/** 1188 * xmlStrcmp: 1189 * @str1: the first xmlChar * 1190 * @str2: the second xmlChar * 1191 * 1192 * a strcmp for xmlChar's 1193 * 1194 * Returns the integer result of the comparison 1195 */ 1196 1197int 1198xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1199 register int tmp; 1200 1201 if (str1 == str2) return(0); 1202 if (str1 == NULL) return(-1); 1203 if (str2 == NULL) return(1); 1204 do { 1205 tmp = *str1++ - *str2; 1206 if (tmp != 0) return(tmp); 1207 } while (*str2++ != 0); 1208 return 0; 1209} 1210 1211/** 1212 * xmlStrEqual: 1213 * @str1: the first xmlChar * 1214 * @str2: the second 
xmlChar * 1215 * 1216 * Check if both string are equal of have same content 1217 * Should be a bit more readable and faster than xmlStrEqual() 1218 * 1219 * Returns 1 if they are equal, 0 if they are different 1220 */ 1221 1222int 1223xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1224 if (str1 == str2) return(1); 1225 if (str1 == NULL) return(0); 1226 if (str2 == NULL) return(0); 1227 do { 1228 if (*str1++ != *str2) return(0); 1229 } while (*str2++); 1230 return(1); 1231} 1232 1233/** 1234 * xmlStrncmp: 1235 * @str1: the first xmlChar * 1236 * @str2: the second xmlChar * 1237 * @len: the max comparison length 1238 * 1239 * a strncmp for xmlChar's 1240 * 1241 * Returns the integer result of the comparison 1242 */ 1243 1244int 1245xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1246 register int tmp; 1247 1248 if (len <= 0) return(0); 1249 if (str1 == str2) return(0); 1250 if (str1 == NULL) return(-1); 1251 if (str2 == NULL) return(1); 1252 do { 1253 tmp = *str1++ - *str2; 1254 if (tmp != 0 || --len == 0) return(tmp); 1255 } while (*str2++ != 0); 1256 return 0; 1257} 1258 1259static const xmlChar casemap[256] = { 1260 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1261 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1262 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1263 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1264 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1265 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1266 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1267 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1268 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1269 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1270 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1271 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1272 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1273 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1274 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1275 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1276 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1277 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1278 
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1279 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1280 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1281 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1282 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1283 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1284 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1285 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1286 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1287 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1288 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1289 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1290 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1291 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1292}; 1293 1294/** 1295 * xmlStrcasecmp: 1296 * @str1: the first xmlChar * 1297 * @str2: the second xmlChar * 1298 * 1299 * a strcasecmp for xmlChar's 1300 * 1301 * Returns the integer result of the comparison 1302 */ 1303 1304int 1305xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1306 register int tmp; 1307 1308 if (str1 == str2) return(0); 1309 if (str1 == NULL) return(-1); 1310 if (str2 == NULL) return(1); 1311 do { 1312 tmp = casemap[*str1++] - casemap[*str2]; 1313 if (tmp != 0) return(tmp); 1314 } while (*str2++ != 0); 1315 return 0; 1316} 1317 1318/** 1319 * xmlStrncasecmp: 1320 * @str1: the first xmlChar * 1321 * @str2: the second xmlChar * 1322 * @len: the max comparison length 1323 * 1324 * a strncasecmp for xmlChar's 1325 * 1326 * Returns the integer result of the comparison 1327 */ 1328 1329int 1330xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1331 register int tmp; 1332 1333 if (len <= 0) return(0); 1334 if (str1 == str2) return(0); 1335 if (str1 == NULL) return(-1); 1336 if (str2 == NULL) return(1); 1337 do { 1338 tmp = casemap[*str1++] - casemap[*str2]; 1339 if (tmp != 0 || --len == 0) return(tmp); 1340 } while (*str2++ != 0); 1341 return 0; 1342} 1343 1344/** 1345 * xmlStrchr: 1346 * @str: the xmlChar * array 1347 * @val: the xmlChar to search 1348 * 1349 * a strchr for xmlChar's 1350 * 1351 
* Returns the xmlChar * for the first occurrence or NULL. 1352 */ 1353 1354const xmlChar * 1355xmlStrchr(const xmlChar *str, xmlChar val) { 1356 if (str == NULL) return(NULL); 1357 while (*str != 0) { /* non input consuming */ 1358 if (*str == val) return((xmlChar *) str); 1359 str++; 1360 } 1361 return(NULL); 1362} 1363 1364/** 1365 * xmlStrstr: 1366 * @str: the xmlChar * array (haystack) 1367 * @val: the xmlChar to search (needle) 1368 * 1369 * a strstr for xmlChar's 1370 * 1371 * Returns the xmlChar * for the first occurrence or NULL. 1372 */ 1373 1374const xmlChar * 1375xmlStrstr(const xmlChar *str, const xmlChar *val) { 1376 int n; 1377 1378 if (str == NULL) return(NULL); 1379 if (val == NULL) return(NULL); 1380 n = xmlStrlen(val); 1381 1382 if (n == 0) return(str); 1383 while (*str != 0) { /* non input consuming */ 1384 if (*str == *val) { 1385 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1386 } 1387 str++; 1388 } 1389 return(NULL); 1390} 1391 1392/** 1393 * xmlStrcasestr: 1394 * @str: the xmlChar * array (haystack) 1395 * @val: the xmlChar to search (needle) 1396 * 1397 * a case-ignoring strstr for xmlChar's 1398 * 1399 * Returns the xmlChar * for the first occurrence or NULL. 1400 */ 1401 1402const xmlChar * 1403xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1404 int n; 1405 1406 if (str == NULL) return(NULL); 1407 if (val == NULL) return(NULL); 1408 n = xmlStrlen(val); 1409 1410 if (n == 0) return(str); 1411 while (*str != 0) { /* non input consuming */ 1412 if (casemap[*str] == casemap[*val]) 1413 if (!xmlStrncasecmp(str, val, n)) return(str); 1414 str++; 1415 } 1416 return(NULL); 1417} 1418 1419/** 1420 * xmlStrsub: 1421 * @str: the xmlChar * array (haystack) 1422 * @start: the index of the first char (zero based) 1423 * @len: the length of the substring 1424 * 1425 * Extract a substring of a given string 1426 * 1427 * Returns the xmlChar * for the first occurrence or NULL. 
1428 */ 1429 1430xmlChar * 1431xmlStrsub(const xmlChar *str, int start, int len) { 1432 int i; 1433 1434 if (str == NULL) return(NULL); 1435 if (start < 0) return(NULL); 1436 if (len < 0) return(NULL); 1437 1438 for (i = 0;i < start;i++) { 1439 if (*str == 0) return(NULL); 1440 str++; 1441 } 1442 if (*str == 0) return(NULL); 1443 return(xmlStrndup(str, len)); 1444} 1445 1446/** 1447 * xmlStrlen: 1448 * @str: the xmlChar * array 1449 * 1450 * length of a xmlChar's string 1451 * 1452 * Returns the number of xmlChar contained in the ARRAY. 1453 */ 1454 1455int 1456xmlStrlen(const xmlChar *str) { 1457 int len = 0; 1458 1459 if (str == NULL) return(0); 1460 while (*str != 0) { /* non input consuming */ 1461 str++; 1462 len++; 1463 } 1464 return(len); 1465} 1466 1467/** 1468 * xmlStrncat: 1469 * @cur: the original xmlChar * array 1470 * @add: the xmlChar * array added 1471 * @len: the length of @add 1472 * 1473 * a strncat for array of xmlChar's, it will extend @cur with the len 1474 * first bytes of @add. 1475 * 1476 * Returns a new xmlChar *, the original @cur is reallocated if needed 1477 * and should not be freed 1478 */ 1479 1480xmlChar * 1481xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1482 int size; 1483 xmlChar *ret; 1484 1485 if ((add == NULL) || (len == 0)) 1486 return(cur); 1487 if (cur == NULL) 1488 return(xmlStrndup(add, len)); 1489 1490 size = xmlStrlen(cur); 1491 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1492 if (ret == NULL) { 1493 xmlGenericError(xmlGenericErrorContext, 1494 "xmlStrncat: realloc of %ld byte failed\n", 1495 (size + len + 1) * (long)sizeof(xmlChar)); 1496 return(cur); 1497 } 1498 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1499 ret[size + len] = 0; 1500 return(ret); 1501} 1502 1503/** 1504 * xmlStrcat: 1505 * @cur: the original xmlChar * array 1506 * @add: the xmlChar * array added 1507 * 1508 * a strcat for array of xmlChar's. 
Since they are supposed to be 1509 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1510 * a termination mark of '0'. 1511 * 1512 * Returns a new xmlChar * containing the concatenated string. 1513 */ 1514xmlChar * 1515xmlStrcat(xmlChar *cur, const xmlChar *add) { 1516 const xmlChar *p = add; 1517 1518 if (add == NULL) return(cur); 1519 if (cur == NULL) 1520 return(xmlStrdup(add)); 1521 1522 while (*p != 0) p++; /* non input consuming */ 1523 return(xmlStrncat(cur, add, p - add)); 1524} 1525 1526/************************************************************************ 1527 * * 1528 * Commodity functions, cleanup needed ? * 1529 * * 1530 ************************************************************************/ 1531 1532/** 1533 * areBlanks: 1534 * @ctxt: an XML parser context 1535 * @str: a xmlChar * 1536 * @len: the size of @str 1537 * 1538 * Is this a sequence of blank chars that one can ignore ? 1539 * 1540 * Returns 1 if ignorable 0 otherwise. 1541 */ 1542 1543static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1544 int i, ret; 1545 xmlNodePtr lastChild; 1546 1547 /* 1548 * Don't spend time trying to differentiate them, the same callback is 1549 * used ! 1550 */ 1551 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 1552 return(0); 1553 1554 /* 1555 * Check for xml:space value. 
1556 */ 1557 if (*(ctxt->space) == 1) 1558 return(0); 1559 1560 /* 1561 * Check that the string is made of blanks 1562 */ 1563 for (i = 0;i < len;i++) 1564 if (!(IS_BLANK(str[i]))) return(0); 1565 1566 /* 1567 * Look if the element is mixed content in the DTD if available 1568 */ 1569 if (ctxt->node == NULL) return(0); 1570 if (ctxt->myDoc != NULL) { 1571 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1572 if (ret == 0) return(1); 1573 if (ret == 1) return(0); 1574 } 1575 1576 /* 1577 * Otherwise, heuristic :-\ 1578 */ 1579 if (RAW != '<') return(0); 1580 if ((ctxt->node->children == NULL) && 1581 (RAW == '<') && (NXT(1) == '/')) return(0); 1582 1583 lastChild = xmlGetLastChild(ctxt->node); 1584 if (lastChild == NULL) { 1585 if ((ctxt->node->type != XML_ELEMENT_NODE) && 1586 (ctxt->node->content != NULL)) return(0); 1587 } else if (xmlNodeIsText(lastChild)) 1588 return(0); 1589 else if ((ctxt->node->children != NULL) && 1590 (xmlNodeIsText(ctxt->node->children))) 1591 return(0); 1592 return(1); 1593} 1594 1595/************************************************************************ 1596 * * 1597 * Extra stuff for namespace support * 1598 * Relates to http://www.w3.org/TR/WD-xml-names * 1599 * * 1600 ************************************************************************/ 1601 1602/** 1603 * xmlSplitQName: 1604 * @ctxt: an XML parser context 1605 * @name: an XML parser context 1606 * @prefix: a xmlChar ** 1607 * 1608 * parse an UTF8 encoded XML qualified name string 1609 * 1610 * [NS 5] QName ::= (Prefix ':')? LocalPart 1611 * 1612 * [NS 6] Prefix ::= NCName 1613 * 1614 * [NS 7] LocalPart ::= NCName 1615 * 1616 * Returns the local part, and prefix is updated 1617 * to get the Prefix if any. 
1618 */ 1619 1620xmlChar * 1621xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1622 xmlChar buf[XML_MAX_NAMELEN + 5]; 1623 xmlChar *buffer = NULL; 1624 int len = 0; 1625 int max = XML_MAX_NAMELEN; 1626 xmlChar *ret = NULL; 1627 const xmlChar *cur = name; 1628 int c; 1629 1630 *prefix = NULL; 1631 1632#ifndef XML_XML_NAMESPACE 1633 /* xml: prefix is not really a namespace */ 1634 if ((cur[0] == 'x') && (cur[1] == 'm') && 1635 (cur[2] == 'l') && (cur[3] == ':')) 1636 return(xmlStrdup(name)); 1637#endif 1638 1639 /* nasty but valid */ 1640 if (cur[0] == ':') 1641 return(xmlStrdup(name)); 1642 1643 c = *cur++; 1644 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1645 buf[len++] = c; 1646 c = *cur++; 1647 } 1648 if (len >= max) { 1649 /* 1650 * Okay someone managed to make a huge name, so he's ready to pay 1651 * for the processing speed. 1652 */ 1653 max = len * 2; 1654 1655 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1656 if (buffer == NULL) { 1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1658 ctxt->sax->error(ctxt->userData, 1659 "xmlSplitQName: out of memory\n"); 1660 return(NULL); 1661 } 1662 memcpy(buffer, buf, len); 1663 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1664 if (len + 10 > max) { 1665 max *= 2; 1666 buffer = (xmlChar *) xmlRealloc(buffer, 1667 max * sizeof(xmlChar)); 1668 if (buffer == NULL) { 1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1670 ctxt->sax->error(ctxt->userData, 1671 "xmlSplitQName: out of memory\n"); 1672 return(NULL); 1673 } 1674 } 1675 buffer[len++] = c; 1676 c = *cur++; 1677 } 1678 buffer[len] = 0; 1679 } 1680 1681 if (buffer == NULL) 1682 ret = xmlStrndup(buf, len); 1683 else { 1684 ret = buffer; 1685 buffer = NULL; 1686 max = XML_MAX_NAMELEN; 1687 } 1688 1689 1690 if (c == ':') { 1691 c = *cur; 1692 if (c == 0) return(ret); 1693 *prefix = ret; 1694 len = 0; 1695 1696 /* 1697 * Check that the first character is proper to start 
1698 * a new name 1699 */ 1700 if (!(((c >= 0x61) && (c <= 0x7A)) || 1701 ((c >= 0x41) && (c <= 0x5A)) || 1702 (c == '_') || (c == ':'))) { 1703 int l; 1704 int first = CUR_SCHAR(cur, l); 1705 1706 if (!IS_LETTER(first) && (first != '_')) { 1707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1708 ctxt->sax->error(ctxt->userData, 1709 "Name %s is not XML Namespace compliant\n", 1710 name); 1711 } 1712 } 1713 cur++; 1714 1715 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 1716 buf[len++] = c; 1717 c = *cur++; 1718 } 1719 if (len >= max) { 1720 /* 1721 * Okay someone managed to make a huge name, so he's ready to pay 1722 * for the processing speed. 1723 */ 1724 max = len * 2; 1725 1726 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1727 if (buffer == NULL) { 1728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1729 ctxt->sax->error(ctxt->userData, 1730 "xmlSplitQName: out of memory\n"); 1731 return(NULL); 1732 } 1733 memcpy(buffer, buf, len); 1734 while (c != 0) { /* tested bigname2.xml */ 1735 if (len + 10 > max) { 1736 max *= 2; 1737 buffer = (xmlChar *) xmlRealloc(buffer, 1738 max * sizeof(xmlChar)); 1739 if (buffer == NULL) { 1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1741 ctxt->sax->error(ctxt->userData, 1742 "xmlSplitQName: out of memory\n"); 1743 return(NULL); 1744 } 1745 } 1746 buffer[len++] = c; 1747 c = *cur++; 1748 } 1749 buffer[len] = 0; 1750 } 1751 1752 if (buffer == NULL) 1753 ret = xmlStrndup(buf, len); 1754 else { 1755 ret = buffer; 1756 } 1757 } 1758 1759 return(ret); 1760} 1761 1762/************************************************************************ 1763 * * 1764 * The parser itself * 1765 * Relates to http://www.w3.org/TR/REC-xml * 1766 * * 1767 ************************************************************************/ 1768 1769static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1770/** 1771 * xmlParseName: 1772 * @ctxt: an XML parser context 1773 * 1774 * parse an XML name. 
1775 * 1776 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1777 * CombiningChar | Extender 1778 * 1779 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1780 * 1781 * [6] Names ::= Name (S Name)* 1782 * 1783 * Returns the Name parsed or NULL 1784 */ 1785 1786xmlChar * 1787xmlParseName(xmlParserCtxtPtr ctxt) { 1788 const xmlChar *in; 1789 xmlChar *ret; 1790 int count = 0; 1791 1792 GROW; 1793 1794 /* 1795 * Accelerator for simple ASCII names 1796 */ 1797 in = ctxt->input->cur; 1798 if (((*in >= 0x61) && (*in <= 0x7A)) || 1799 ((*in >= 0x41) && (*in <= 0x5A)) || 1800 (*in == '_') || (*in == ':')) { 1801 in++; 1802 while (((*in >= 0x61) && (*in <= 0x7A)) || 1803 ((*in >= 0x41) && (*in <= 0x5A)) || 1804 ((*in >= 0x30) && (*in <= 0x39)) || 1805 (*in == '_') || (*in == '-') || 1806 (*in == ':') || (*in == '.')) 1807 in++; 1808 if ((*in > 0) && (*in < 0x80)) { 1809 count = in - ctxt->input->cur; 1810 ret = xmlStrndup(ctxt->input->cur, count); 1811 ctxt->input->cur = in; 1812 return(ret); 1813 } 1814 } 1815 return(xmlParseNameComplex(ctxt)); 1816} 1817 1818/** 1819 * xmlParseNameAndCompare: 1820 * @ctxt: an XML parser context 1821 * 1822 * parse an XML name and compares for match 1823 * (specialized for endtag parsing) 1824 * 1825 * 1826 * Returns NULL for an illegal name, (xmlChar*) 1 for success 1827 * and the name for mismatch 1828 */ 1829 1830static xmlChar * 1831xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 1832 const xmlChar *cmp = other; 1833 const xmlChar *in; 1834 xmlChar *ret; 1835 1836 GROW; 1837 1838 in = ctxt->input->cur; 1839 while (*in != 0 && *in == *cmp) { 1840 ++in; 1841 ++cmp; 1842 } 1843 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { 1844 /* success */ 1845 ctxt->input->cur = in; 1846 return (xmlChar*) 1; 1847 } 1848 /* failure (or end of input buffer), check with full function */ 1849 ret = xmlParseName (ctxt); 1850 if (ret != 0 && xmlStrEqual (ret, other)) { 1851 xmlFree (ret); 1852 return (xmlChar*) 1; 1853 } 
1854 return ret; 1855} 1856 1857static xmlChar * 1858xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1859 xmlChar buf[XML_MAX_NAMELEN + 5]; 1860 int len = 0, l; 1861 int c; 1862 int count = 0; 1863 1864 /* 1865 * Handler for more complex cases 1866 */ 1867 GROW; 1868 c = CUR_CHAR(l); 1869 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1870 (!IS_LETTER(c) && (c != '_') && 1871 (c != ':'))) { 1872 return(NULL); 1873 } 1874 1875 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1876 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1877 (c == '.') || (c == '-') || 1878 (c == '_') || (c == ':') || 1879 (IS_COMBINING(c)) || 1880 (IS_EXTENDER(c)))) { 1881 if (count++ > 100) { 1882 count = 0; 1883 GROW; 1884 } 1885 COPY_BUF(l,buf,len,c); 1886 NEXTL(l); 1887 c = CUR_CHAR(l); 1888 if (len >= XML_MAX_NAMELEN) { 1889 /* 1890 * Okay someone managed to make a huge name, so he's ready to pay 1891 * for the processing speed. 1892 */ 1893 xmlChar *buffer; 1894 int max = len * 2; 1895 1896 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1897 if (buffer == NULL) { 1898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1899 ctxt->sax->error(ctxt->userData, 1900 "xmlParseNameComplex: out of memory\n"); 1901 return(NULL); 1902 } 1903 memcpy(buffer, buf, len); 1904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1905 (c == '.') || (c == '-') || 1906 (c == '_') || (c == ':') || 1907 (IS_COMBINING(c)) || 1908 (IS_EXTENDER(c))) { 1909 if (count++ > 100) { 1910 count = 0; 1911 GROW; 1912 } 1913 if (len + 10 > max) { 1914 max *= 2; 1915 buffer = (xmlChar *) xmlRealloc(buffer, 1916 max * sizeof(xmlChar)); 1917 if (buffer == NULL) { 1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1919 ctxt->sax->error(ctxt->userData, 1920 "xmlParseNameComplex: out of memory\n"); 1921 return(NULL); 1922 } 1923 } 1924 COPY_BUF(l,buffer,len,c); 1925 NEXTL(l); 1926 c = CUR_CHAR(l); 1927 } 1928 buffer[len] = 0; 1929 return(buffer); 1930 } 1931 } 1932 
return(xmlStrndup(buf, len)); 1933} 1934 1935/** 1936 * xmlParseStringName: 1937 * @ctxt: an XML parser context 1938 * @str: a pointer to the string pointer (IN/OUT) 1939 * 1940 * parse an XML name. 1941 * 1942 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1943 * CombiningChar | Extender 1944 * 1945 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1946 * 1947 * [6] Names ::= Name (S Name)* 1948 * 1949 * Returns the Name parsed or NULL. The @str pointer 1950 * is updated to the current location in the string. 1951 */ 1952 1953static xmlChar * 1954xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1955 xmlChar buf[XML_MAX_NAMELEN + 5]; 1956 const xmlChar *cur = *str; 1957 int len = 0, l; 1958 int c; 1959 1960 c = CUR_SCHAR(cur, l); 1961 if (!IS_LETTER(c) && (c != '_') && 1962 (c != ':')) { 1963 return(NULL); 1964 } 1965 1966 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1967 (c == '.') || (c == '-') || 1968 (c == '_') || (c == ':') || 1969 (IS_COMBINING(c)) || 1970 (IS_EXTENDER(c))) { 1971 COPY_BUF(l,buf,len,c); 1972 cur += l; 1973 c = CUR_SCHAR(cur, l); 1974 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1975 /* 1976 * Okay someone managed to make a huge name, so he's ready to pay 1977 * for the processing speed. 
1978 */ 1979 xmlChar *buffer; 1980 int max = len * 2; 1981 1982 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1983 if (buffer == NULL) { 1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1985 ctxt->sax->error(ctxt->userData, 1986 "xmlParseStringName: out of memory\n"); 1987 return(NULL); 1988 } 1989 memcpy(buffer, buf, len); 1990 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1991 (c == '.') || (c == '-') || 1992 (c == '_') || (c == ':') || 1993 (IS_COMBINING(c)) || 1994 (IS_EXTENDER(c))) { 1995 if (len + 10 > max) { 1996 max *= 2; 1997 buffer = (xmlChar *) xmlRealloc(buffer, 1998 max * sizeof(xmlChar)); 1999 if (buffer == NULL) { 2000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2001 ctxt->sax->error(ctxt->userData, 2002 "xmlParseStringName: out of memory\n"); 2003 return(NULL); 2004 } 2005 } 2006 COPY_BUF(l,buffer,len,c); 2007 cur += l; 2008 c = CUR_SCHAR(cur, l); 2009 } 2010 buffer[len] = 0; 2011 *str = cur; 2012 return(buffer); 2013 } 2014 } 2015 *str = cur; 2016 return(xmlStrndup(buf, len)); 2017} 2018 2019/** 2020 * xmlParseNmtoken: 2021 * @ctxt: an XML parser context 2022 * 2023 * parse an XML Nmtoken. 2024 * 2025 * [7] Nmtoken ::= (NameChar)+ 2026 * 2027 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2028 * 2029 * Returns the Nmtoken parsed or NULL 2030 */ 2031 2032xmlChar * 2033xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2034 xmlChar buf[XML_MAX_NAMELEN + 5]; 2035 int len = 0, l; 2036 int c; 2037 int count = 0; 2038 2039 GROW; 2040 c = CUR_CHAR(l); 2041 2042 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2043 (c == '.') || (c == '-') || 2044 (c == '_') || (c == ':') || 2045 (IS_COMBINING(c)) || 2046 (IS_EXTENDER(c))) { 2047 if (count++ > 100) { 2048 count = 0; 2049 GROW; 2050 } 2051 COPY_BUF(l,buf,len,c); 2052 NEXTL(l); 2053 c = CUR_CHAR(l); 2054 if (len >= XML_MAX_NAMELEN) { 2055 /* 2056 * Okay someone managed to make a huge token, so he's ready to pay 2057 * for the processing speed. 
2058 */ 2059 xmlChar *buffer; 2060 int max = len * 2; 2061 2062 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 2063 if (buffer == NULL) { 2064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2065 ctxt->sax->error(ctxt->userData, 2066 "xmlParseNmtoken: out of memory\n"); 2067 return(NULL); 2068 } 2069 memcpy(buffer, buf, len); 2070 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2071 (c == '.') || (c == '-') || 2072 (c == '_') || (c == ':') || 2073 (IS_COMBINING(c)) || 2074 (IS_EXTENDER(c))) { 2075 if (count++ > 100) { 2076 count = 0; 2077 GROW; 2078 } 2079 if (len + 10 > max) { 2080 max *= 2; 2081 buffer = (xmlChar *) xmlRealloc(buffer, 2082 max * sizeof(xmlChar)); 2083 if (buffer == NULL) { 2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2085 ctxt->sax->error(ctxt->userData, 2086 "xmlParseNmtoken: out of memory\n"); 2087 return(NULL); 2088 } 2089 } 2090 COPY_BUF(l,buffer,len,c); 2091 NEXTL(l); 2092 c = CUR_CHAR(l); 2093 } 2094 buffer[len] = 0; 2095 return(buffer); 2096 } 2097 } 2098 if (len == 0) 2099 return(NULL); 2100 return(xmlStrndup(buf, len)); 2101} 2102 2103/** 2104 * xmlParseEntityValue: 2105 * @ctxt: an XML parser context 2106 * @orig: if non-NULL store a copy of the original entity value 2107 * 2108 * parse a value for ENTITY declarations 2109 * 2110 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2111 * "'" ([^%&'] | PEReference | Reference)* "'" 2112 * 2113 * Returns the EntityValue parsed with reference substituted or NULL 2114 */ 2115 2116xmlChar * 2117xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2118 xmlChar *buf = NULL; 2119 int len = 0; 2120 int size = XML_PARSER_BUFFER_SIZE; 2121 int c, l; 2122 xmlChar stop; 2123 xmlChar *ret = NULL; 2124 const xmlChar *cur = NULL; 2125 xmlParserInputPtr input; 2126 2127 if (RAW == '"') stop = '"'; 2128 else if (RAW == '\'') stop = '\''; 2129 else { 2130 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2131 if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) 2132 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2133 ctxt->wellFormed = 0; 2134 ctxt->disableSAX = 1; 2135 return(NULL); 2136 } 2137 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2138 if (buf == NULL) { 2139 xmlGenericError(xmlGenericErrorContext, 2140 "malloc of %d byte failed\n", size); 2141 return(NULL); 2142 } 2143 2144 /* 2145 * The content of the entity definition is copied in a buffer. 2146 */ 2147 2148 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2149 input = ctxt->input; 2150 GROW; 2151 NEXT; 2152 c = CUR_CHAR(l); 2153 /* 2154 * NOTE: 4.4.5 Included in Literal 2155 * When a parameter entity reference appears in a literal entity 2156 * value, ... a single or double quote character in the replacement 2157 * text is always treated as a normal data character and will not 2158 * terminate the literal. 2159 * In practice it means we stop the loop only when back at parsing 2160 * the initial entity and the quote is found 2161 */ 2162 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2163 (ctxt->input != input))) { 2164 if (len + 5 >= size) { 2165 size *= 2; 2166 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2167 if (buf == NULL) { 2168 xmlGenericError(xmlGenericErrorContext, 2169 "realloc of %d byte failed\n", size); 2170 return(NULL); 2171 } 2172 } 2173 COPY_BUF(l,buf,len,c); 2174 NEXTL(l); 2175 /* 2176 * Pop-up of finished entities. 2177 */ 2178 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2179 xmlPopInput(ctxt); 2180 2181 GROW; 2182 c = CUR_CHAR(l); 2183 if (c == 0) { 2184 GROW; 2185 c = CUR_CHAR(l); 2186 } 2187 } 2188 buf[len] = 0; 2189 2190 /* 2191 * Raise problem w.r.t. '&' and '%' being used in non-entities 2192 * reference constructs. 
Note Charref will be handled in 2193 * xmlStringDecodeEntities() 2194 */ 2195 cur = buf; 2196 while (*cur != 0) { /* non input consuming */ 2197 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2198 xmlChar *name; 2199 xmlChar tmp = *cur; 2200 2201 cur++; 2202 name = xmlParseStringName(ctxt, &cur); 2203 if ((name == NULL) || (*cur != ';')) { 2204 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 2205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2206 ctxt->sax->error(ctxt->userData, 2207 "EntityValue: '%c' forbidden except for entities references\n", 2208 tmp); 2209 ctxt->wellFormed = 0; 2210 ctxt->disableSAX = 1; 2211 } 2212 if ((tmp == '%') && (ctxt->inSubset == 1) && 2213 (ctxt->inputNr == 1)) { 2214 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2216 ctxt->sax->error(ctxt->userData, 2217 "EntityValue: PEReferences forbidden in internal subset\n", 2218 tmp); 2219 ctxt->wellFormed = 0; 2220 ctxt->disableSAX = 1; 2221 } 2222 if (name != NULL) 2223 xmlFree(name); 2224 } 2225 cur++; 2226 } 2227 2228 /* 2229 * Then PEReference entities are substituted. 2230 */ 2231 if (c != stop) { 2232 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2234 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2235 ctxt->wellFormed = 0; 2236 ctxt->disableSAX = 1; 2237 xmlFree(buf); 2238 } else { 2239 NEXT; 2240 /* 2241 * NOTE: 4.4.7 Bypassed 2242 * When a general entity reference appears in the EntityValue in 2243 * an entity declaration, it is bypassed and left as is. 2244 * so XML_SUBSTITUTE_REF is not set here. 
2245 */ 2246 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2247 0, 0, 0); 2248 if (orig != NULL) 2249 *orig = buf; 2250 else 2251 xmlFree(buf); 2252 } 2253 2254 return(ret); 2255} 2256 2257/** 2258 * xmlParseAttValue: 2259 * @ctxt: an XML parser context 2260 * 2261 * parse a value for an attribute 2262 * Note: the parser won't do substitution of entities here, this 2263 * will be handled later in xmlStringGetNodeList 2264 * 2265 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2266 * "'" ([^<&'] | Reference)* "'" 2267 * 2268 * 3.3.3 Attribute-Value Normalization: 2269 * Before the value of an attribute is passed to the application or 2270 * checked for validity, the XML processor must normalize it as follows: 2271 * - a character reference is processed by appending the referenced 2272 * character to the attribute value 2273 * - an entity reference is processed by recursively processing the 2274 * replacement text of the entity 2275 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2276 * appending #x20 to the normalized value, except that only a single 2277 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2278 * parsed entity or the literal entity value of an internal parsed entity 2279 * - other characters are processed by appending them to the normalized value 2280 * If the declared value is not CDATA, then the XML processor must further 2281 * process the normalized attribute value by discarding any leading and 2282 * trailing space (#x20) characters, and by replacing sequences of space 2283 * (#x20) characters by a single space (#x20) character. 2284 * All attributes for which no declaration has been read should be treated 2285 * by a non-validating parser as if declared CDATA. 2286 * 2287 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
2288 */ 2289 2290xmlChar * 2291xmlParseAttValueComplex(xmlParserCtxtPtr ctxt); 2292 2293xmlChar * 2294xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2295 xmlChar limit = 0; 2296 const xmlChar *in = NULL; 2297 xmlChar *ret = NULL; 2298 SHRINK; 2299 GROW; 2300 in = (xmlChar *) CUR_PTR; 2301 if (*in != '"' && *in != '\'') { 2302 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2304 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2305 ctxt->wellFormed = 0; 2306 ctxt->disableSAX = 1; 2307 return(NULL); 2308 } 2309 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2310 limit = *in; 2311 ++in; 2312 2313 while (*in != limit && *in >= 0x20 && *in <= 0x7f && 2314 *in != '&' && *in != '<' 2315 ) { 2316 ++in; 2317 } 2318 if (*in != limit) { 2319 return xmlParseAttValueComplex(ctxt); 2320 } 2321 ++in; 2322 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2); 2323 CUR_PTR = in; 2324 return ret; 2325} 2326 2327xmlChar * 2328xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) { 2329 xmlChar limit = 0; 2330 xmlChar *buf = NULL; 2331 int len = 0; 2332 int buf_size = 0; 2333 int c, l; 2334 xmlChar *current = NULL; 2335 xmlEntityPtr ent; 2336 2337 2338 SHRINK; 2339 if (NXT(0) == '"') { 2340 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2341 limit = '"'; 2342 NEXT; 2343 } else if (NXT(0) == '\'') { 2344 limit = '\''; 2345 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2346 NEXT; 2347 } else { 2348 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2350 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2351 ctxt->wellFormed = 0; 2352 ctxt->disableSAX = 1; 2353 return(NULL); 2354 } 2355 2356 /* 2357 * allocate a translation buffer. 
2358 */ 2359 buf_size = XML_PARSER_BUFFER_SIZE; 2360 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2361 if (buf == NULL) { 2362 xmlGenericError(xmlGenericErrorContext, 2363 "xmlParseAttValue: malloc failed"); 2364 return(NULL); 2365 } 2366 2367 /* 2368 * OK loop until we reach one of the ending char or a size limit. 2369 */ 2370 c = CUR_CHAR(l); 2371 while ((NXT(0) != limit) && /* checked */ 2372 (c != '<')) { 2373 if (c == 0) break; 2374 if (c == '&') { 2375 if (NXT(1) == '#') { 2376 int val = xmlParseCharRef(ctxt); 2377 if (val == '&') { 2378 if (ctxt->replaceEntities) { 2379 if (len > buf_size - 10) { 2380 growBuffer(buf); 2381 } 2382 buf[len++] = '&'; 2383 } else { 2384 /* 2385 * The reparsing will be done in xmlStringGetNodeList() 2386 * called by the attribute() function in SAX.c 2387 */ 2388 static xmlChar buffer[6] = "&"; 2389 2390 if (len > buf_size - 10) { 2391 growBuffer(buf); 2392 } 2393 current = &buffer[0]; 2394 while (*current != 0) { /* non input consuming */ 2395 buf[len++] = *current++; 2396 } 2397 } 2398 } else { 2399 if (len > buf_size - 10) { 2400 growBuffer(buf); 2401 } 2402 len += xmlCopyChar(0, &buf[len], val); 2403 } 2404 } else { 2405 ent = xmlParseEntityRef(ctxt); 2406 if ((ent != NULL) && 2407 (ctxt->replaceEntities != 0)) { 2408 xmlChar *rep; 2409 2410 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2411 rep = xmlStringDecodeEntities(ctxt, ent->content, 2412 XML_SUBSTITUTE_REF, 0, 0, 0); 2413 if (rep != NULL) { 2414 current = rep; 2415 while (*current != 0) { /* non input consuming */ 2416 buf[len++] = *current++; 2417 if (len > buf_size - 10) { 2418 growBuffer(buf); 2419 } 2420 } 2421 xmlFree(rep); 2422 } 2423 } else { 2424 if (len > buf_size - 10) { 2425 growBuffer(buf); 2426 } 2427 if (ent->content != NULL) 2428 buf[len++] = ent->content[0]; 2429 } 2430 } else if (ent != NULL) { 2431 int i = xmlStrlen(ent->name); 2432 const xmlChar *cur = ent->name; 2433 2434 /* 2435 * This may look absurd but is needed to detect 2436 
* entities problems 2437 */ 2438 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2439 (ent->content != NULL)) { 2440 xmlChar *rep; 2441 rep = xmlStringDecodeEntities(ctxt, ent->content, 2442 XML_SUBSTITUTE_REF, 0, 0, 0); 2443 if (rep != NULL) 2444 xmlFree(rep); 2445 } 2446 2447 /* 2448 * Just output the reference 2449 */ 2450 buf[len++] = '&'; 2451 if (len > buf_size - i - 10) { 2452 growBuffer(buf); 2453 } 2454 for (;i > 0;i--) 2455 buf[len++] = *cur++; 2456 buf[len++] = ';'; 2457 } 2458 } 2459 } else { 2460 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2461 COPY_BUF(l,buf,len,0x20); 2462 if (len > buf_size - 10) { 2463 growBuffer(buf); 2464 } 2465 } else { 2466 COPY_BUF(l,buf,len,c); 2467 if (len > buf_size - 10) { 2468 growBuffer(buf); 2469 } 2470 } 2471 NEXTL(l); 2472 } 2473 GROW; 2474 c = CUR_CHAR(l); 2475 } 2476 buf[len++] = 0; 2477 if (RAW == '<') { 2478 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2480 ctxt->sax->error(ctxt->userData, 2481 "Unescaped '<' not allowed in attributes values\n"); 2482 ctxt->wellFormed = 0; 2483 ctxt->disableSAX = 1; 2484 } else if (RAW != limit) { 2485 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2487 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2488 ctxt->wellFormed = 0; 2489 ctxt->disableSAX = 1; 2490 } else 2491 NEXT; 2492 return(buf); 2493} 2494 2495/** 2496 * xmlParseSystemLiteral: 2497 * @ctxt: an XML parser context 2498 * 2499 * parse an XML Literal 2500 * 2501 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2502 * 2503 * Returns the SystemLiteral parsed or NULL 2504 */ 2505 2506xmlChar * 2507xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2508 xmlChar *buf = NULL; 2509 int len = 0; 2510 int size = XML_PARSER_BUFFER_SIZE; 2511 int cur, l; 2512 xmlChar stop; 2513 int state = ctxt->instate; 2514 int count = 0; 2515 2516 SHRINK; 2517 if (RAW == '"') { 2518 NEXT; 2519 
stop = '"'; 2520 } else if (RAW == '\'') { 2521 NEXT; 2522 stop = '\''; 2523 } else { 2524 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2526 ctxt->sax->error(ctxt->userData, 2527 "SystemLiteral \" or ' expected\n"); 2528 ctxt->wellFormed = 0; 2529 ctxt->disableSAX = 1; 2530 return(NULL); 2531 } 2532 2533 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2534 if (buf == NULL) { 2535 xmlGenericError(xmlGenericErrorContext, 2536 "malloc of %d byte failed\n", size); 2537 return(NULL); 2538 } 2539 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2540 cur = CUR_CHAR(l); 2541 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2542 if (len + 5 >= size) { 2543 size *= 2; 2544 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2545 if (buf == NULL) { 2546 xmlGenericError(xmlGenericErrorContext, 2547 "realloc of %d byte failed\n", size); 2548 ctxt->instate = (xmlParserInputState) state; 2549 return(NULL); 2550 } 2551 } 2552 count++; 2553 if (count > 50) { 2554 GROW; 2555 count = 0; 2556 } 2557 COPY_BUF(l,buf,len,cur); 2558 NEXTL(l); 2559 cur = CUR_CHAR(l); 2560 if (cur == 0) { 2561 GROW; 2562 SHRINK; 2563 cur = CUR_CHAR(l); 2564 } 2565 } 2566 buf[len] = 0; 2567 ctxt->instate = (xmlParserInputState) state; 2568 if (!IS_CHAR(cur)) { 2569 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2571 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2572 ctxt->wellFormed = 0; 2573 ctxt->disableSAX = 1; 2574 } else { 2575 NEXT; 2576 } 2577 return(buf); 2578} 2579 2580/** 2581 * xmlParsePubidLiteral: 2582 * @ctxt: an XML parser context 2583 * 2584 * parse an XML public literal 2585 * 2586 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2587 * 2588 * Returns the PubidLiteral parsed or NULL. 
2589 */ 2590 2591xmlChar * 2592xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2593 xmlChar *buf = NULL; 2594 int len = 0; 2595 int size = XML_PARSER_BUFFER_SIZE; 2596 xmlChar cur; 2597 xmlChar stop; 2598 int count = 0; 2599 xmlParserInputState oldstate = ctxt->instate; 2600 2601 SHRINK; 2602 if (RAW == '"') { 2603 NEXT; 2604 stop = '"'; 2605 } else if (RAW == '\'') { 2606 NEXT; 2607 stop = '\''; 2608 } else { 2609 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2611 ctxt->sax->error(ctxt->userData, 2612 "SystemLiteral \" or ' expected\n"); 2613 ctxt->wellFormed = 0; 2614 ctxt->disableSAX = 1; 2615 return(NULL); 2616 } 2617 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2618 if (buf == NULL) { 2619 xmlGenericError(xmlGenericErrorContext, 2620 "malloc of %d byte failed\n", size); 2621 return(NULL); 2622 } 2623 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 2624 cur = CUR; 2625 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2626 if (len + 1 >= size) { 2627 size *= 2; 2628 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2629 if (buf == NULL) { 2630 xmlGenericError(xmlGenericErrorContext, 2631 "realloc of %d byte failed\n", size); 2632 return(NULL); 2633 } 2634 } 2635 buf[len++] = cur; 2636 count++; 2637 if (count > 50) { 2638 GROW; 2639 count = 0; 2640 } 2641 NEXT; 2642 cur = CUR; 2643 if (cur == 0) { 2644 GROW; 2645 SHRINK; 2646 cur = CUR; 2647 } 2648 } 2649 buf[len] = 0; 2650 if (cur != stop) { 2651 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2653 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2654 ctxt->wellFormed = 0; 2655 ctxt->disableSAX = 1; 2656 } else { 2657 NEXT; 2658 } 2659 ctxt->instate = oldstate; 2660 return(buf); 2661} 2662 2663void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2664/** 2665 * xmlParseCharData: 2666 * @ctxt: an XML parser context 2667 * @cdata: int indicating whether we 
are within a CDATA section 2668 * 2669 * parse a CharData section. 2670 * if we are within a CDATA section ']]>' marks an end of section. 2671 * 2672 * The right angle bracket (>) may be represented using the string ">", 2673 * and must, for compatibility, be escaped using ">" or a character 2674 * reference when it appears in the string "]]>" in content, when that 2675 * string is not marking the end of a CDATA section. 2676 * 2677 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2678 */ 2679 2680void 2681xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2682 const xmlChar *in; 2683 int nbchar = 0; 2684 int line = ctxt->input->line; 2685 int col = ctxt->input->col; 2686 2687 SHRINK; 2688 GROW; 2689 /* 2690 * Accelerated common case where input don't need to be 2691 * modified before passing it to the handler. 2692 */ 2693 if (!cdata) { 2694 in = ctxt->input->cur; 2695 do { 2696get_more: 2697 while (((*in >= 0x20) && (*in != '<') && (*in != ']') && 2698 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2699 in++; 2700 if (*in == 0xA) { 2701 ctxt->input->line++; 2702 in++; 2703 while (*in == 0xA) { 2704 ctxt->input->line++; 2705 in++; 2706 } 2707 goto get_more; 2708 } 2709 if (*in == ']') { 2710 if ((in[1] == ']') && (in[2] == '>')) { 2711 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2713 ctxt->sax->error(ctxt->userData, 2714 "Sequence ']]>' not allowed in content\n"); 2715 ctxt->input->cur = in; 2716 ctxt->wellFormed = 0; 2717 ctxt->disableSAX = 1; 2718 return; 2719 } 2720 in++; 2721 goto get_more; 2722 } 2723 nbchar = in - ctxt->input->cur; 2724 if (nbchar > 0) { 2725 if (IS_BLANK(*ctxt->input->cur)) { 2726 const xmlChar *tmp = ctxt->input->cur; 2727 ctxt->input->cur = in; 2728 if (areBlanks(ctxt, tmp, nbchar)) { 2729 if (ctxt->sax->ignorableWhitespace != NULL) 2730 ctxt->sax->ignorableWhitespace(ctxt->userData, 2731 tmp, nbchar); 2732 } else { 2733 if (ctxt->sax->characters != NULL) 2734 
ctxt->sax->characters(ctxt->userData, 2735 tmp, nbchar); 2736 } 2737 line = ctxt->input->line; 2738 col = ctxt->input->col; 2739 } else { 2740 if (ctxt->sax->characters != NULL) 2741 ctxt->sax->characters(ctxt->userData, 2742 ctxt->input->cur, nbchar); 2743 line = ctxt->input->line; 2744 col = ctxt->input->col; 2745 } 2746 } 2747 ctxt->input->cur = in; 2748 if (*in == 0xD) { 2749 in++; 2750 if (*in == 0xA) { 2751 ctxt->input->cur = in; 2752 in++; 2753 ctxt->input->line++; 2754 continue; /* while */ 2755 } 2756 in--; 2757 } 2758 if (*in == '<') { 2759 return; 2760 } 2761 if (*in == '&') { 2762 return; 2763 } 2764 SHRINK; 2765 GROW; 2766 in = ctxt->input->cur; 2767 } while ((*in >= 0x20) && (*in <= 0x7F)); 2768 nbchar = 0; 2769 } 2770 ctxt->input->line = line; 2771 ctxt->input->col = col; 2772 xmlParseCharDataComplex(ctxt, cdata); 2773} 2774 2775void 2776xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2777 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2778 int nbchar = 0; 2779 int cur, l; 2780 int count = 0; 2781 2782 SHRINK; 2783 GROW; 2784 cur = CUR_CHAR(l); 2785 while ((cur != '<') && /* checked */ 2786 (cur != '&') && 2787 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2788 if ((cur == ']') && (NXT(1) == ']') && 2789 (NXT(2) == '>')) { 2790 if (cdata) break; 2791 else { 2792 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2794 ctxt->sax->error(ctxt->userData, 2795 "Sequence ']]>' not allowed in content\n"); 2796 /* Should this be relaxed ??? I see a "must here */ 2797 ctxt->wellFormed = 0; 2798 ctxt->disableSAX = 1; 2799 } 2800 } 2801 COPY_BUF(l,buf,nbchar,cur); 2802 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2803 /* 2804 * OK the segment is to be consumed as chars. 
2805 */ 2806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2807 if (areBlanks(ctxt, buf, nbchar)) { 2808 if (ctxt->sax->ignorableWhitespace != NULL) 2809 ctxt->sax->ignorableWhitespace(ctxt->userData, 2810 buf, nbchar); 2811 } else { 2812 if (ctxt->sax->characters != NULL) 2813 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2814 } 2815 } 2816 nbchar = 0; 2817 } 2818 count++; 2819 if (count > 50) { 2820 GROW; 2821 count = 0; 2822 } 2823 NEXTL(l); 2824 cur = CUR_CHAR(l); 2825 } 2826 if (nbchar != 0) { 2827 /* 2828 * OK the segment is to be consumed as chars. 2829 */ 2830 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2831 if (areBlanks(ctxt, buf, nbchar)) { 2832 if (ctxt->sax->ignorableWhitespace != NULL) 2833 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2834 } else { 2835 if (ctxt->sax->characters != NULL) 2836 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2837 } 2838 } 2839 } 2840} 2841 2842/** 2843 * xmlParseExternalID: 2844 * @ctxt: an XML parser context 2845 * @publicID: a xmlChar** receiving PubidLiteral 2846 * @strict: indicate whether we should restrict parsing to only 2847 * production [75], see NOTE below 2848 * 2849 * Parse an External ID or a Public ID 2850 * 2851 * NOTE: Productions [75] and [83] interact badly since [75] can generate 2852 * 'PUBLIC' S PubidLiteral S SystemLiteral 2853 * 2854 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2855 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2856 * 2857 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2858 * 2859 * Returns the function returns SystemLiteral and in the second 2860 * case publicID receives PubidLiteral, is strict is off 2861 * it is possible to return NULL and have publicID set. 
2862 */ 2863 2864xmlChar * 2865xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2866 xmlChar *URI = NULL; 2867 2868 SHRINK; 2869 2870 *publicID = NULL; 2871 if ((RAW == 'S') && (NXT(1) == 'Y') && 2872 (NXT(2) == 'S') && (NXT(3) == 'T') && 2873 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2874 SKIP(6); 2875 if (!IS_BLANK(CUR)) { 2876 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2878 ctxt->sax->error(ctxt->userData, 2879 "Space required after 'SYSTEM'\n"); 2880 ctxt->wellFormed = 0; 2881 ctxt->disableSAX = 1; 2882 } 2883 SKIP_BLANKS; 2884 URI = xmlParseSystemLiteral(ctxt); 2885 if (URI == NULL) { 2886 ctxt->errNo = XML_ERR_URI_REQUIRED; 2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2888 ctxt->sax->error(ctxt->userData, 2889 "xmlParseExternalID: SYSTEM, no URI\n"); 2890 ctxt->wellFormed = 0; 2891 ctxt->disableSAX = 1; 2892 } 2893 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2894 (NXT(2) == 'B') && (NXT(3) == 'L') && 2895 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2896 SKIP(6); 2897 if (!IS_BLANK(CUR)) { 2898 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2900 ctxt->sax->error(ctxt->userData, 2901 "Space required after 'PUBLIC'\n"); 2902 ctxt->wellFormed = 0; 2903 ctxt->disableSAX = 1; 2904 } 2905 SKIP_BLANKS; 2906 *publicID = xmlParsePubidLiteral(ctxt); 2907 if (*publicID == NULL) { 2908 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2910 ctxt->sax->error(ctxt->userData, 2911 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2912 ctxt->wellFormed = 0; 2913 ctxt->disableSAX = 1; 2914 } 2915 if (strict) { 2916 /* 2917 * We don't handle [83] so "S SystemLiteral" is required. 
2918 */ 2919 if (!IS_BLANK(CUR)) { 2920 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2922 ctxt->sax->error(ctxt->userData, 2923 "Space required after the Public Identifier\n"); 2924 ctxt->wellFormed = 0; 2925 ctxt->disableSAX = 1; 2926 } 2927 } else { 2928 /* 2929 * We handle [83] so we return immediately, if 2930 * "S SystemLiteral" is not detected. From a purely parsing 2931 * point of view that's a nice mess. 2932 */ 2933 const xmlChar *ptr; 2934 GROW; 2935 2936 ptr = CUR_PTR; 2937 if (!IS_BLANK(*ptr)) return(NULL); 2938 2939 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2940 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2941 } 2942 SKIP_BLANKS; 2943 URI = xmlParseSystemLiteral(ctxt); 2944 if (URI == NULL) { 2945 ctxt->errNo = XML_ERR_URI_REQUIRED; 2946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2947 ctxt->sax->error(ctxt->userData, 2948 "xmlParseExternalID: PUBLIC, no URI\n"); 2949 ctxt->wellFormed = 0; 2950 ctxt->disableSAX = 1; 2951 } 2952 } 2953 return(URI); 2954} 2955 2956/** 2957 * xmlParseComment: 2958 * @ctxt: an XML parser context 2959 * 2960 * Skip an XML (SGML) comment <!-- .... --> 2961 * The spec says that "For compatibility, the string "--" (double-hyphen) 2962 * must not occur within comments. " 2963 * 2964 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2965 */ 2966void 2967xmlParseComment(xmlParserCtxtPtr ctxt) { 2968 xmlChar *buf = NULL; 2969 int len; 2970 int size = XML_PARSER_BUFFER_SIZE; 2971 int q, ql; 2972 int r, rl; 2973 int cur, l; 2974 xmlParserInputState state; 2975 xmlParserInputPtr input = ctxt->input; 2976 int count = 0; 2977 2978 /* 2979 * Check that there is a comment right here. 
2980 */ 2981 if ((RAW != '<') || (NXT(1) != '!') || 2982 (NXT(2) != '-') || (NXT(3) != '-')) return; 2983 2984 state = ctxt->instate; 2985 ctxt->instate = XML_PARSER_COMMENT; 2986 SHRINK; 2987 SKIP(4); 2988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2989 if (buf == NULL) { 2990 xmlGenericError(xmlGenericErrorContext, 2991 "malloc of %d byte failed\n", size); 2992 ctxt->instate = state; 2993 return; 2994 } 2995 q = CUR_CHAR(ql); 2996 NEXTL(ql); 2997 r = CUR_CHAR(rl); 2998 NEXTL(rl); 2999 cur = CUR_CHAR(l); 3000 len = 0; 3001 while (IS_CHAR(cur) && /* checked */ 3002 ((cur != '>') || 3003 (r != '-') || (q != '-'))) { 3004 if ((r == '-') && (q == '-')) { 3005 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3007 ctxt->sax->error(ctxt->userData, 3008 "Comment must not contain '--' (double-hyphen)`\n"); 3009 ctxt->wellFormed = 0; 3010 ctxt->disableSAX = 1; 3011 } 3012 if (len + 5 >= size) { 3013 size *= 2; 3014 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3015 if (buf == NULL) { 3016 xmlGenericError(xmlGenericErrorContext, 3017 "realloc of %d byte failed\n", size); 3018 ctxt->instate = state; 3019 return; 3020 } 3021 } 3022 COPY_BUF(ql,buf,len,q); 3023 q = r; 3024 ql = rl; 3025 r = cur; 3026 rl = l; 3027 3028 count++; 3029 if (count > 50) { 3030 GROW; 3031 count = 0; 3032 } 3033 NEXTL(l); 3034 cur = CUR_CHAR(l); 3035 if (cur == 0) { 3036 SHRINK; 3037 GROW; 3038 cur = CUR_CHAR(l); 3039 } 3040 } 3041 buf[len] = 0; 3042 if (!IS_CHAR(cur)) { 3043 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 3044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3045 ctxt->sax->error(ctxt->userData, 3046 "Comment not terminated \n<!--%.50s\n", buf); 3047 ctxt->wellFormed = 0; 3048 ctxt->disableSAX = 1; 3049 xmlFree(buf); 3050 } else { 3051 if (input != ctxt->input) { 3052 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3054 ctxt->sax->error(ctxt->userData, 
3055"Comment doesn't start and stop in the same entity\n"); 3056 ctxt->wellFormed = 0; 3057 ctxt->disableSAX = 1; 3058 } 3059 NEXT; 3060 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3061 (!ctxt->disableSAX)) 3062 ctxt->sax->comment(ctxt->userData, buf); 3063 xmlFree(buf); 3064 } 3065 ctxt->instate = state; 3066} 3067 3068/** 3069 * xmlParsePITarget: 3070 * @ctxt: an XML parser context 3071 * 3072 * parse the name of a PI 3073 * 3074 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3075 * 3076 * Returns the PITarget name or NULL 3077 */ 3078 3079xmlChar * 3080xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3081 xmlChar *name; 3082 3083 name = xmlParseName(ctxt); 3084 if ((name != NULL) && 3085 ((name[0] == 'x') || (name[0] == 'X')) && 3086 ((name[1] == 'm') || (name[1] == 'M')) && 3087 ((name[2] == 'l') || (name[2] == 'L'))) { 3088 int i; 3089 if ((name[0] == 'x') && (name[1] == 'm') && 3090 (name[2] == 'l') && (name[3] == 0)) { 3091 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3093 ctxt->sax->error(ctxt->userData, 3094 "XML declaration allowed only at the start of the document\n"); 3095 ctxt->wellFormed = 0; 3096 ctxt->disableSAX = 1; 3097 return(name); 3098 } else if (name[3] == 0) { 3099 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3101 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 3102 ctxt->wellFormed = 0; 3103 ctxt->disableSAX = 1; 3104 return(name); 3105 } 3106 for (i = 0;;i++) { 3107 if (xmlW3CPIs[i] == NULL) break; 3108 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3109 return(name); 3110 } 3111 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 3112 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3113 ctxt->sax->warning(ctxt->userData, 3114 "xmlParsePITarget: invalid name prefix 'xml'\n"); 3115 } 3116 } 3117 return(name); 3118} 3119 3120#ifdef LIBXML_CATALOG_ENABLED 3121/** 3122 * xmlParseCatalogPI: 3123 
* @ctxt: an XML parser context 3124 * @catalog: the PI value string 3125 * 3126 * parse an XML Catalog Processing Instruction. 3127 * 3128 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3129 * 3130 * Occurs only if allowed by the user and if happening in the Misc 3131 * part of the document before any doctype informations 3132 * This will add the given catalog to the parsing context in order 3133 * to be used if there is a resolution need further down in the document 3134 */ 3135 3136static void 3137xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3138 xmlChar *URL = NULL; 3139 const xmlChar *tmp, *base; 3140 xmlChar marker; 3141 3142 tmp = catalog; 3143 while (IS_BLANK(*tmp)) tmp++; 3144 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3145 goto error; 3146 tmp += 7; 3147 while (IS_BLANK(*tmp)) tmp++; 3148 if (*tmp != '=') { 3149 return; 3150 } 3151 tmp++; 3152 while (IS_BLANK(*tmp)) tmp++; 3153 marker = *tmp; 3154 if ((marker != '\'') && (marker != '"')) 3155 goto error; 3156 tmp++; 3157 base = tmp; 3158 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3159 if (*tmp == 0) 3160 goto error; 3161 URL = xmlStrndup(base, tmp - base); 3162 tmp++; 3163 while (IS_BLANK(*tmp)) tmp++; 3164 if (*tmp != 0) 3165 goto error; 3166 3167 if (URL != NULL) { 3168 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3169 xmlFree(URL); 3170 } 3171 return; 3172 3173error: 3174 ctxt->errNo = XML_WAR_CATALOG_PI; 3175 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3176 ctxt->sax->warning(ctxt->userData, 3177 "Catalog PI syntax error: %s\n", catalog); 3178 if (URL != NULL) 3179 xmlFree(URL); 3180} 3181#endif 3182 3183/** 3184 * xmlParsePI: 3185 * @ctxt: an XML parser context 3186 * 3187 * parse an XML Processing Instruction. 3188 * 3189 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3190 * 3191 * The processing is transfered to SAX once parsed. 
3192 */ 3193 3194void 3195xmlParsePI(xmlParserCtxtPtr ctxt) { 3196 xmlChar *buf = NULL; 3197 int len = 0; 3198 int size = XML_PARSER_BUFFER_SIZE; 3199 int cur, l; 3200 xmlChar *target; 3201 xmlParserInputState state; 3202 int count = 0; 3203 3204 if ((RAW == '<') && (NXT(1) == '?')) { 3205 xmlParserInputPtr input = ctxt->input; 3206 state = ctxt->instate; 3207 ctxt->instate = XML_PARSER_PI; 3208 /* 3209 * this is a Processing Instruction. 3210 */ 3211 SKIP(2); 3212 SHRINK; 3213 3214 /* 3215 * Parse the target name and check for special support like 3216 * namespace. 3217 */ 3218 target = xmlParsePITarget(ctxt); 3219 if (target != NULL) { 3220 if ((RAW == '?') && (NXT(1) == '>')) { 3221 if (input != ctxt->input) { 3222 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3224 ctxt->sax->error(ctxt->userData, 3225 "PI declaration doesn't start and stop in the same entity\n"); 3226 ctxt->wellFormed = 0; 3227 ctxt->disableSAX = 1; 3228 } 3229 SKIP(2); 3230 3231 /* 3232 * SAX: PI detected. 
3233 */ 3234 if ((ctxt->sax) && (!ctxt->disableSAX) && 3235 (ctxt->sax->processingInstruction != NULL)) 3236 ctxt->sax->processingInstruction(ctxt->userData, 3237 target, NULL); 3238 ctxt->instate = state; 3239 xmlFree(target); 3240 return; 3241 } 3242 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3243 if (buf == NULL) { 3244 xmlGenericError(xmlGenericErrorContext, 3245 "malloc of %d byte failed\n", size); 3246 ctxt->instate = state; 3247 return; 3248 } 3249 cur = CUR; 3250 if (!IS_BLANK(cur)) { 3251 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3253 ctxt->sax->error(ctxt->userData, 3254 "xmlParsePI: PI %s space expected\n", target); 3255 ctxt->wellFormed = 0; 3256 ctxt->disableSAX = 1; 3257 } 3258 SKIP_BLANKS; 3259 cur = CUR_CHAR(l); 3260 while (IS_CHAR(cur) && /* checked */ 3261 ((cur != '?') || (NXT(1) != '>'))) { 3262 if (len + 5 >= size) { 3263 size *= 2; 3264 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3265 if (buf == NULL) { 3266 xmlGenericError(xmlGenericErrorContext, 3267 "realloc of %d byte failed\n", size); 3268 ctxt->instate = state; 3269 return; 3270 } 3271 } 3272 count++; 3273 if (count > 50) { 3274 GROW; 3275 count = 0; 3276 } 3277 COPY_BUF(l,buf,len,cur); 3278 NEXTL(l); 3279 cur = CUR_CHAR(l); 3280 if (cur == 0) { 3281 SHRINK; 3282 GROW; 3283 cur = CUR_CHAR(l); 3284 } 3285 } 3286 buf[len] = 0; 3287 if (cur != '?') { 3288 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3290 ctxt->sax->error(ctxt->userData, 3291 "xmlParsePI: PI %s never end ...\n", target); 3292 ctxt->wellFormed = 0; 3293 ctxt->disableSAX = 1; 3294 } else { 3295 if (input != ctxt->input) { 3296 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3298 ctxt->sax->error(ctxt->userData, 3299 "PI declaration doesn't start and stop in the same entity\n"); 3300 ctxt->wellFormed = 0; 3301 ctxt->disableSAX = 1; 3302 } 3303 
SKIP(2); 3304 3305#ifdef LIBXML_CATALOG_ENABLED 3306 if (((state == XML_PARSER_MISC) || 3307 (state == XML_PARSER_START)) && 3308 (xmlStrEqual(target, XML_CATALOG_PI))) { 3309 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3310 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3311 (allow == XML_CATA_ALLOW_ALL)) 3312 xmlParseCatalogPI(ctxt, buf); 3313 } 3314#endif 3315 3316 3317 /* 3318 * SAX: PI detected. 3319 */ 3320 if ((ctxt->sax) && (!ctxt->disableSAX) && 3321 (ctxt->sax->processingInstruction != NULL)) 3322 ctxt->sax->processingInstruction(ctxt->userData, 3323 target, buf); 3324 } 3325 xmlFree(buf); 3326 xmlFree(target); 3327 } else { 3328 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3330 ctxt->sax->error(ctxt->userData, 3331 "xmlParsePI : no target name\n"); 3332 ctxt->wellFormed = 0; 3333 ctxt->disableSAX = 1; 3334 } 3335 ctxt->instate = state; 3336 } 3337} 3338 3339/** 3340 * xmlParseNotationDecl: 3341 * @ctxt: an XML parser context 3342 * 3343 * parse a notation declaration 3344 * 3345 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3346 * 3347 * Hence there is actually 3 choices: 3348 * 'PUBLIC' S PubidLiteral 3349 * 'PUBLIC' S PubidLiteral S SystemLiteral 3350 * and 'SYSTEM' S SystemLiteral 3351 * 3352 * See the NOTE on xmlParseExternalID(). 
3353 */ 3354 3355void 3356xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3357 xmlChar *name; 3358 xmlChar *Pubid; 3359 xmlChar *Systemid; 3360 3361 if ((RAW == '<') && (NXT(1) == '!') && 3362 (NXT(2) == 'N') && (NXT(3) == 'O') && 3363 (NXT(4) == 'T') && (NXT(5) == 'A') && 3364 (NXT(6) == 'T') && (NXT(7) == 'I') && 3365 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3366 xmlParserInputPtr input = ctxt->input; 3367 SHRINK; 3368 SKIP(10); 3369 if (!IS_BLANK(CUR)) { 3370 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3372 ctxt->sax->error(ctxt->userData, 3373 "Space required after '<!NOTATION'\n"); 3374 ctxt->wellFormed = 0; 3375 ctxt->disableSAX = 1; 3376 return; 3377 } 3378 SKIP_BLANKS; 3379 3380 name = xmlParseName(ctxt); 3381 if (name == NULL) { 3382 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3384 ctxt->sax->error(ctxt->userData, 3385 "NOTATION: Name expected here\n"); 3386 ctxt->wellFormed = 0; 3387 ctxt->disableSAX = 1; 3388 return; 3389 } 3390 if (!IS_BLANK(CUR)) { 3391 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3393 ctxt->sax->error(ctxt->userData, 3394 "Space required after the NOTATION name'\n"); 3395 ctxt->wellFormed = 0; 3396 ctxt->disableSAX = 1; 3397 return; 3398 } 3399 SKIP_BLANKS; 3400 3401 /* 3402 * Parse the IDs. 
3403 */ 3404 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3405 SKIP_BLANKS; 3406 3407 if (RAW == '>') { 3408 if (input != ctxt->input) { 3409 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3411 ctxt->sax->error(ctxt->userData, 3412"Notation declaration doesn't start and stop in the same entity\n"); 3413 ctxt->wellFormed = 0; 3414 ctxt->disableSAX = 1; 3415 } 3416 NEXT; 3417 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3418 (ctxt->sax->notationDecl != NULL)) 3419 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3420 } else { 3421 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3423 ctxt->sax->error(ctxt->userData, 3424 "'>' required to close NOTATION declaration\n"); 3425 ctxt->wellFormed = 0; 3426 ctxt->disableSAX = 1; 3427 } 3428 xmlFree(name); 3429 if (Systemid != NULL) xmlFree(Systemid); 3430 if (Pubid != NULL) xmlFree(Pubid); 3431 } 3432} 3433 3434/** 3435 * xmlParseEntityDecl: 3436 * @ctxt: an XML parser context 3437 * 3438 * parse <!ENTITY declarations 3439 * 3440 * [70] EntityDecl ::= GEDecl | PEDecl 3441 * 3442 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3443 * 3444 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3445 * 3446 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3447 * 3448 * [74] PEDef ::= EntityValue | ExternalID 3449 * 3450 * [76] NDataDecl ::= S 'NDATA' S Name 3451 * 3452 * [ VC: Notation Declared ] 3453 * The Name must match the declared name of a notation. 
3454 */ 3455 3456void 3457xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3458 xmlChar *name = NULL; 3459 xmlChar *value = NULL; 3460 xmlChar *URI = NULL, *literal = NULL; 3461 xmlChar *ndata = NULL; 3462 int isParameter = 0; 3463 xmlChar *orig = NULL; 3464 int skipped; 3465 3466 GROW; 3467 if ((RAW == '<') && (NXT(1) == '!') && 3468 (NXT(2) == 'E') && (NXT(3) == 'N') && 3469 (NXT(4) == 'T') && (NXT(5) == 'I') && 3470 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3471 xmlParserInputPtr input = ctxt->input; 3472 SHRINK; 3473 SKIP(8); 3474 skipped = SKIP_BLANKS; 3475 if (skipped == 0) { 3476 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3478 ctxt->sax->error(ctxt->userData, 3479 "Space required after '<!ENTITY'\n"); 3480 ctxt->wellFormed = 0; 3481 ctxt->disableSAX = 1; 3482 } 3483 3484 if (RAW == '%') { 3485 NEXT; 3486 skipped = SKIP_BLANKS; 3487 if (skipped == 0) { 3488 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3490 ctxt->sax->error(ctxt->userData, 3491 "Space required after '%'\n"); 3492 ctxt->wellFormed = 0; 3493 ctxt->disableSAX = 1; 3494 } 3495 isParameter = 1; 3496 } 3497 3498 name = xmlParseName(ctxt); 3499 if (name == NULL) { 3500 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3502 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3503 ctxt->wellFormed = 0; 3504 ctxt->disableSAX = 1; 3505 return; 3506 } 3507 skipped = SKIP_BLANKS; 3508 if (skipped == 0) { 3509 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3511 ctxt->sax->error(ctxt->userData, 3512 "Space required after the entity name\n"); 3513 ctxt->wellFormed = 0; 3514 ctxt->disableSAX = 1; 3515 } 3516 3517 ctxt->instate = XML_PARSER_ENTITY_DECL; 3518 /* 3519 * handle the various case of definitions... 
3520 */ 3521 if (isParameter) { 3522 if ((RAW == '"') || (RAW == '\'')) { 3523 value = xmlParseEntityValue(ctxt, &orig); 3524 if (value) { 3525 if ((ctxt->sax != NULL) && 3526 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3527 ctxt->sax->entityDecl(ctxt->userData, name, 3528 XML_INTERNAL_PARAMETER_ENTITY, 3529 NULL, NULL, value); 3530 } 3531 } else { 3532 URI = xmlParseExternalID(ctxt, &literal, 1); 3533 if ((URI == NULL) && (literal == NULL)) { 3534 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3536 ctxt->sax->error(ctxt->userData, 3537 "Entity value required\n"); 3538 ctxt->wellFormed = 0; 3539 ctxt->disableSAX = 1; 3540 } 3541 if (URI) { 3542 xmlURIPtr uri; 3543 3544 uri = xmlParseURI((const char *) URI); 3545 if (uri == NULL) { 3546 ctxt->errNo = XML_ERR_INVALID_URI; 3547 if ((ctxt->sax != NULL) && 3548 (!ctxt->disableSAX) && 3549 (ctxt->sax->error != NULL)) 3550 ctxt->sax->error(ctxt->userData, 3551 "Invalid URI: %s\n", URI); 3552 /* 3553 * This really ought to be a well formedness error 3554 * but the XML Core WG decided otherwise c.f. issue 3555 * E26 of the XML erratas. 3556 */ 3557 } else { 3558 if (uri->fragment != NULL) { 3559 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3560 if ((ctxt->sax != NULL) && 3561 (!ctxt->disableSAX) && 3562 (ctxt->sax->error != NULL)) 3563 ctxt->sax->error(ctxt->userData, 3564 "Fragment not allowed: %s\n", URI); 3565 /* 3566 * Okay this is foolish to block those but not 3567 * invalid URIs. 
3568 */ 3569 ctxt->wellFormed = 0; 3570 } else { 3571 if ((ctxt->sax != NULL) && 3572 (!ctxt->disableSAX) && 3573 (ctxt->sax->entityDecl != NULL)) 3574 ctxt->sax->entityDecl(ctxt->userData, name, 3575 XML_EXTERNAL_PARAMETER_ENTITY, 3576 literal, URI, NULL); 3577 } 3578 xmlFreeURI(uri); 3579 } 3580 } 3581 } 3582 } else { 3583 if ((RAW == '"') || (RAW == '\'')) { 3584 value = xmlParseEntityValue(ctxt, &orig); 3585 if ((ctxt->sax != NULL) && 3586 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3587 ctxt->sax->entityDecl(ctxt->userData, name, 3588 XML_INTERNAL_GENERAL_ENTITY, 3589 NULL, NULL, value); 3590 /* 3591 * For expat compatibility in SAX mode. 3592 */ 3593 if ((ctxt->myDoc == NULL) || 3594 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 3595 if (ctxt->myDoc == NULL) { 3596 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3597 } 3598 if (ctxt->myDoc->intSubset == NULL) 3599 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3600 BAD_CAST "fake", NULL, NULL); 3601 3602 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 3603 NULL, NULL, value); 3604 } 3605 } else { 3606 URI = xmlParseExternalID(ctxt, &literal, 1); 3607 if ((URI == NULL) && (literal == NULL)) { 3608 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3610 ctxt->sax->error(ctxt->userData, 3611 "Entity value required\n"); 3612 ctxt->wellFormed = 0; 3613 ctxt->disableSAX = 1; 3614 } 3615 if (URI) { 3616 xmlURIPtr uri; 3617 3618 uri = xmlParseURI((const char *)URI); 3619 if (uri == NULL) { 3620 ctxt->errNo = XML_ERR_INVALID_URI; 3621 if ((ctxt->sax != NULL) && 3622 (!ctxt->disableSAX) && 3623 (ctxt->sax->error != NULL)) 3624 ctxt->sax->error(ctxt->userData, 3625 "Invalid URI: %s\n", URI); 3626 /* 3627 * This really ought to be a well formedness error 3628 * but the XML Core WG decided otherwise c.f. issue 3629 * E26 of the XML erratas. 
3630 */ 3631 } else { 3632 if (uri->fragment != NULL) { 3633 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3634 if ((ctxt->sax != NULL) && 3635 (!ctxt->disableSAX) && 3636 (ctxt->sax->error != NULL)) 3637 ctxt->sax->error(ctxt->userData, 3638 "Fragment not allowed: %s\n", URI); 3639 /* 3640 * Okay this is foolish to block those but not 3641 * invalid URIs. 3642 */ 3643 ctxt->wellFormed = 0; 3644 } 3645 xmlFreeURI(uri); 3646 } 3647 } 3648 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3649 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3651 ctxt->sax->error(ctxt->userData, 3652 "Space required before 'NDATA'\n"); 3653 ctxt->wellFormed = 0; 3654 ctxt->disableSAX = 1; 3655 } 3656 SKIP_BLANKS; 3657 if ((RAW == 'N') && (NXT(1) == 'D') && 3658 (NXT(2) == 'A') && (NXT(3) == 'T') && 3659 (NXT(4) == 'A')) { 3660 SKIP(5); 3661 if (!IS_BLANK(CUR)) { 3662 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3664 ctxt->sax->error(ctxt->userData, 3665 "Space required after 'NDATA'\n"); 3666 ctxt->wellFormed = 0; 3667 ctxt->disableSAX = 1; 3668 } 3669 SKIP_BLANKS; 3670 ndata = xmlParseName(ctxt); 3671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3672 (ctxt->sax->unparsedEntityDecl != NULL)) 3673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3674 literal, URI, ndata); 3675 } else { 3676 if ((ctxt->sax != NULL) && 3677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3678 ctxt->sax->entityDecl(ctxt->userData, name, 3679 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3680 literal, URI, NULL); 3681 /* 3682 * For expat compatibility in SAX mode. 
3683 * assuming the entity repalcement was asked for 3684 */ 3685 if ((ctxt->replaceEntities != 0) && 3686 ((ctxt->myDoc == NULL) || 3687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 3688 if (ctxt->myDoc == NULL) { 3689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3690 } 3691 3692 if (ctxt->myDoc->intSubset == NULL) 3693 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3694 BAD_CAST "fake", NULL, NULL); 3695 entityDecl(ctxt, name, 3696 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3697 literal, URI, NULL); 3698 } 3699 } 3700 } 3701 } 3702 SKIP_BLANKS; 3703 if (RAW != '>') { 3704 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3706 ctxt->sax->error(ctxt->userData, 3707 "xmlParseEntityDecl: entity %s not terminated\n", name); 3708 ctxt->wellFormed = 0; 3709 ctxt->disableSAX = 1; 3710 } else { 3711 if (input != ctxt->input) { 3712 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3714 ctxt->sax->error(ctxt->userData, 3715"Entity declaration doesn't start and stop in the same entity\n"); 3716 ctxt->wellFormed = 0; 3717 ctxt->disableSAX = 1; 3718 } 3719 NEXT; 3720 } 3721 if (orig != NULL) { 3722 /* 3723 * Ugly mechanism to save the raw entity value. 
3724 */ 3725 xmlEntityPtr cur = NULL; 3726 3727 if (isParameter) { 3728 if ((ctxt->sax != NULL) && 3729 (ctxt->sax->getParameterEntity != NULL)) 3730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3731 } else { 3732 if ((ctxt->sax != NULL) && 3733 (ctxt->sax->getEntity != NULL)) 3734 cur = ctxt->sax->getEntity(ctxt->userData, name); 3735 if ((cur == NULL) && (ctxt->userData==ctxt)) { 3736 cur = getEntity(ctxt, name); 3737 } 3738 } 3739 if (cur != NULL) { 3740 if (cur->orig != NULL) 3741 xmlFree(orig); 3742 else 3743 cur->orig = orig; 3744 } else 3745 xmlFree(orig); 3746 } 3747 if (name != NULL) xmlFree(name); 3748 if (value != NULL) xmlFree(value); 3749 if (URI != NULL) xmlFree(URI); 3750 if (literal != NULL) xmlFree(literal); 3751 if (ndata != NULL) xmlFree(ndata); 3752 } 3753} 3754 3755/** 3756 * xmlParseDefaultDecl: 3757 * @ctxt: an XML parser context 3758 * @value: Receive a possible fixed default value for the attribute 3759 * 3760 * Parse an attribute default declaration 3761 * 3762 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 3763 * 3764 * [ VC: Required Attribute ] 3765 * if the default declaration is the keyword #REQUIRED, then the 3766 * attribute must be specified for all elements of the type in the 3767 * attribute-list declaration. 3768 * 3769 * [ VC: Attribute Default Legal ] 3770 * The declared default value must meet the lexical constraints of 3771 * the declared attribute type c.f. xmlValidateAttributeDecl() 3772 * 3773 * [ VC: Fixed Attribute Default ] 3774 * if an attribute has a default value declared with the #FIXED 3775 * keyword, instances of that attribute must match the default value. 3776 * 3777 * [ WFC: No < in Attribute Values ] 3778 * handled in xmlParseAttValue() 3779 * 3780 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3781 * or XML_ATTRIBUTE_FIXED. 
3782 */ 3783 3784int 3785xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3786 int val; 3787 xmlChar *ret; 3788 3789 *value = NULL; 3790 if ((RAW == '#') && (NXT(1) == 'R') && 3791 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3792 (NXT(4) == 'U') && (NXT(5) == 'I') && 3793 (NXT(6) == 'R') && (NXT(7) == 'E') && 3794 (NXT(8) == 'D')) { 3795 SKIP(9); 3796 return(XML_ATTRIBUTE_REQUIRED); 3797 } 3798 if ((RAW == '#') && (NXT(1) == 'I') && 3799 (NXT(2) == 'M') && (NXT(3) == 'P') && 3800 (NXT(4) == 'L') && (NXT(5) == 'I') && 3801 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3802 SKIP(8); 3803 return(XML_ATTRIBUTE_IMPLIED); 3804 } 3805 val = XML_ATTRIBUTE_NONE; 3806 if ((RAW == '#') && (NXT(1) == 'F') && 3807 (NXT(2) == 'I') && (NXT(3) == 'X') && 3808 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3809 SKIP(6); 3810 val = XML_ATTRIBUTE_FIXED; 3811 if (!IS_BLANK(CUR)) { 3812 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3814 ctxt->sax->error(ctxt->userData, 3815 "Space required after '#FIXED'\n"); 3816 ctxt->wellFormed = 0; 3817 ctxt->disableSAX = 1; 3818 } 3819 SKIP_BLANKS; 3820 } 3821 ret = xmlParseAttValue(ctxt); 3822 ctxt->instate = XML_PARSER_DTD; 3823 if (ret == NULL) { 3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3825 ctxt->sax->error(ctxt->userData, 3826 "Attribute default value declaration error\n"); 3827 ctxt->wellFormed = 0; 3828 ctxt->disableSAX = 1; 3829 } else 3830 *value = ret; 3831 return(val); 3832} 3833 3834/** 3835 * xmlParseNotationType: 3836 * @ctxt: an XML parser context 3837 * 3838 * parse an Notation attribute type. 3839 * 3840 * Note: the leading 'NOTATION' S part has already being parsed... 3841 * 3842 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3843 * 3844 * [ VC: Notation Attributes ] 3845 * Values of this type must match one of the notation names included 3846 * in the declaration; all notation names in the declaration must be declared. 
3847 * 3848 * Returns: the notation attribute tree built while parsing 3849 */ 3850 3851xmlEnumerationPtr 3852xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3853 xmlChar *name; 3854 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3855 3856 if (RAW != '(') { 3857 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3859 ctxt->sax->error(ctxt->userData, 3860 "'(' required to start 'NOTATION'\n"); 3861 ctxt->wellFormed = 0; 3862 ctxt->disableSAX = 1; 3863 return(NULL); 3864 } 3865 SHRINK; 3866 do { 3867 NEXT; 3868 SKIP_BLANKS; 3869 name = xmlParseName(ctxt); 3870 if (name == NULL) { 3871 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3873 ctxt->sax->error(ctxt->userData, 3874 "Name expected in NOTATION declaration\n"); 3875 ctxt->wellFormed = 0; 3876 ctxt->disableSAX = 1; 3877 return(ret); 3878 } 3879 cur = xmlCreateEnumeration(name); 3880 xmlFree(name); 3881 if (cur == NULL) return(ret); 3882 if (last == NULL) ret = last = cur; 3883 else { 3884 last->next = cur; 3885 last = cur; 3886 } 3887 SKIP_BLANKS; 3888 } while (RAW == '|'); 3889 if (RAW != ')') { 3890 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3892 ctxt->sax->error(ctxt->userData, 3893 "')' required to finish NOTATION declaration\n"); 3894 ctxt->wellFormed = 0; 3895 ctxt->disableSAX = 1; 3896 if ((last != NULL) && (last != ret)) 3897 xmlFreeEnumeration(last); 3898 return(ret); 3899 } 3900 NEXT; 3901 return(ret); 3902} 3903 3904/** 3905 * xmlParseEnumerationType: 3906 * @ctxt: an XML parser context 3907 * 3908 * parse an Enumeration attribute type. 3909 * 3910 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 3911 * 3912 * [ VC: Enumeration ] 3913 * Values of this type must match one of the Nmtoken tokens in 3914 * the declaration 3915 * 3916 * Returns: the enumeration attribute tree built while parsing 3917 */ 3918 3919xmlEnumerationPtr 3920xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3921 xmlChar *name; 3922 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3923 3924 if (RAW != '(') { 3925 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3927 ctxt->sax->error(ctxt->userData, 3928 "'(' required to start ATTLIST enumeration\n"); 3929 ctxt->wellFormed = 0; 3930 ctxt->disableSAX = 1; 3931 return(NULL); 3932 } 3933 SHRINK; 3934 do { 3935 NEXT; 3936 SKIP_BLANKS; 3937 name = xmlParseNmtoken(ctxt); 3938 if (name == NULL) { 3939 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3941 ctxt->sax->error(ctxt->userData, 3942 "NmToken expected in ATTLIST enumeration\n"); 3943 ctxt->wellFormed = 0; 3944 ctxt->disableSAX = 1; 3945 return(ret); 3946 } 3947 cur = xmlCreateEnumeration(name); 3948 xmlFree(name); 3949 if (cur == NULL) return(ret); 3950 if (last == NULL) ret = last = cur; 3951 else { 3952 last->next = cur; 3953 last = cur; 3954 } 3955 SKIP_BLANKS; 3956 } while (RAW == '|'); 3957 if (RAW != ')') { 3958 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3960 ctxt->sax->error(ctxt->userData, 3961 "')' required to finish ATTLIST enumeration\n"); 3962 ctxt->wellFormed = 0; 3963 ctxt->disableSAX = 1; 3964 return(ret); 3965 } 3966 NEXT; 3967 return(ret); 3968} 3969 3970/** 3971 * xmlParseEnumeratedType: 3972 * @ctxt: an XML parser context 3973 * @tree: the enumeration tree built while parsing 3974 * 3975 * parse an Enumerated attribute type. 3976 * 3977 * [57] EnumeratedType ::= NotationType | Enumeration 3978 * 3979 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 3980 * 3981 * 3982 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3983 */ 3984 3985int 3986xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3987 if ((RAW == 'N') && (NXT(1) == 'O') && 3988 (NXT(2) == 'T') && (NXT(3) == 'A') && 3989 (NXT(4) == 'T') && (NXT(5) == 'I') && 3990 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3991 SKIP(8); 3992 if (!IS_BLANK(CUR)) { 3993 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3995 ctxt->sax->error(ctxt->userData, 3996 "Space required after 'NOTATION'\n"); 3997 ctxt->wellFormed = 0; 3998 ctxt->disableSAX = 1; 3999 return(0); 4000 } 4001 SKIP_BLANKS; 4002 *tree = xmlParseNotationType(ctxt); 4003 if (*tree == NULL) return(0); 4004 return(XML_ATTRIBUTE_NOTATION); 4005 } 4006 *tree = xmlParseEnumerationType(ctxt); 4007 if (*tree == NULL) return(0); 4008 return(XML_ATTRIBUTE_ENUMERATION); 4009} 4010 4011/** 4012 * xmlParseAttributeType: 4013 * @ctxt: an XML parser context 4014 * @tree: the enumeration tree built while parsing 4015 * 4016 * parse the Attribute list def for an element 4017 * 4018 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4019 * 4020 * [55] StringType ::= 'CDATA' 4021 * 4022 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4023 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4024 * 4025 * Validity constraints for attribute values syntax are checked in 4026 * xmlValidateAttributeValue() 4027 * 4028 * [ VC: ID ] 4029 * Values of type ID must match the Name production. A name must not 4030 * appear more than once in an XML document as a value of this type; 4031 * i.e., ID values must uniquely identify the elements which bear them. 4032 * 4033 * [ VC: One ID per Element Type ] 4034 * No element type may have more than one ID attribute specified. 4035 * 4036 * [ VC: ID Attribute Default ] 4037 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 
4038 * 4039 * [ VC: IDREF ] 4040 * Values of type IDREF must match the Name production, and values 4041 * of type IDREFS must match Names; each IDREF Name must match the value 4042 * of an ID attribute on some element in the XML document; i.e. IDREF 4043 * values must match the value of some ID attribute. 4044 * 4045 * [ VC: Entity Name ] 4046 * Values of type ENTITY must match the Name production, values 4047 * of type ENTITIES must match Names; each Entity Name must match the 4048 * name of an unparsed entity declared in the DTD. 4049 * 4050 * [ VC: Name Token ] 4051 * Values of type NMTOKEN must match the Nmtoken production; values 4052 * of type NMTOKENS must match Nmtokens. 4053 * 4054 * Returns the attribute type 4055 */ 4056int 4057xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4058 SHRINK; 4059 if ((RAW == 'C') && (NXT(1) == 'D') && 4060 (NXT(2) == 'A') && (NXT(3) == 'T') && 4061 (NXT(4) == 'A')) { 4062 SKIP(5); 4063 return(XML_ATTRIBUTE_CDATA); 4064 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4065 (NXT(2) == 'R') && (NXT(3) == 'E') && 4066 (NXT(4) == 'F') && (NXT(5) == 'S')) { 4067 SKIP(6); 4068 return(XML_ATTRIBUTE_IDREFS); 4069 } else if ((RAW == 'I') && (NXT(1) == 'D') && 4070 (NXT(2) == 'R') && (NXT(3) == 'E') && 4071 (NXT(4) == 'F')) { 4072 SKIP(5); 4073 return(XML_ATTRIBUTE_IDREF); 4074 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4075 SKIP(2); 4076 return(XML_ATTRIBUTE_ID); 4077 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4078 (NXT(2) == 'T') && (NXT(3) == 'I') && 4079 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 4080 SKIP(6); 4081 return(XML_ATTRIBUTE_ENTITY); 4082 } else if ((RAW == 'E') && (NXT(1) == 'N') && 4083 (NXT(2) == 'T') && (NXT(3) == 'I') && 4084 (NXT(4) == 'T') && (NXT(5) == 'I') && 4085 (NXT(6) == 'E') && (NXT(7) == 'S')) { 4086 SKIP(8); 4087 return(XML_ATTRIBUTE_ENTITIES); 4088 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4089 (NXT(2) == 'T') && (NXT(3) == 'O') && 4090 (NXT(4) == 'K') && (NXT(5) == 'E') && 4091 
(NXT(6) == 'N') && (NXT(7) == 'S')) { 4092 SKIP(8); 4093 return(XML_ATTRIBUTE_NMTOKENS); 4094 } else if ((RAW == 'N') && (NXT(1) == 'M') && 4095 (NXT(2) == 'T') && (NXT(3) == 'O') && 4096 (NXT(4) == 'K') && (NXT(5) == 'E') && 4097 (NXT(6) == 'N')) { 4098 SKIP(7); 4099 return(XML_ATTRIBUTE_NMTOKEN); 4100 } 4101 return(xmlParseEnumeratedType(ctxt, tree)); 4102} 4103 4104/** 4105 * xmlParseAttributeListDecl: 4106 * @ctxt: an XML parser context 4107 * 4108 * : parse the Attribute list def for an element 4109 * 4110 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 4111 * 4112 * [53] AttDef ::= S Name S AttType S DefaultDecl 4113 * 4114 */ 4115void 4116xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4117 xmlChar *elemName; 4118 xmlChar *attrName; 4119 xmlEnumerationPtr tree; 4120 4121 if ((RAW == '<') && (NXT(1) == '!') && 4122 (NXT(2) == 'A') && (NXT(3) == 'T') && 4123 (NXT(4) == 'T') && (NXT(5) == 'L') && 4124 (NXT(6) == 'I') && (NXT(7) == 'S') && 4125 (NXT(8) == 'T')) { 4126 xmlParserInputPtr input = ctxt->input; 4127 4128 SKIP(9); 4129 if (!IS_BLANK(CUR)) { 4130 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4132 ctxt->sax->error(ctxt->userData, 4133 "Space required after '<!ATTLIST'\n"); 4134 ctxt->wellFormed = 0; 4135 ctxt->disableSAX = 1; 4136 } 4137 SKIP_BLANKS; 4138 elemName = xmlParseName(ctxt); 4139 if (elemName == NULL) { 4140 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4142 ctxt->sax->error(ctxt->userData, 4143 "ATTLIST: no name for Element\n"); 4144 ctxt->wellFormed = 0; 4145 ctxt->disableSAX = 1; 4146 return; 4147 } 4148 SKIP_BLANKS; 4149 GROW; 4150 while (RAW != '>') { 4151 const xmlChar *check = CUR_PTR; 4152 int type; 4153 int def; 4154 xmlChar *defaultValue = NULL; 4155 4156 GROW; 4157 tree = NULL; 4158 attrName = xmlParseName(ctxt); 4159 if (attrName == NULL) { 4160 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4161 if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) 4162 ctxt->sax->error(ctxt->userData, 4163 "ATTLIST: no name for Attribute\n"); 4164 ctxt->wellFormed = 0; 4165 ctxt->disableSAX = 1; 4166 break; 4167 } 4168 GROW; 4169 if (!IS_BLANK(CUR)) { 4170 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4172 ctxt->sax->error(ctxt->userData, 4173 "Space required after the attribute name\n"); 4174 ctxt->wellFormed = 0; 4175 ctxt->disableSAX = 1; 4176 if (attrName != NULL) 4177 xmlFree(attrName); 4178 if (defaultValue != NULL) 4179 xmlFree(defaultValue); 4180 break; 4181 } 4182 SKIP_BLANKS; 4183 4184 type = xmlParseAttributeType(ctxt, &tree); 4185 if (type <= 0) { 4186 if (attrName != NULL) 4187 xmlFree(attrName); 4188 if (defaultValue != NULL) 4189 xmlFree(defaultValue); 4190 break; 4191 } 4192 4193 GROW; 4194 if (!IS_BLANK(CUR)) { 4195 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4197 ctxt->sax->error(ctxt->userData, 4198 "Space required after the attribute type\n"); 4199 ctxt->wellFormed = 0; 4200 ctxt->disableSAX = 1; 4201 if (attrName != NULL) 4202 xmlFree(attrName); 4203 if (defaultValue != NULL) 4204 xmlFree(defaultValue); 4205 if (tree != NULL) 4206 xmlFreeEnumeration(tree); 4207 break; 4208 } 4209 SKIP_BLANKS; 4210 4211 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4212 if (def <= 0) { 4213 if (attrName != NULL) 4214 xmlFree(attrName); 4215 if (defaultValue != NULL) 4216 xmlFree(defaultValue); 4217 if (tree != NULL) 4218 xmlFreeEnumeration(tree); 4219 break; 4220 } 4221 4222 GROW; 4223 if (RAW != '>') { 4224 if (!IS_BLANK(CUR)) { 4225 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4227 ctxt->sax->error(ctxt->userData, 4228 "Space required after the attribute default value\n"); 4229 ctxt->wellFormed = 0; 4230 ctxt->disableSAX = 1; 4231 if (attrName != NULL) 4232 xmlFree(attrName); 4233 if (defaultValue != NULL) 4234 xmlFree(defaultValue); 
4235 if (tree != NULL) 4236 xmlFreeEnumeration(tree); 4237 break; 4238 } 4239 SKIP_BLANKS; 4240 } 4241 if (check == CUR_PTR) { 4242 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4244 ctxt->sax->error(ctxt->userData, 4245 "xmlParseAttributeListDecl: detected internal error\n"); 4246 if (attrName != NULL) 4247 xmlFree(attrName); 4248 if (defaultValue != NULL) 4249 xmlFree(defaultValue); 4250 if (tree != NULL) 4251 xmlFreeEnumeration(tree); 4252 break; 4253 } 4254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4255 (ctxt->sax->attributeDecl != NULL)) 4256 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4257 type, def, defaultValue, tree); 4258 if (attrName != NULL) 4259 xmlFree(attrName); 4260 if (defaultValue != NULL) 4261 xmlFree(defaultValue); 4262 GROW; 4263 } 4264 if (RAW == '>') { 4265 if (input != ctxt->input) { 4266 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4268 ctxt->sax->error(ctxt->userData, 4269"Attribute list declaration doesn't start and stop in the same entity\n"); 4270 ctxt->wellFormed = 0; 4271 ctxt->disableSAX = 1; 4272 } 4273 NEXT; 4274 } 4275 4276 xmlFree(elemName); 4277 } 4278} 4279 4280/** 4281 * xmlParseElementMixedContentDecl: 4282 * @ctxt: an XML parser context 4283 * 4284 * parse the declaration for a Mixed Element content 4285 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4286 * 4287 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4288 * '(' S? '#PCDATA' S? ')' 4289 * 4290 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4291 * 4292 * [ VC: No Duplicate Types ] 4293 * The same name must not appear more than once in a single 4294 * mixed-content declaration. 
4295 * 4296 * returns: the list of the xmlElementContentPtr describing the element choices 4297 */ 4298xmlElementContentPtr 4299xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { 4300 xmlElementContentPtr ret = NULL, cur = NULL, n; 4301 xmlChar *elem = NULL; 4302 4303 GROW; 4304 if ((RAW == '#') && (NXT(1) == 'P') && 4305 (NXT(2) == 'C') && (NXT(3) == 'D') && 4306 (NXT(4) == 'A') && (NXT(5) == 'T') && 4307 (NXT(6) == 'A')) { 4308 SKIP(7); 4309 SKIP_BLANKS; 4310 SHRINK; 4311 if (RAW == ')') { 4312 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4313 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4314 if (ctxt->vctxt.error != NULL) 4315 ctxt->vctxt.error(ctxt->vctxt.userData, 4316"Element content declaration doesn't start and stop in the same entity\n"); 4317 ctxt->valid = 0; 4318 } 4319 NEXT; 4320 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4321 if (RAW == '*') { 4322 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4323 NEXT; 4324 } 4325 return(ret); 4326 } 4327 if ((RAW == '(') || (RAW == '|')) { 4328 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4329 if (ret == NULL) return(NULL); 4330 } 4331 while (RAW == '|') { 4332 NEXT; 4333 if (elem == NULL) { 4334 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4335 if (ret == NULL) return(NULL); 4336 ret->c1 = cur; 4337 if (cur != NULL) 4338 cur->parent = ret; 4339 cur = ret; 4340 } else { 4341 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4342 if (n == NULL) return(NULL); 4343 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4344 if (n->c1 != NULL) 4345 n->c1->parent = n; 4346 cur->c2 = n; 4347 if (n != NULL) 4348 n->parent = cur; 4349 cur = n; 4350 xmlFree(elem); 4351 } 4352 SKIP_BLANKS; 4353 elem = xmlParseName(ctxt); 4354 if (elem == NULL) { 4355 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4357 ctxt->sax->error(ctxt->userData, 4358 "xmlParseElementMixedContentDecl : Name 
expected\n"); 4359 ctxt->wellFormed = 0; 4360 ctxt->disableSAX = 1; 4361 xmlFreeElementContent(cur); 4362 return(NULL); 4363 } 4364 SKIP_BLANKS; 4365 GROW; 4366 } 4367 if ((RAW == ')') && (NXT(1) == '*')) { 4368 if (elem != NULL) { 4369 cur->c2 = xmlNewElementContent(elem, 4370 XML_ELEMENT_CONTENT_ELEMENT); 4371 if (cur->c2 != NULL) 4372 cur->c2->parent = cur; 4373 xmlFree(elem); 4374 } 4375 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4376 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4377 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4378 if (ctxt->vctxt.error != NULL) 4379 ctxt->vctxt.error(ctxt->vctxt.userData, 4380"Element content declaration doesn't start and stop in the same entity\n"); 4381 ctxt->valid = 0; 4382 } 4383 SKIP(2); 4384 } else { 4385 if (elem != NULL) xmlFree(elem); 4386 xmlFreeElementContent(ret); 4387 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4389 ctxt->sax->error(ctxt->userData, 4390 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4391 ctxt->wellFormed = 0; 4392 ctxt->disableSAX = 1; 4393 return(NULL); 4394 } 4395 4396 } else { 4397 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4399 ctxt->sax->error(ctxt->userData, 4400 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4401 ctxt->wellFormed = 0; 4402 ctxt->disableSAX = 1; 4403 } 4404 return(ret); 4405} 4406 4407/** 4408 * xmlParseElementChildrenContentDecl: 4409 * @ctxt: an XML parser context 4410 * 4411 * parse the declaration for a Mixed Element content 4412 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4413 * 4414 * 4415 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4416 * 4417 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4418 * 4419 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4420 * 4421 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? 
')' 4422 * 4423 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4424 * TODO Parameter-entity replacement text must be properly nested 4425 * with parenthesized groups. That is to say, if either of the 4426 * opening or closing parentheses in a choice, seq, or Mixed 4427 * construct is contained in the replacement text for a parameter 4428 * entity, both must be contained in the same replacement text. For 4429 * interoperability, if a parameter-entity reference appears in a 4430 * choice, seq, or Mixed construct, its replacement text should not 4431 * be empty, and neither the first nor last non-blank character of 4432 * the replacement text should be a connector (| or ,). 4433 * 4434 * Returns the tree of xmlElementContentPtr describing the element 4435 * hierarchy. 4436 */ 4437xmlElementContentPtr 4438xmlParseElementChildrenContentDecl 4439(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { 4440 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4441 xmlChar *elem; 4442 xmlChar type = 0; 4443 4444 SKIP_BLANKS; 4445 GROW; 4446 if (RAW == '(') { 4447 xmlParserInputPtr input = ctxt->input; 4448 4449 /* Recurse on first child */ 4450 NEXT; 4451 SKIP_BLANKS; 4452 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input); 4453 SKIP_BLANKS; 4454 GROW; 4455 } else { 4456 elem = xmlParseName(ctxt); 4457 if (elem == NULL) { 4458 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4460 ctxt->sax->error(ctxt->userData, 4461 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4462 ctxt->wellFormed = 0; 4463 ctxt->disableSAX = 1; 4464 return(NULL); 4465 } 4466 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4467 GROW; 4468 if (RAW == '?') { 4469 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4470 NEXT; 4471 } else if (RAW == '*') { 4472 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4473 NEXT; 4474 } else if (RAW == '+') { 4475 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4476 NEXT; 
4477 } else { 4478 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4479 } 4480 xmlFree(elem); 4481 GROW; 4482 } 4483 SKIP_BLANKS; 4484 SHRINK; 4485 while (RAW != ')') { 4486 /* 4487 * Each loop we parse one separator and one element. 4488 */ 4489 if (RAW == ',') { 4490 if (type == 0) type = CUR; 4491 4492 /* 4493 * Detect "Name | Name , Name" error 4494 */ 4495 else if (type != CUR) { 4496 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4498 ctxt->sax->error(ctxt->userData, 4499 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4500 type); 4501 ctxt->wellFormed = 0; 4502 ctxt->disableSAX = 1; 4503 if ((last != NULL) && (last != ret)) 4504 xmlFreeElementContent(last); 4505 if (ret != NULL) 4506 xmlFreeElementContent(ret); 4507 return(NULL); 4508 } 4509 NEXT; 4510 4511 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4512 if (op == NULL) { 4513 if ((last != NULL) && (last != ret)) 4514 xmlFreeElementContent(last); 4515 xmlFreeElementContent(ret); 4516 return(NULL); 4517 } 4518 if (last == NULL) { 4519 op->c1 = ret; 4520 if (ret != NULL) 4521 ret->parent = op; 4522 ret = cur = op; 4523 } else { 4524 cur->c2 = op; 4525 if (op != NULL) 4526 op->parent = cur; 4527 op->c1 = last; 4528 if (last != NULL) 4529 last->parent = op; 4530 cur =op; 4531 last = NULL; 4532 } 4533 } else if (RAW == '|') { 4534 if (type == 0) type = CUR; 4535 4536 /* 4537 * Detect "Name , Name | Name" error 4538 */ 4539 else if (type != CUR) { 4540 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4542 ctxt->sax->error(ctxt->userData, 4543 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4544 type); 4545 ctxt->wellFormed = 0; 4546 ctxt->disableSAX = 1; 4547 if ((last != NULL) && (last != ret)) 4548 xmlFreeElementContent(last); 4549 if (ret != NULL) 4550 xmlFreeElementContent(ret); 4551 return(NULL); 4552 } 4553 NEXT; 4554 4555 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 
4556 if (op == NULL) { 4557 if ((last != NULL) && (last != ret)) 4558 xmlFreeElementContent(last); 4559 if (ret != NULL) 4560 xmlFreeElementContent(ret); 4561 return(NULL); 4562 } 4563 if (last == NULL) { 4564 op->c1 = ret; 4565 if (ret != NULL) 4566 ret->parent = op; 4567 ret = cur = op; 4568 } else { 4569 cur->c2 = op; 4570 if (op != NULL) 4571 op->parent = cur; 4572 op->c1 = last; 4573 if (last != NULL) 4574 last->parent = op; 4575 cur =op; 4576 last = NULL; 4577 } 4578 } else { 4579 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4581 ctxt->sax->error(ctxt->userData, 4582 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4583 ctxt->wellFormed = 0; 4584 ctxt->disableSAX = 1; 4585 if (ret != NULL) 4586 xmlFreeElementContent(ret); 4587 return(NULL); 4588 } 4589 GROW; 4590 SKIP_BLANKS; 4591 GROW; 4592 if (RAW == '(') { 4593 xmlParserInputPtr input = ctxt->input; 4594 /* Recurse on second child */ 4595 NEXT; 4596 SKIP_BLANKS; 4597 last = xmlParseElementChildrenContentDecl(ctxt, input); 4598 SKIP_BLANKS; 4599 } else { 4600 elem = xmlParseName(ctxt); 4601 if (elem == NULL) { 4602 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4604 ctxt->sax->error(ctxt->userData, 4605 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4606 ctxt->wellFormed = 0; 4607 ctxt->disableSAX = 1; 4608 if (ret != NULL) 4609 xmlFreeElementContent(ret); 4610 return(NULL); 4611 } 4612 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4613 xmlFree(elem); 4614 if (RAW == '?') { 4615 last->ocur = XML_ELEMENT_CONTENT_OPT; 4616 NEXT; 4617 } else if (RAW == '*') { 4618 last->ocur = XML_ELEMENT_CONTENT_MULT; 4619 NEXT; 4620 } else if (RAW == '+') { 4621 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4622 NEXT; 4623 } else { 4624 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4625 } 4626 } 4627 SKIP_BLANKS; 4628 GROW; 4629 } 4630 if ((cur != NULL) && (last != 
NULL)) { 4631 cur->c2 = last; 4632 if (last != NULL) 4633 last->parent = cur; 4634 } 4635 if ((ctxt->validate) && (ctxt->input != inputchk)) { 4636 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4637 if (ctxt->vctxt.error != NULL) 4638 ctxt->vctxt.error(ctxt->vctxt.userData, 4639"Element content declaration doesn't start and stop in the same entity\n"); 4640 ctxt->valid = 0; 4641 } 4642 NEXT; 4643 if (RAW == '?') { 4644 if (ret != NULL) 4645 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4646 NEXT; 4647 } else if (RAW == '*') { 4648 if (ret != NULL) { 4649 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4650 cur = ret; 4651 /* 4652 * Some normalization: 4653 * (a | b* | c?)* == (a | b | c)* 4654 */ 4655 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4656 if ((cur->c1 != NULL) && 4657 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4658 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 4659 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4660 if ((cur->c2 != NULL) && 4661 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4662 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 4663 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4664 cur = cur->c2; 4665 } 4666 } 4667 NEXT; 4668 } else if (RAW == '+') { 4669 if (ret != NULL) { 4670 int found = 0; 4671 4672 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4673 /* 4674 * Some normalization: 4675 * (a | b*)+ == (a | b)* 4676 * (a | b?)+ == (a | b)* 4677 */ 4678 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4679 if ((cur->c1 != NULL) && 4680 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4681 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 4682 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4683 found = 1; 4684 } 4685 if ((cur->c2 != NULL) && 4686 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4687 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 4688 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4689 found = 1; 4690 } 4691 cur = cur->c2; 4692 } 4693 if (found) 4694 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4695 } 4696 NEXT; 4697 } 4698 return(ret); 4699} 4700 4701/** 4702 * 
xmlParseElementContentDecl: 4703 * @ctxt: an XML parser context 4704 * @name: the name of the element being defined. 4705 * @result: the Element Content pointer will be stored here if any 4706 * 4707 * parse the declaration for an Element content either Mixed or Children, 4708 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4709 * 4710 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4711 * 4712 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4713 */ 4714 4715int 4716xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4717 xmlElementContentPtr *result) { 4718 4719 xmlElementContentPtr tree = NULL; 4720 xmlParserInputPtr input = ctxt->input; 4721 int res; 4722 4723 *result = NULL; 4724 4725 if (RAW != '(') { 4726 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4728 ctxt->sax->error(ctxt->userData, 4729 "xmlParseElementContentDecl : %s '(' expected\n", name); 4730 ctxt->wellFormed = 0; 4731 ctxt->disableSAX = 1; 4732 return(-1); 4733 } 4734 NEXT; 4735 GROW; 4736 SKIP_BLANKS; 4737 if ((RAW == '#') && (NXT(1) == 'P') && 4738 (NXT(2) == 'C') && (NXT(3) == 'D') && 4739 (NXT(4) == 'A') && (NXT(5) == 'T') && 4740 (NXT(6) == 'A')) { 4741 tree = xmlParseElementMixedContentDecl(ctxt, input); 4742 res = XML_ELEMENT_TYPE_MIXED; 4743 } else { 4744 tree = xmlParseElementChildrenContentDecl(ctxt, input); 4745 res = XML_ELEMENT_TYPE_ELEMENT; 4746 } 4747 SKIP_BLANKS; 4748 *result = tree; 4749 return(res); 4750} 4751 4752/** 4753 * xmlParseElementDecl: 4754 * @ctxt: an XML parser context 4755 * 4756 * parse an Element declaration. 4757 * 4758 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4759 * 4760 * [ VC: Unique Element Type Declaration ] 4761 * No element type may be declared more than once 4762 * 4763 * Returns the type of the element, or -1 in case of error 4764 */ 4765int 4766xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4767 xmlChar *name; 4768 int ret = -1; 4769 xmlElementContentPtr content = NULL; 4770 4771 GROW; 4772 if ((RAW == '<') && (NXT(1) == '!') && 4773 (NXT(2) == 'E') && (NXT(3) == 'L') && 4774 (NXT(4) == 'E') && (NXT(5) == 'M') && 4775 (NXT(6) == 'E') && (NXT(7) == 'N') && 4776 (NXT(8) == 'T')) { 4777 xmlParserInputPtr input = ctxt->input; 4778 4779 SKIP(9); 4780 if (!IS_BLANK(CUR)) { 4781 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4783 ctxt->sax->error(ctxt->userData, 4784 "Space required after 'ELEMENT'\n"); 4785 ctxt->wellFormed = 0; 4786 ctxt->disableSAX = 1; 4787 } 4788 SKIP_BLANKS; 4789 name = xmlParseName(ctxt); 4790 if (name == NULL) { 4791 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4793 ctxt->sax->error(ctxt->userData, 4794 "xmlParseElementDecl: no name for Element\n"); 4795 ctxt->wellFormed = 0; 4796 ctxt->disableSAX = 1; 4797 return(-1); 4798 } 4799 while ((RAW == 0) && (ctxt->inputNr > 1)) 4800 xmlPopInput(ctxt); 4801 if (!IS_BLANK(CUR)) { 4802 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4804 ctxt->sax->error(ctxt->userData, 4805 "Space required after the element name\n"); 4806 ctxt->wellFormed = 0; 4807 ctxt->disableSAX = 1; 4808 } 4809 SKIP_BLANKS; 4810 if ((RAW == 'E') && (NXT(1) == 'M') && 4811 (NXT(2) == 'P') && (NXT(3) == 'T') && 4812 (NXT(4) == 'Y')) { 4813 SKIP(5); 4814 /* 4815 * Element must always be empty. 4816 */ 4817 ret = XML_ELEMENT_TYPE_EMPTY; 4818 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4819 (NXT(2) == 'Y')) { 4820 SKIP(3); 4821 /* 4822 * Element is a generic container. 
4823 */ 4824 ret = XML_ELEMENT_TYPE_ANY; 4825 } else if (RAW == '(') { 4826 ret = xmlParseElementContentDecl(ctxt, name, &content); 4827 } else { 4828 /* 4829 * [ WFC: PEs in Internal Subset ] error handling. 4830 */ 4831 if ((RAW == '%') && (ctxt->external == 0) && 4832 (ctxt->inputNr == 1)) { 4833 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4835 ctxt->sax->error(ctxt->userData, 4836 "PEReference: forbidden within markup decl in internal subset\n"); 4837 } else { 4838 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4840 ctxt->sax->error(ctxt->userData, 4841 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4842 } 4843 ctxt->wellFormed = 0; 4844 ctxt->disableSAX = 1; 4845 if (name != NULL) xmlFree(name); 4846 return(-1); 4847 } 4848 4849 SKIP_BLANKS; 4850 /* 4851 * Pop-up of finished entities. 4852 */ 4853 while ((RAW == 0) && (ctxt->inputNr > 1)) 4854 xmlPopInput(ctxt); 4855 SKIP_BLANKS; 4856 4857 if (RAW != '>') { 4858 ctxt->errNo = XML_ERR_GT_REQUIRED; 4859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4860 ctxt->sax->error(ctxt->userData, 4861 "xmlParseElementDecl: expected '>' at the end\n"); 4862 ctxt->wellFormed = 0; 4863 ctxt->disableSAX = 1; 4864 } else { 4865 if (input != ctxt->input) { 4866 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4868 ctxt->sax->error(ctxt->userData, 4869"Element declaration doesn't start and stop in the same entity\n"); 4870 ctxt->wellFormed = 0; 4871 ctxt->disableSAX = 1; 4872 } 4873 4874 NEXT; 4875 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4876 (ctxt->sax->elementDecl != NULL)) 4877 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4878 content); 4879 } 4880 if (content != NULL) { 4881 xmlFreeElementContent(content); 4882 } 4883 if (name != NULL) { 4884 xmlFree(name); 4885 } 4886 } 4887 return(ret); 4888} 4889 4890/** 4891 * 
xmlParseConditionalSections 4892 * @ctxt: an XML parser context 4893 * 4894 * [61] conditionalSect ::= includeSect | ignoreSect 4895 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4896 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4897 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4898 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4899 */ 4900 4901static void 4902xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4903 SKIP(3); 4904 SKIP_BLANKS; 4905 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4906 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4907 (NXT(6) == 'E')) { 4908 SKIP(7); 4909 SKIP_BLANKS; 4910 if (RAW != '[') { 4911 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4913 ctxt->sax->error(ctxt->userData, 4914 "XML conditional section '[' expected\n"); 4915 ctxt->wellFormed = 0; 4916 ctxt->disableSAX = 1; 4917 } else { 4918 NEXT; 4919 } 4920 if (xmlParserDebugEntities) { 4921 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4922 xmlGenericError(xmlGenericErrorContext, 4923 "%s(%d): ", ctxt->input->filename, 4924 ctxt->input->line); 4925 xmlGenericError(xmlGenericErrorContext, 4926 "Entering INCLUDE Conditional Section\n"); 4927 } 4928 4929 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4930 (NXT(2) != '>'))) { 4931 const xmlChar *check = CUR_PTR; 4932 int cons = ctxt->input->consumed; 4933 4934 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4935 xmlParseConditionalSections(ctxt); 4936 } else if (IS_BLANK(CUR)) { 4937 NEXT; 4938 } else if (RAW == '%') { 4939 xmlParsePEReference(ctxt); 4940 } else 4941 xmlParseMarkupDecl(ctxt); 4942 4943 /* 4944 * Pop-up of finished entities. 
4945 */ 4946 while ((RAW == 0) && (ctxt->inputNr > 1)) 4947 xmlPopInput(ctxt); 4948 4949 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 4950 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4952 ctxt->sax->error(ctxt->userData, 4953 "Content error in the external subset\n"); 4954 ctxt->wellFormed = 0; 4955 ctxt->disableSAX = 1; 4956 break; 4957 } 4958 } 4959 if (xmlParserDebugEntities) { 4960 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4961 xmlGenericError(xmlGenericErrorContext, 4962 "%s(%d): ", ctxt->input->filename, 4963 ctxt->input->line); 4964 xmlGenericError(xmlGenericErrorContext, 4965 "Leaving INCLUDE Conditional Section\n"); 4966 } 4967 4968 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4969 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4970 int state; 4971 int instate; 4972 int depth = 0; 4973 4974 SKIP(6); 4975 SKIP_BLANKS; 4976 if (RAW != '[') { 4977 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4979 ctxt->sax->error(ctxt->userData, 4980 "XML conditional section '[' expected\n"); 4981 ctxt->wellFormed = 0; 4982 ctxt->disableSAX = 1; 4983 } else { 4984 NEXT; 4985 } 4986 if (xmlParserDebugEntities) { 4987 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4988 xmlGenericError(xmlGenericErrorContext, 4989 "%s(%d): ", ctxt->input->filename, 4990 ctxt->input->line); 4991 xmlGenericError(xmlGenericErrorContext, 4992 "Entering IGNORE Conditional Section\n"); 4993 } 4994 4995 /* 4996 * Parse up to the end of the conditional section 4997 * But disable SAX event generating DTD building in the meantime 4998 */ 4999 state = ctxt->disableSAX; 5000 instate = ctxt->instate; 5001 ctxt->disableSAX = 1; 5002 ctxt->instate = XML_PARSER_IGNORE; 5003 5004 while ((depth >= 0) && (RAW != 0)) { 5005 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5006 depth++; 5007 SKIP(3); 5008 continue; 5009 } 5010 
if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5011 if (--depth >= 0) SKIP(3); 5012 continue; 5013 } 5014 NEXT; 5015 continue; 5016 } 5017 5018 ctxt->disableSAX = state; 5019 ctxt->instate = instate; 5020 5021 if (xmlParserDebugEntities) { 5022 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5023 xmlGenericError(xmlGenericErrorContext, 5024 "%s(%d): ", ctxt->input->filename, 5025 ctxt->input->line); 5026 xmlGenericError(xmlGenericErrorContext, 5027 "Leaving IGNORE Conditional Section\n"); 5028 } 5029 5030 } else { 5031 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 5032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5033 ctxt->sax->error(ctxt->userData, 5034 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 5035 ctxt->wellFormed = 0; 5036 ctxt->disableSAX = 1; 5037 } 5038 5039 if (RAW == 0) 5040 SHRINK; 5041 5042 if (RAW == 0) { 5043 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 5044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5045 ctxt->sax->error(ctxt->userData, 5046 "XML conditional section not closed\n"); 5047 ctxt->wellFormed = 0; 5048 ctxt->disableSAX = 1; 5049 } else { 5050 SKIP(3); 5051 } 5052} 5053 5054/** 5055 * xmlParseMarkupDecl: 5056 * @ctxt: an XML parser context 5057 * 5058 * parse Markup declarations 5059 * 5060 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5061 * NotationDecl | PI | Comment 5062 * 5063 * [ VC: Proper Declaration/PE Nesting ] 5064 * Parameter-entity replacement text must be properly nested with 5065 * markup declarations. That is to say, if either the first character 5066 * or the last character of a markup declaration (markupdecl above) is 5067 * contained in the replacement text for a parameter-entity reference, 5068 * both must be contained in the same replacement text. 5069 * 5070 * [ WFC: PEs in Internal Subset ] 5071 * In the internal DTD subset, parameter-entity references can occur 5072 * only where markup declarations can occur, not within markup declarations. 
5073 * (This does not apply to references that occur in external parameter 5074 * entities or to the external subset.) 5075 */ 5076void 5077xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5078 GROW; 5079 xmlParseElementDecl(ctxt); 5080 xmlParseAttributeListDecl(ctxt); 5081 xmlParseEntityDecl(ctxt); 5082 xmlParseNotationDecl(ctxt); 5083 xmlParsePI(ctxt); 5084 xmlParseComment(ctxt); 5085 /* 5086 * This is only for internal subset. On external entities, 5087 * the replacement is done before parsing stage 5088 */ 5089 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5090 xmlParsePEReference(ctxt); 5091 5092 /* 5093 * Conditional sections are allowed from entities included 5094 * by PE References in the internal subset. 5095 */ 5096 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5097 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5098 xmlParseConditionalSections(ctxt); 5099 } 5100 } 5101 5102 ctxt->instate = XML_PARSER_DTD; 5103} 5104 5105/** 5106 * xmlParseTextDecl: 5107 * @ctxt: an XML parser context 5108 * 5109 * parse an XML declaration header for external entities 5110 * 5111 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5112 * 5113 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5114 */ 5115 5116void 5117xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5118 xmlChar *version; 5119 5120 /* 5121 * We know that '<?xml' is here. 
5122 */ 5123 if ((RAW == '<') && (NXT(1) == '?') && 5124 (NXT(2) == 'x') && (NXT(3) == 'm') && 5125 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5126 SKIP(5); 5127 } else { 5128 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 5129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5130 ctxt->sax->error(ctxt->userData, 5131 "Text declaration '<?xml' required\n"); 5132 ctxt->wellFormed = 0; 5133 ctxt->disableSAX = 1; 5134 5135 return; 5136 } 5137 5138 if (!IS_BLANK(CUR)) { 5139 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5141 ctxt->sax->error(ctxt->userData, 5142 "Space needed after '<?xml'\n"); 5143 ctxt->wellFormed = 0; 5144 ctxt->disableSAX = 1; 5145 } 5146 SKIP_BLANKS; 5147 5148 /* 5149 * We may have the VersionInfo here. 5150 */ 5151 version = xmlParseVersionInfo(ctxt); 5152 if (version == NULL) 5153 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5154 else { 5155 if (!IS_BLANK(CUR)) { 5156 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 5157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5158 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 5159 ctxt->wellFormed = 0; 5160 ctxt->disableSAX = 1; 5161 } 5162 } 5163 ctxt->input->version = version; 5164 5165 /* 5166 * We must have the encoding declaration 5167 */ 5168 xmlParseEncodingDecl(ctxt); 5169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5170 /* 5171 * The XML REC instructs us to stop parsing right here 5172 */ 5173 return; 5174 } 5175 5176 SKIP_BLANKS; 5177 if ((RAW == '?') && (NXT(1) == '>')) { 5178 SKIP(2); 5179 } else if (RAW == '>') { 5180 /* Deprecated old WD ... 
*/ 5181 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5183 ctxt->sax->error(ctxt->userData, 5184 "XML declaration must end-up with '?>'\n"); 5185 ctxt->wellFormed = 0; 5186 ctxt->disableSAX = 1; 5187 NEXT; 5188 } else { 5189 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 5190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5191 ctxt->sax->error(ctxt->userData, 5192 "parsing XML declaration: '?>' expected\n"); 5193 ctxt->wellFormed = 0; 5194 ctxt->disableSAX = 1; 5195 MOVETO_ENDTAG(CUR_PTR); 5196 NEXT; 5197 } 5198} 5199 5200/** 5201 * xmlParseExternalSubset: 5202 * @ctxt: an XML parser context 5203 * @ExternalID: the external identifier 5204 * @SystemID: the system identifier (or URL) 5205 * 5206 * parse Markup declarations from an external subset 5207 * 5208 * [30] extSubset ::= textDecl? extSubsetDecl 5209 * 5210 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5211 */ 5212void 5213xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5214 const xmlChar *SystemID) { 5215 GROW; 5216 if ((RAW == '<') && (NXT(1) == '?') && 5217 (NXT(2) == 'x') && (NXT(3) == 'm') && 5218 (NXT(4) == 'l')) { 5219 xmlParseTextDecl(ctxt); 5220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5221 /* 5222 * The XML REC instructs us to stop parsing right here 5223 */ 5224 ctxt->instate = XML_PARSER_EOF; 5225 return; 5226 } 5227 } 5228 if (ctxt->myDoc == NULL) { 5229 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5230 } 5231 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5232 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5233 5234 ctxt->instate = XML_PARSER_DTD; 5235 ctxt->external = 1; 5236 while (((RAW == '<') && (NXT(1) == '?')) || 5237 ((RAW == '<') && (NXT(1) == '!')) || 5238 (RAW == '%') || IS_BLANK(CUR)) { 5239 const xmlChar *check = CUR_PTR; 5240 int cons = ctxt->input->consumed; 5241 5242 GROW; 5243 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == 
'[')) { 5244 xmlParseConditionalSections(ctxt); 5245 } else if (IS_BLANK(CUR)) { 5246 NEXT; 5247 } else if (RAW == '%') { 5248 xmlParsePEReference(ctxt); 5249 } else 5250 xmlParseMarkupDecl(ctxt); 5251 5252 /* 5253 * Pop-up of finished entities. 5254 */ 5255 while ((RAW == 0) && (ctxt->inputNr > 1)) 5256 xmlPopInput(ctxt); 5257 5258 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5259 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5261 ctxt->sax->error(ctxt->userData, 5262 "Content error in the external subset\n"); 5263 ctxt->wellFormed = 0; 5264 ctxt->disableSAX = 1; 5265 break; 5266 } 5267 } 5268 5269 if (RAW != 0) { 5270 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5272 ctxt->sax->error(ctxt->userData, 5273 "Extra content at the end of the document\n"); 5274 ctxt->wellFormed = 0; 5275 ctxt->disableSAX = 1; 5276 } 5277 5278} 5279 5280/** 5281 * xmlParseReference: 5282 * @ctxt: an XML parser context 5283 * 5284 * parse and handle entity references in content, depending on the SAX 5285 * interface, this may end-up in a call to character() if this is a 5286 * CharRef, a predefined entity, if there is no reference() callback. 5287 * or if the parser was asked to switch to that mode. 5288 * 5289 * [67] Reference ::= EntityRef | CharRef 5290 */ 5291void 5292xmlParseReference(xmlParserCtxtPtr ctxt) { 5293 xmlEntityPtr ent; 5294 xmlChar *val; 5295 if (RAW != '&') return; 5296 5297 if (NXT(1) == '#') { 5298 int i = 0; 5299 xmlChar out[10]; 5300 int hex = NXT(2); 5301 int value = xmlParseCharRef(ctxt); 5302 5303 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5304 /* 5305 * So we are using non-UTF-8 buffers 5306 * Check that the char fit on 8bits, if not 5307 * generate a CharRef. 
5308 */ 5309 if (value <= 0xFF) { 5310 out[0] = value; 5311 out[1] = 0; 5312 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5313 (!ctxt->disableSAX)) 5314 ctxt->sax->characters(ctxt->userData, out, 1); 5315 } else { 5316 if ((hex == 'x') || (hex == 'X')) 5317 snprintf((char *)out, sizeof(out), "#x%X", value); 5318 else 5319 snprintf((char *)out, sizeof(out), "#%d", value); 5320 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5321 (!ctxt->disableSAX)) 5322 ctxt->sax->reference(ctxt->userData, out); 5323 } 5324 } else { 5325 /* 5326 * Just encode the value in UTF-8 5327 */ 5328 COPY_BUF(0 ,out, i, value); 5329 out[i] = 0; 5330 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5331 (!ctxt->disableSAX)) 5332 ctxt->sax->characters(ctxt->userData, out, i); 5333 } 5334 } else { 5335 ent = xmlParseEntityRef(ctxt); 5336 if (ent == NULL) return; 5337 if (!ctxt->wellFormed) 5338 return; 5339 if ((ent->name != NULL) && 5340 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5341 xmlNodePtr list = NULL; 5342 int ret; 5343 5344 5345 /* 5346 * The first reference to the entity trigger a parsing phase 5347 * where the ent->children is filled with the result from 5348 * the parsing. 5349 */ 5350 if (ent->children == NULL) { 5351 xmlChar *value; 5352 value = ent->content; 5353 5354 /* 5355 * Check that this entity is well formed 5356 */ 5357 if ((value != NULL) && 5358 (value[1] == 0) && (value[0] == '<') && 5359 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5360 /* 5361 * DONE: get definite answer on this !!! 5362 * Lots of entity decls are used to declare a single 5363 * char 5364 * <!ENTITY lt "<"> 5365 * Which seems to be valid since 5366 * 2.4: The ampersand character (&) and the left angle 5367 * bracket (<) may appear in their literal form only 5368 * when used ... They are also legal within the literal 5369 * entity value of an internal entity declaration;i 5370 * see "4.3.2 Well-Formed Parsed Entities". 
5371 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5372 * Looking at the OASIS test suite and James Clark 5373 * tests, this is broken. However the XML REC uses 5374 * it. Is the XML REC not well-formed ???? 5375 * This is a hack to avoid this problem 5376 * 5377 * ANSWER: since lt gt amp .. are already defined, 5378 * this is a redefinition and hence the fact that the 5379 * content is not well balanced is not a Wf error, this 5380 * is lousy but acceptable. 5381 */ 5382 list = xmlNewDocText(ctxt->myDoc, value); 5383 if (list != NULL) { 5384 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5385 (ent->children == NULL)) { 5386 ent->children = list; 5387 ent->last = list; 5388 list->parent = (xmlNodePtr) ent; 5389 } else { 5390 xmlFreeNodeList(list); 5391 } 5392 } else if (list != NULL) { 5393 xmlFreeNodeList(list); 5394 } 5395 } else { 5396 /* 5397 * 4.3.2: An internal general parsed entity is well-formed 5398 * if its replacement text matches the production labeled 5399 * content. 5400 */ 5401 5402 void *user_data; 5403 /* 5404 * This is a bit hackish but this seems the best 5405 * way to make sure both SAX and DOM entity support 5406 * behaves okay. 
5407 */ 5408 if (ctxt->userData == ctxt) 5409 user_data = NULL; 5410 else 5411 user_data = ctxt->userData; 5412 5413 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5414 ctxt->depth++; 5415 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 5416 ctxt->sax, user_data, ctxt->depth, 5417 value, &list); 5418 ctxt->depth--; 5419 } else if (ent->etype == 5420 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5421 ctxt->depth++; 5422 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5423 ctxt->sax, user_data, ctxt->depth, 5424 ent->URI, ent->ExternalID, &list); 5425 ctxt->depth--; 5426 } else { 5427 ret = -1; 5428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5429 ctxt->sax->error(ctxt->userData, 5430 "Internal: invalid entity type\n"); 5431 } 5432 if (ret == XML_ERR_ENTITY_LOOP) { 5433 ctxt->errNo = XML_ERR_ENTITY_LOOP; 5434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5435 ctxt->sax->error(ctxt->userData, 5436 "Detected entity reference loop\n"); 5437 ctxt->wellFormed = 0; 5438 ctxt->disableSAX = 1; 5439 return; 5440 } else if ((ret == 0) && (list != NULL)) { 5441 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5442 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5443 (ent->children == NULL)) { 5444 ent->children = list; 5445 if (ctxt->replaceEntities) { 5446 /* 5447 * Prune it directly in the generated document 5448 * except for single text nodes. 
5449 */ 5450 if ((list->type == XML_TEXT_NODE) && 5451 (list->next == NULL)) { 5452 list->parent = (xmlNodePtr) ent; 5453 list = NULL; 5454 } else { 5455 while (list != NULL) { 5456 list->parent = (xmlNodePtr) ctxt->node; 5457 if (list->next == NULL) 5458 ent->last = list; 5459 list = list->next; 5460 } 5461 list = ent->children; 5462 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5463 xmlAddEntityReference(ent, list, NULL); 5464 } 5465 } else { 5466 while (list != NULL) { 5467 list->parent = (xmlNodePtr) ent; 5468 if (list->next == NULL) 5469 ent->last = list; 5470 list = list->next; 5471 } 5472 } 5473 } else { 5474 xmlFreeNodeList(list); 5475 list = NULL; 5476 } 5477 } else if (ret > 0) { 5478 ctxt->errNo = ret; 5479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5480 ctxt->sax->error(ctxt->userData, 5481 "Entity value required\n"); 5482 ctxt->wellFormed = 0; 5483 ctxt->disableSAX = 1; 5484 } else if (list != NULL) { 5485 xmlFreeNodeList(list); 5486 list = NULL; 5487 } 5488 } 5489 } 5490 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5491 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5492 /* 5493 * Create a node. 
5494 */ 5495 ctxt->sax->reference(ctxt->userData, ent->name); 5496 return; 5497 } else if (ctxt->replaceEntities) { 5498 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5499 /* 5500 * Seems we are generating the DOM content, do 5501 * a simple tree copy for all references except the first 5502 * In the first occurrence list contains the replacement 5503 */ 5504 if (list == NULL) { 5505 xmlNodePtr new = NULL, cur, firstChild = NULL; 5506 cur = ent->children; 5507 while (cur != NULL) { 5508 new = xmlCopyNode(cur, 1); 5509 if (firstChild == NULL){ 5510 firstChild = new; 5511 } 5512 xmlAddChild(ctxt->node, new); 5513 if (cur == ent->last) 5514 break; 5515 cur = cur->next; 5516 } 5517 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5518 xmlAddEntityReference(ent, firstChild, new); 5519 } else { 5520 /* 5521 * the name change is to avoid coalescing of the 5522 * node with a possible previous text one which 5523 * would make ent->children a dangling pointer 5524 */ 5525 if (ent->children->type == XML_TEXT_NODE) 5526 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5527 if ((ent->last != ent->children) && 5528 (ent->last->type == XML_TEXT_NODE)) 5529 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5530 xmlAddChildList(ctxt->node, ent->children); 5531 } 5532 5533 /* 5534 * This is to avoid a nasty side effect, see 5535 * characters() in SAX.c 5536 */ 5537 ctxt->nodemem = 0; 5538 ctxt->nodelen = 0; 5539 return; 5540 } else { 5541 /* 5542 * Probably running in SAX mode 5543 */ 5544 xmlParserInputPtr input; 5545 5546 input = xmlNewEntityInputStream(ctxt, ent); 5547 xmlPushInput(ctxt, input); 5548 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5549 (RAW == '<') && (NXT(1) == '?') && 5550 (NXT(2) == 'x') && (NXT(3) == 'm') && 5551 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5552 xmlParseTextDecl(ctxt); 5553 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5554 /* 5555 * The XML REC instructs us to stop parsing right here 5556 */ 5557 ctxt->instate = 
XML_PARSER_EOF; 5558 return; 5559 } 5560 if (input->standalone == 1) { 5561 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5563 ctxt->sax->error(ctxt->userData, 5564 "external parsed entities cannot be standalone\n"); 5565 ctxt->wellFormed = 0; 5566 ctxt->disableSAX = 1; 5567 } 5568 } 5569 return; 5570 } 5571 } 5572 } else { 5573 val = ent->content; 5574 if (val == NULL) return; 5575 /* 5576 * inline the entity. 5577 */ 5578 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5579 (!ctxt->disableSAX)) 5580 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5581 } 5582 } 5583} 5584 5585/** 5586 * xmlParseEntityRef: 5587 * @ctxt: an XML parser context 5588 * 5589 * parse ENTITY references declarations 5590 * 5591 * [68] EntityRef ::= '&' Name ';' 5592 * 5593 * [ WFC: Entity Declared ] 5594 * In a document without any DTD, a document with only an internal DTD 5595 * subset which contains no parameter entity references, or a document 5596 * with "standalone='yes'", the Name given in the entity reference 5597 * must match that in an entity declaration, except that well-formed 5598 * documents need not declare any of the following entities: amp, lt, 5599 * gt, apos, quot. The declaration of a parameter entity must precede 5600 * any reference to it. Similarly, the declaration of a general entity 5601 * must precede any reference to it which appears in a default value in an 5602 * attribute-list declaration. Note that if entities are declared in the 5603 * external subset or in external parameter entities, a non-validating 5604 * processor is not obligated to read and process their declarations; 5605 * for such documents, the rule that an entity must be declared is a 5606 * well-formedness constraint only if standalone='yes'. 
5607 * 5608 * [ WFC: Parsed Entity ] 5609 * An entity reference must not contain the name of an unparsed entity 5610 * 5611 * Returns the xmlEntityPtr if found, or NULL otherwise. 5612 */ 5613xmlEntityPtr 5614xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5615 xmlChar *name; 5616 xmlEntityPtr ent = NULL; 5617 5618 GROW; 5619 5620 if (RAW == '&') { 5621 NEXT; 5622 name = xmlParseName(ctxt); 5623 if (name == NULL) { 5624 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5626 ctxt->sax->error(ctxt->userData, 5627 "xmlParseEntityRef: no name\n"); 5628 ctxt->wellFormed = 0; 5629 ctxt->disableSAX = 1; 5630 } else { 5631 if (RAW == ';') { 5632 NEXT; 5633 /* 5634 * Ask first SAX for entity resolution, otherwise try the 5635 * predefined set. 5636 */ 5637 if (ctxt->sax != NULL) { 5638 if (ctxt->sax->getEntity != NULL) 5639 ent = ctxt->sax->getEntity(ctxt->userData, name); 5640 if (ent == NULL) 5641 ent = xmlGetPredefinedEntity(name); 5642 if ((ent == NULL) && (ctxt->userData==ctxt)) { 5643 ent = getEntity(ctxt, name); 5644 } 5645 } 5646 /* 5647 * [ WFC: Entity Declared ] 5648 * In a document without any DTD, a document with only an 5649 * internal DTD subset which contains no parameter entity 5650 * references, or a document with "standalone='yes'", the 5651 * Name given in the entity reference must match that in an 5652 * entity declaration, except that well-formed documents 5653 * need not declare any of the following entities: amp, lt, 5654 * gt, apos, quot. 5655 * The declaration of a parameter entity must precede any 5656 * reference to it. 5657 * Similarly, the declaration of a general entity must 5658 * precede any reference to it which appears in a default 5659 * value in an attribute-list declaration. 
Note that if 5660 * entities are declared in the external subset or in 5661 * external parameter entities, a non-validating processor 5662 * is not obligated to read and process their declarations; 5663 * for such documents, the rule that an entity must be 5664 * declared is a well-formedness constraint only if 5665 * standalone='yes'. 5666 */ 5667 if (ent == NULL) { 5668 if ((ctxt->standalone == 1) || 5669 ((ctxt->hasExternalSubset == 0) && 5670 (ctxt->hasPErefs == 0))) { 5671 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5673 ctxt->sax->error(ctxt->userData, 5674 "Entity '%s' not defined\n", name); 5675 ctxt->wellFormed = 0; 5676 ctxt->valid = 0; 5677 ctxt->disableSAX = 1; 5678 } else { 5679 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5681 ctxt->sax->error(ctxt->userData, 5682 "Entity '%s' not defined\n", name); 5683 ctxt->valid = 0; 5684 } 5685 } 5686 5687 /* 5688 * [ WFC: Parsed Entity ] 5689 * An entity reference must not contain the name of an 5690 * unparsed entity 5691 */ 5692 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5693 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5695 ctxt->sax->error(ctxt->userData, 5696 "Entity reference to unparsed entity %s\n", name); 5697 ctxt->wellFormed = 0; 5698 ctxt->disableSAX = 1; 5699 } 5700 5701 /* 5702 * [ WFC: No External Entity References ] 5703 * Attribute values cannot contain direct or indirect 5704 * entity references to external entities. 
5705 */ 5706 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5707 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5708 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5710 ctxt->sax->error(ctxt->userData, 5711 "Attribute references external entity '%s'\n", name); 5712 ctxt->wellFormed = 0; 5713 ctxt->disableSAX = 1; 5714 } 5715 /* 5716 * [ WFC: No < in Attribute Values ] 5717 * The replacement text of any entity referred to directly or 5718 * indirectly in an attribute value (other than "<") must 5719 * not contain a <. 5720 */ 5721 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5722 (ent != NULL) && 5723 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5724 (ent->content != NULL) && 5725 (xmlStrchr(ent->content, '<'))) { 5726 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5728 ctxt->sax->error(ctxt->userData, 5729 "'<' in entity '%s' is not allowed in attributes values\n", name); 5730 ctxt->wellFormed = 0; 5731 ctxt->disableSAX = 1; 5732 } 5733 5734 /* 5735 * Internal check, no parameter entities here ... 5736 */ 5737 else { 5738 switch (ent->etype) { 5739 case XML_INTERNAL_PARAMETER_ENTITY: 5740 case XML_EXTERNAL_PARAMETER_ENTITY: 5741 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5743 ctxt->sax->error(ctxt->userData, 5744 "Attempt to reference the parameter entity '%s'\n", name); 5745 ctxt->wellFormed = 0; 5746 ctxt->disableSAX = 1; 5747 break; 5748 default: 5749 break; 5750 } 5751 } 5752 5753 /* 5754 * [ WFC: No Recursion ] 5755 * A parsed entity must not contain a recursive reference 5756 * to itself, either directly or indirectly. 
5757 * Done somewhere else 5758 */ 5759 5760 } else { 5761 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5763 ctxt->sax->error(ctxt->userData, 5764 "xmlParseEntityRef: expecting ';'\n"); 5765 ctxt->wellFormed = 0; 5766 ctxt->disableSAX = 1; 5767 } 5768 xmlFree(name); 5769 } 5770 } 5771 return(ent); 5772} 5773 5774/** 5775 * xmlParseStringEntityRef: 5776 * @ctxt: an XML parser context 5777 * @str: a pointer to an index in the string 5778 * 5779 * parse ENTITY references declarations, but this version parses it from 5780 * a string value. 5781 * 5782 * [68] EntityRef ::= '&' Name ';' 5783 * 5784 * [ WFC: Entity Declared ] 5785 * In a document without any DTD, a document with only an internal DTD 5786 * subset which contains no parameter entity references, or a document 5787 * with "standalone='yes'", the Name given in the entity reference 5788 * must match that in an entity declaration, except that well-formed 5789 * documents need not declare any of the following entities: amp, lt, 5790 * gt, apos, quot. The declaration of a parameter entity must precede 5791 * any reference to it. Similarly, the declaration of a general entity 5792 * must precede any reference to it which appears in a default value in an 5793 * attribute-list declaration. Note that if entities are declared in the 5794 * external subset or in external parameter entities, a non-validating 5795 * processor is not obligated to read and process their declarations; 5796 * for such documents, the rule that an entity must be declared is a 5797 * well-formedness constraint only if standalone='yes'. 5798 * 5799 * [ WFC: Parsed Entity ] 5800 * An entity reference must not contain the name of an unparsed entity 5801 * 5802 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5803 * is updated to the current location in the string. 
5804 */ 5805xmlEntityPtr 5806xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5807 xmlChar *name; 5808 const xmlChar *ptr; 5809 xmlChar cur; 5810 xmlEntityPtr ent = NULL; 5811 5812 if ((str == NULL) || (*str == NULL)) 5813 return(NULL); 5814 ptr = *str; 5815 cur = *ptr; 5816 if (cur == '&') { 5817 ptr++; 5818 cur = *ptr; 5819 name = xmlParseStringName(ctxt, &ptr); 5820 if (name == NULL) { 5821 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5823 ctxt->sax->error(ctxt->userData, 5824 "xmlParseStringEntityRef: no name\n"); 5825 ctxt->wellFormed = 0; 5826 ctxt->disableSAX = 1; 5827 } else { 5828 if (*ptr == ';') { 5829 ptr++; 5830 /* 5831 * Ask first SAX for entity resolution, otherwise try the 5832 * predefined set. 5833 */ 5834 if (ctxt->sax != NULL) { 5835 if (ctxt->sax->getEntity != NULL) 5836 ent = ctxt->sax->getEntity(ctxt->userData, name); 5837 if (ent == NULL) 5838 ent = xmlGetPredefinedEntity(name); 5839 if ((ent == NULL) && (ctxt->userData==ctxt)) { 5840 ent = getEntity(ctxt, name); 5841 } 5842 } 5843 /* 5844 * [ WFC: Entity Declared ] 5845 * In a document without any DTD, a document with only an 5846 * internal DTD subset which contains no parameter entity 5847 * references, or a document with "standalone='yes'", the 5848 * Name given in the entity reference must match that in an 5849 * entity declaration, except that well-formed documents 5850 * need not declare any of the following entities: amp, lt, 5851 * gt, apos, quot. 5852 * The declaration of a parameter entity must precede any 5853 * reference to it. 5854 * Similarly, the declaration of a general entity must 5855 * precede any reference to it which appears in a default 5856 * value in an attribute-list declaration. 
Note that if 5857 * entities are declared in the external subset or in 5858 * external parameter entities, a non-validating processor 5859 * is not obligated to read and process their declarations; 5860 * for such documents, the rule that an entity must be 5861 * declared is a well-formedness constraint only if 5862 * standalone='yes'. 5863 */ 5864 if (ent == NULL) { 5865 if ((ctxt->standalone == 1) || 5866 ((ctxt->hasExternalSubset == 0) && 5867 (ctxt->hasPErefs == 0))) { 5868 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5870 ctxt->sax->error(ctxt->userData, 5871 "Entity '%s' not defined\n", name); 5872 ctxt->wellFormed = 0; 5873 ctxt->disableSAX = 1; 5874 } else { 5875 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5876 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5877 ctxt->sax->warning(ctxt->userData, 5878 "Entity '%s' not defined\n", name); 5879 } 5880 } 5881 5882 /* 5883 * [ WFC: Parsed Entity ] 5884 * An entity reference must not contain the name of an 5885 * unparsed entity 5886 */ 5887 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5888 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5890 ctxt->sax->error(ctxt->userData, 5891 "Entity reference to unparsed entity %s\n", name); 5892 ctxt->wellFormed = 0; 5893 ctxt->disableSAX = 1; 5894 } 5895 5896 /* 5897 * [ WFC: No External Entity References ] 5898 * Attribute values cannot contain direct or indirect 5899 * entity references to external entities. 
5900 */ 5901 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5902 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5903 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5905 ctxt->sax->error(ctxt->userData, 5906 "Attribute references external entity '%s'\n", name); 5907 ctxt->wellFormed = 0; 5908 ctxt->disableSAX = 1; 5909 } 5910 /* 5911 * [ WFC: No < in Attribute Values ] 5912 * The replacement text of any entity referred to directly or 5913 * indirectly in an attribute value (other than "<") must 5914 * not contain a <. 5915 */ 5916 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5917 (ent != NULL) && 5918 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5919 (ent->content != NULL) && 5920 (xmlStrchr(ent->content, '<'))) { 5921 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5923 ctxt->sax->error(ctxt->userData, 5924 "'<' in entity '%s' is not allowed in attributes values\n", name); 5925 ctxt->wellFormed = 0; 5926 ctxt->disableSAX = 1; 5927 } 5928 5929 /* 5930 * Internal check, no parameter entities here ... 5931 */ 5932 else { 5933 switch (ent->etype) { 5934 case XML_INTERNAL_PARAMETER_ENTITY: 5935 case XML_EXTERNAL_PARAMETER_ENTITY: 5936 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5938 ctxt->sax->error(ctxt->userData, 5939 "Attempt to reference the parameter entity '%s'\n", name); 5940 ctxt->wellFormed = 0; 5941 ctxt->disableSAX = 1; 5942 break; 5943 default: 5944 break; 5945 } 5946 } 5947 5948 /* 5949 * [ WFC: No Recursion ] 5950 * A parsed entity must not contain a recursive reference 5951 * to itself, either directly or indirectly. 
5952 * Done somewhere else 5953 */ 5954 5955 } else { 5956 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5958 ctxt->sax->error(ctxt->userData, 5959 "xmlParseStringEntityRef: expecting ';'\n"); 5960 ctxt->wellFormed = 0; 5961 ctxt->disableSAX = 1; 5962 } 5963 xmlFree(name); 5964 } 5965 } 5966 *str = ptr; 5967 return(ent); 5968} 5969 5970/** 5971 * xmlParsePEReference: 5972 * @ctxt: an XML parser context 5973 * 5974 * parse PEReference declarations 5975 * The entity content is handled directly by pushing it's content as 5976 * a new input stream. 5977 * 5978 * [69] PEReference ::= '%' Name ';' 5979 * 5980 * [ WFC: No Recursion ] 5981 * A parsed entity must not contain a recursive 5982 * reference to itself, either directly or indirectly. 5983 * 5984 * [ WFC: Entity Declared ] 5985 * In a document without any DTD, a document with only an internal DTD 5986 * subset which contains no parameter entity references, or a document 5987 * with "standalone='yes'", ... ... The declaration of a parameter 5988 * entity must precede any reference to it... 5989 * 5990 * [ VC: Entity Declared ] 5991 * In a document with an external subset or external parameter entities 5992 * with "standalone='no'", ... ... The declaration of a parameter entity 5993 * must precede any reference to it... 5994 * 5995 * [ WFC: In DTD ] 5996 * Parameter-entity references may only appear in the DTD. 5997 * NOTE: misleading but this is handled. 
5998 */ 5999void 6000xmlParsePEReference(xmlParserCtxtPtr ctxt) { 6001 xmlChar *name; 6002 xmlEntityPtr entity = NULL; 6003 xmlParserInputPtr input; 6004 6005 if (RAW == '%') { 6006 NEXT; 6007 name = xmlParseName(ctxt); 6008 if (name == NULL) { 6009 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6011 ctxt->sax->error(ctxt->userData, 6012 "xmlParsePEReference: no name\n"); 6013 ctxt->wellFormed = 0; 6014 ctxt->disableSAX = 1; 6015 } else { 6016 if (RAW == ';') { 6017 NEXT; 6018 if ((ctxt->sax != NULL) && 6019 (ctxt->sax->getParameterEntity != NULL)) 6020 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6021 name); 6022 if (entity == NULL) { 6023 /* 6024 * [ WFC: Entity Declared ] 6025 * In a document without any DTD, a document with only an 6026 * internal DTD subset which contains no parameter entity 6027 * references, or a document with "standalone='yes'", ... 6028 * ... The declaration of a parameter entity must precede 6029 * any reference to it... 6030 */ 6031 if ((ctxt->standalone == 1) || 6032 ((ctxt->hasExternalSubset == 0) && 6033 (ctxt->hasPErefs == 0))) { 6034 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 6035 if ((!ctxt->disableSAX) && 6036 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6037 ctxt->sax->error(ctxt->userData, 6038 "PEReference: %%%s; not found\n", name); 6039 ctxt->wellFormed = 0; 6040 ctxt->disableSAX = 1; 6041 } else { 6042 /* 6043 * [ VC: Entity Declared ] 6044 * In a document with an external subset or external 6045 * parameter entities with "standalone='no'", ... 6046 * ... The declaration of a parameter entity must precede 6047 * any reference to it... 
6048 */ 6049 if ((!ctxt->disableSAX) && 6050 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6051 ctxt->sax->warning(ctxt->userData, 6052 "PEReference: %%%s; not found\n", name); 6053 ctxt->valid = 0; 6054 } 6055 } else { 6056 /* 6057 * Internal checking in case the entity quest barfed 6058 */ 6059 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6060 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6061 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6062 ctxt->sax->warning(ctxt->userData, 6063 "Internal: %%%s; is not a parameter entity\n", name); 6064 } else if (ctxt->input->free != deallocblankswrapper) { 6065 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 6066 xmlPushInput(ctxt, input); 6067 } else { 6068 /* 6069 * TODO !!! 6070 * handle the extra spaces added before and after 6071 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6072 */ 6073 input = xmlNewEntityInputStream(ctxt, entity); 6074 xmlPushInput(ctxt, input); 6075 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6076 (RAW == '<') && (NXT(1) == '?') && 6077 (NXT(2) == 'x') && (NXT(3) == 'm') && 6078 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 6079 xmlParseTextDecl(ctxt); 6080 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6081 /* 6082 * The XML REC instructs us to stop parsing 6083 * right here 6084 */ 6085 ctxt->instate = XML_PARSER_EOF; 6086 xmlFree(name); 6087 return; 6088 } 6089 } 6090 } 6091 } 6092 ctxt->hasPErefs = 1; 6093 } else { 6094 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6096 ctxt->sax->error(ctxt->userData, 6097 "xmlParsePEReference: expecting ';'\n"); 6098 ctxt->wellFormed = 0; 6099 ctxt->disableSAX = 1; 6100 } 6101 xmlFree(name); 6102 } 6103 } 6104} 6105 6106/** 6107 * xmlParseStringPEReference: 6108 * @ctxt: an XML parser context 6109 * @str: a pointer to an index in the string 6110 * 6111 * parse PEReference declarations 6112 * 6113 * [69] PEReference ::= '%' Name ';' 6114 * 6115 * [ 
WFC: No Recursion ] 6116 * A parsed entity must not contain a recursive 6117 * reference to itself, either directly or indirectly. 6118 * 6119 * [ WFC: Entity Declared ] 6120 * In a document without any DTD, a document with only an internal DTD 6121 * subset which contains no parameter entity references, or a document 6122 * with "standalone='yes'", ... ... The declaration of a parameter 6123 * entity must precede any reference to it... 6124 * 6125 * [ VC: Entity Declared ] 6126 * In a document with an external subset or external parameter entities 6127 * with "standalone='no'", ... ... The declaration of a parameter entity 6128 * must precede any reference to it... 6129 * 6130 * [ WFC: In DTD ] 6131 * Parameter-entity references may only appear in the DTD. 6132 * NOTE: misleading but this is handled. 6133 * 6134 * Returns the string of the entity content. 6135 * str is updated to the current value of the index 6136 */ 6137xmlEntityPtr 6138xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6139 const xmlChar *ptr; 6140 xmlChar cur; 6141 xmlChar *name; 6142 xmlEntityPtr entity = NULL; 6143 6144 if ((str == NULL) || (*str == NULL)) return(NULL); 6145 ptr = *str; 6146 cur = *ptr; 6147 if (cur == '%') { 6148 ptr++; 6149 cur = *ptr; 6150 name = xmlParseStringName(ctxt, &ptr); 6151 if (name == NULL) { 6152 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6154 ctxt->sax->error(ctxt->userData, 6155 "xmlParseStringPEReference: no name\n"); 6156 ctxt->wellFormed = 0; 6157 ctxt->disableSAX = 1; 6158 } else { 6159 cur = *ptr; 6160 if (cur == ';') { 6161 ptr++; 6162 cur = *ptr; 6163 if ((ctxt->sax != NULL) && 6164 (ctxt->sax->getParameterEntity != NULL)) 6165 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6166 name); 6167 if (entity == NULL) { 6168 /* 6169 * [ WFC: Entity Declared ] 6170 * In a document without any DTD, a document with only an 6171 * internal DTD subset which contains no parameter 
entity 6172 * references, or a document with "standalone='yes'", ... 6173 * ... The declaration of a parameter entity must precede 6174 * any reference to it... 6175 */ 6176 if ((ctxt->standalone == 1) || 6177 ((ctxt->hasExternalSubset == 0) && 6178 (ctxt->hasPErefs == 0))) { 6179 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 6180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6181 ctxt->sax->error(ctxt->userData, 6182 "PEReference: %%%s; not found\n", name); 6183 ctxt->wellFormed = 0; 6184 ctxt->disableSAX = 1; 6185 } else { 6186 /* 6187 * [ VC: Entity Declared ] 6188 * In a document with an external subset or external 6189 * parameter entities with "standalone='no'", ... 6190 * ... The declaration of a parameter entity must 6191 * precede any reference to it... 6192 */ 6193 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6194 ctxt->sax->warning(ctxt->userData, 6195 "PEReference: %%%s; not found\n", name); 6196 ctxt->valid = 0; 6197 } 6198 } else { 6199 /* 6200 * Internal checking in case the entity quest barfed 6201 */ 6202 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6203 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6204 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6205 ctxt->sax->warning(ctxt->userData, 6206 "Internal: %%%s; is not a parameter entity\n", name); 6207 } 6208 } 6209 ctxt->hasPErefs = 1; 6210 } else { 6211 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 6212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6213 ctxt->sax->error(ctxt->userData, 6214 "xmlParseStringPEReference: expecting ';'\n"); 6215 ctxt->wellFormed = 0; 6216 ctxt->disableSAX = 1; 6217 } 6218 xmlFree(name); 6219 } 6220 } 6221 *str = ptr; 6222 return(entity); 6223} 6224 6225/** 6226 * xmlParseDocTypeDecl: 6227 * @ctxt: an XML parser context 6228 * 6229 * parse a DOCTYPE declaration 6230 * 6231 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6232 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 6233 * 6234 * [ VC: Root Element Type ] 6235 * The Name in the document type declaration must match the element 6236 * type of the root element. 6237 */ 6238 6239void 6240xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6241 xmlChar *name = NULL; 6242 xmlChar *ExternalID = NULL; 6243 xmlChar *URI = NULL; 6244 6245 /* 6246 * We know that '<!DOCTYPE' has been detected. 6247 */ 6248 SKIP(9); 6249 6250 SKIP_BLANKS; 6251 6252 /* 6253 * Parse the DOCTYPE name. 6254 */ 6255 name = xmlParseName(ctxt); 6256 if (name == NULL) { 6257 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6259 ctxt->sax->error(ctxt->userData, 6260 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6261 ctxt->wellFormed = 0; 6262 ctxt->disableSAX = 1; 6263 } 6264 ctxt->intSubName = name; 6265 6266 SKIP_BLANKS; 6267 6268 /* 6269 * Check for SystemID and ExternalID 6270 */ 6271 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6272 6273 if ((URI != NULL) || (ExternalID != NULL)) { 6274 ctxt->hasExternalSubset = 1; 6275 } 6276 ctxt->extSubURI = URI; 6277 ctxt->extSubSystem = ExternalID; 6278 6279 SKIP_BLANKS; 6280 6281 /* 6282 * Create and update the internal subset. 6283 */ 6284 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6285 (!ctxt->disableSAX)) 6286 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6287 6288 /* 6289 * Is there any internal subset declarations ? 6290 * they are handled separately in xmlParseInternalSubset() 6291 */ 6292 if (RAW == '[') 6293 return; 6294 6295 /* 6296 * We should be at the end of the DOCTYPE declaration. 
6297 */ 6298 if (RAW != '>') { 6299 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6301 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6302 ctxt->wellFormed = 0; 6303 ctxt->disableSAX = 1; 6304 } 6305 NEXT; 6306} 6307 6308/** 6309 * xmlParseInternalSubset: 6310 * @ctxt: an XML parser context 6311 * 6312 * parse the internal subset declaration 6313 * 6314 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6315 */ 6316 6317static void 6318xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6319 /* 6320 * Is there any DTD definition ? 6321 */ 6322 if (RAW == '[') { 6323 ctxt->instate = XML_PARSER_DTD; 6324 NEXT; 6325 /* 6326 * Parse the succession of Markup declarations and 6327 * PEReferences. 6328 * Subsequence (markupdecl | PEReference | S)* 6329 */ 6330 while (RAW != ']') { 6331 const xmlChar *check = CUR_PTR; 6332 int cons = ctxt->input->consumed; 6333 6334 SKIP_BLANKS; 6335 xmlParseMarkupDecl(ctxt); 6336 xmlParsePEReference(ctxt); 6337 6338 /* 6339 * Pop-up of finished entities. 6340 */ 6341 while ((RAW == 0) && (ctxt->inputNr > 1)) 6342 xmlPopInput(ctxt); 6343 6344 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6345 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6347 ctxt->sax->error(ctxt->userData, 6348 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6349 ctxt->wellFormed = 0; 6350 ctxt->disableSAX = 1; 6351 break; 6352 } 6353 } 6354 if (RAW == ']') { 6355 NEXT; 6356 SKIP_BLANKS; 6357 } 6358 } 6359 6360 /* 6361 * We should be at the end of the DOCTYPE declaration. 
6362 */ 6363 if (RAW != '>') { 6364 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6366 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); 6367 ctxt->wellFormed = 0; 6368 ctxt->disableSAX = 1; 6369 } 6370 NEXT; 6371} 6372 6373/** 6374 * xmlParseAttribute: 6375 * @ctxt: an XML parser context 6376 * @value: a xmlChar ** used to store the value of the attribute 6377 * 6378 * parse an attribute 6379 * 6380 * [41] Attribute ::= Name Eq AttValue 6381 * 6382 * [ WFC: No External Entity References ] 6383 * Attribute values cannot contain direct or indirect entity references 6384 * to external entities. 6385 * 6386 * [ WFC: No < in Attribute Values ] 6387 * The replacement text of any entity referred to directly or indirectly in 6388 * an attribute value (other than "<") must not contain a <. 6389 * 6390 * [ VC: Attribute Value Type ] 6391 * The attribute must have been declared; the value must be of the type 6392 * declared for it. 6393 * 6394 * [25] Eq ::= S? '=' S? 6395 * 6396 * With namespace: 6397 * 6398 * [NS 11] Attribute ::= QName Eq AttValue 6399 * 6400 * Also the case QName == xmlns:??? is handled independently as a namespace 6401 * definition. 6402 * 6403 * Returns the attribute name, and the value in *value. 
6404 */ 6405 6406xmlChar * 6407xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6408 xmlChar *name, *val; 6409 6410 *value = NULL; 6411 GROW; 6412 name = xmlParseName(ctxt); 6413 if (name == NULL) { 6414 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6416 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 6417 ctxt->wellFormed = 0; 6418 ctxt->disableSAX = 1; 6419 return(NULL); 6420 } 6421 6422 /* 6423 * read the value 6424 */ 6425 SKIP_BLANKS; 6426 if (RAW == '=') { 6427 NEXT; 6428 SKIP_BLANKS; 6429 val = xmlParseAttValue(ctxt); 6430 ctxt->instate = XML_PARSER_CONTENT; 6431 } else { 6432 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6434 ctxt->sax->error(ctxt->userData, 6435 "Specification mandate value for attribute %s\n", name); 6436 ctxt->wellFormed = 0; 6437 ctxt->disableSAX = 1; 6438 xmlFree(name); 6439 return(NULL); 6440 } 6441 6442 /* 6443 * Check that xml:lang conforms to the specification 6444 * No more registered as an error, just generate a warning now 6445 * since this was deprecated in XML second edition 6446 */ 6447 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6448 if (!xmlCheckLanguageID(val)) { 6449 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6450 ctxt->sax->warning(ctxt->userData, 6451 "Malformed value for xml:lang : %s\n", val); 6452 } 6453 } 6454 6455 /* 6456 * Check that xml:space conforms to the specification 6457 */ 6458 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6459 if (xmlStrEqual(val, BAD_CAST "default")) 6460 *(ctxt->space) = 0; 6461 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6462 *(ctxt->space) = 1; 6463 else { 6464 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6466 ctxt->sax->error(ctxt->userData, 6467"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 6468 val); 6469 
ctxt->wellFormed = 0; 6470 ctxt->disableSAX = 1; 6471 } 6472 } 6473 6474 *value = val; 6475 return(name); 6476} 6477 6478/** 6479 * xmlParseStartTag: 6480 * @ctxt: an XML parser context 6481 * 6482 * parse a start of tag either for rule element or 6483 * EmptyElement. In both case we don't parse the tag closing chars. 6484 * 6485 * [40] STag ::= '<' Name (S Attribute)* S? '>' 6486 * 6487 * [ WFC: Unique Att Spec ] 6488 * No attribute name may appear more than once in the same start-tag or 6489 * empty-element tag. 6490 * 6491 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6492 * 6493 * [ WFC: Unique Att Spec ] 6494 * No attribute name may appear more than once in the same start-tag or 6495 * empty-element tag. 6496 * 6497 * With namespace: 6498 * 6499 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6500 * 6501 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6502 * 6503 * Returns the element name parsed 6504 */ 6505 6506xmlChar * 6507xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6508 xmlChar *name; 6509 xmlChar *attname; 6510 xmlChar *attvalue; 6511 const xmlChar **atts = NULL; 6512 int nbatts = 0; 6513 int maxatts = 0; 6514 int i; 6515 6516 if (RAW != '<') return(NULL); 6517 NEXT1; 6518 6519 name = xmlParseName(ctxt); 6520 if (name == NULL) { 6521 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6523 ctxt->sax->error(ctxt->userData, 6524 "xmlParseStartTag: invalid element name\n"); 6525 ctxt->wellFormed = 0; 6526 ctxt->disableSAX = 1; 6527 return(NULL); 6528 } 6529 6530 /* 6531 * Now parse the attributes, it ends up with the ending 6532 * 6533 * (S Attribute)* S? 
6534 */ 6535 SKIP_BLANKS; 6536 GROW; 6537 6538 while ((RAW != '>') && 6539 ((RAW != '/') || (NXT(1) != '>')) && 6540 (IS_CHAR(RAW))) { 6541 const xmlChar *q = CUR_PTR; 6542 int cons = ctxt->input->consumed; 6543 6544 attname = xmlParseAttribute(ctxt, &attvalue); 6545 if ((attname != NULL) && (attvalue != NULL)) { 6546 /* 6547 * [ WFC: Unique Att Spec ] 6548 * No attribute name may appear more than once in the same 6549 * start-tag or empty-element tag. 6550 */ 6551 for (i = 0; i < nbatts;i += 2) { 6552 if (xmlStrEqual(atts[i], attname)) { 6553 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6555 ctxt->sax->error(ctxt->userData, 6556 "Attribute %s redefined\n", 6557 attname); 6558 ctxt->wellFormed = 0; 6559 ctxt->disableSAX = 1; 6560 xmlFree(attname); 6561 xmlFree(attvalue); 6562 goto failed; 6563 } 6564 } 6565 6566 /* 6567 * Add the pair to atts 6568 */ 6569 if (atts == NULL) { 6570 maxatts = 10; 6571 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6572 if (atts == NULL) { 6573 xmlGenericError(xmlGenericErrorContext, 6574 "malloc of %ld byte failed\n", 6575 maxatts * (long)sizeof(xmlChar *)); 6576 return(NULL); 6577 } 6578 } else if (nbatts + 4 > maxatts) { 6579 maxatts *= 2; 6580 atts = (const xmlChar **) xmlRealloc((void *) atts, 6581 maxatts * sizeof(xmlChar *)); 6582 if (atts == NULL) { 6583 xmlGenericError(xmlGenericErrorContext, 6584 "realloc of %ld byte failed\n", 6585 maxatts * (long)sizeof(xmlChar *)); 6586 return(NULL); 6587 } 6588 } 6589 atts[nbatts++] = attname; 6590 atts[nbatts++] = attvalue; 6591 atts[nbatts] = NULL; 6592 atts[nbatts + 1] = NULL; 6593 } else { 6594 if (attname != NULL) 6595 xmlFree(attname); 6596 if (attvalue != NULL) 6597 xmlFree(attvalue); 6598 } 6599 6600failed: 6601 6602 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6603 break; 6604 if (!IS_BLANK(RAW)) { 6605 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6606 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6607 ctxt->sax->error(ctxt->userData, 6608 "attributes construct error\n"); 6609 ctxt->wellFormed = 0; 6610 ctxt->disableSAX = 1; 6611 } 6612 SKIP_BLANKS; 6613 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6614 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6616 ctxt->sax->error(ctxt->userData, 6617 "xmlParseStartTag: problem parsing attributes\n"); 6618 ctxt->wellFormed = 0; 6619 ctxt->disableSAX = 1; 6620 break; 6621 } 6622 GROW; 6623 } 6624 6625 /* 6626 * SAX: Start of Element ! 6627 */ 6628 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6629 (!ctxt->disableSAX)) 6630 ctxt->sax->startElement(ctxt->userData, name, atts); 6631 6632 if (atts != NULL) { 6633 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6634 xmlFree((void *) atts); 6635 } 6636 return(name); 6637} 6638 6639/** 6640 * xmlParseEndTag: 6641 * @ctxt: an XML parser context 6642 * 6643 * parse an end of tag 6644 * 6645 * [42] ETag ::= '</' Name S? '>' 6646 * 6647 * With namespace 6648 * 6649 * [NS 9] ETag ::= '</' QName S? '>' 6650 */ 6651 6652void 6653xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6654 xmlChar *name; 6655 xmlChar *oldname; 6656 6657 GROW; 6658 if ((RAW != '<') || (NXT(1) != '/')) { 6659 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6661 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6662 ctxt->wellFormed = 0; 6663 ctxt->disableSAX = 1; 6664 return; 6665 } 6666 SKIP(2); 6667 6668 name = xmlParseNameAndCompare(ctxt,ctxt->name); 6669 6670 /* 6671 * We should definitely be at the ending "S? 
'>'" part 6672 */ 6673 GROW; 6674 SKIP_BLANKS; 6675 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6676 ctxt->errNo = XML_ERR_GT_REQUIRED; 6677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6678 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6679 ctxt->wellFormed = 0; 6680 ctxt->disableSAX = 1; 6681 } else 6682 NEXT1; 6683 6684 /* 6685 * [ WFC: Element Type Match ] 6686 * The Name in an element's end-tag must match the element type in the 6687 * start-tag. 6688 * 6689 */ 6690 if (name != (xmlChar*)1) { 6691 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6693 if (name != NULL) { 6694 ctxt->sax->error(ctxt->userData, 6695 "Opening and ending tag mismatch: %s and %s\n", 6696 ctxt->name, name); 6697 xmlFree(name); 6698 } else { 6699 ctxt->sax->error(ctxt->userData, 6700 "Ending tag error for: %s\n", ctxt->name); 6701 } 6702 6703 } 6704 ctxt->wellFormed = 0; 6705 ctxt->disableSAX = 1; 6706 } 6707 6708 /* 6709 * SAX: End of Tag 6710 */ 6711 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6712 (!ctxt->disableSAX)) 6713 ctxt->sax->endElement(ctxt->userData, ctxt->name); 6714 6715 oldname = namePop(ctxt); 6716 spacePop(ctxt); 6717 if (oldname != NULL) { 6718#ifdef DEBUG_STACK 6719 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6720#endif 6721 xmlFree(oldname); 6722 } 6723 return; 6724} 6725 6726/** 6727 * xmlParseCDSect: 6728 * @ctxt: an XML parser context 6729 * 6730 * Parse escaped pure raw content. 
6731 * 6732 * [18] CDSect ::= CDStart CData CDEnd 6733 * 6734 * [19] CDStart ::= '<![CDATA[' 6735 * 6736 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6737 * 6738 * [21] CDEnd ::= ']]>' 6739 */ 6740void 6741xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6742 xmlChar *buf = NULL; 6743 int len = 0; 6744 int size = XML_PARSER_BUFFER_SIZE; 6745 int r, rl; 6746 int s, sl; 6747 int cur, l; 6748 int count = 0; 6749 6750 if ((NXT(0) == '<') && (NXT(1) == '!') && 6751 (NXT(2) == '[') && (NXT(3) == 'C') && 6752 (NXT(4) == 'D') && (NXT(5) == 'A') && 6753 (NXT(6) == 'T') && (NXT(7) == 'A') && 6754 (NXT(8) == '[')) { 6755 SKIP(9); 6756 } else 6757 return; 6758 6759 ctxt->instate = XML_PARSER_CDATA_SECTION; 6760 r = CUR_CHAR(rl); 6761 if (!IS_CHAR(r)) { 6762 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6764 ctxt->sax->error(ctxt->userData, 6765 "CData section not finished\n"); 6766 ctxt->wellFormed = 0; 6767 ctxt->disableSAX = 1; 6768 ctxt->instate = XML_PARSER_CONTENT; 6769 return; 6770 } 6771 NEXTL(rl); 6772 s = CUR_CHAR(sl); 6773 if (!IS_CHAR(s)) { 6774 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6776 ctxt->sax->error(ctxt->userData, 6777 "CData section not finished\n"); 6778 ctxt->wellFormed = 0; 6779 ctxt->disableSAX = 1; 6780 ctxt->instate = XML_PARSER_CONTENT; 6781 return; 6782 } 6783 NEXTL(sl); 6784 cur = CUR_CHAR(l); 6785 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6786 if (buf == NULL) { 6787 xmlGenericError(xmlGenericErrorContext, 6788 "malloc of %d byte failed\n", size); 6789 return; 6790 } 6791 while (IS_CHAR(cur) && 6792 ((r != ']') || (s != ']') || (cur != '>'))) { 6793 if (len + 5 >= size) { 6794 size *= 2; 6795 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6796 if (buf == NULL) { 6797 xmlGenericError(xmlGenericErrorContext, 6798 "realloc of %d byte failed\n", size); 6799 return; 6800 } 6801 } 6802 COPY_BUF(rl,buf,len,r); 6803 r = s; 
6804 rl = sl; 6805 s = cur; 6806 sl = l; 6807 count++; 6808 if (count > 50) { 6809 GROW; 6810 count = 0; 6811 } 6812 NEXTL(l); 6813 cur = CUR_CHAR(l); 6814 } 6815 buf[len] = 0; 6816 ctxt->instate = XML_PARSER_CONTENT; 6817 if (cur != '>') { 6818 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6820 ctxt->sax->error(ctxt->userData, 6821 "CData section not finished\n%.50s\n", buf); 6822 ctxt->wellFormed = 0; 6823 ctxt->disableSAX = 1; 6824 xmlFree(buf); 6825 return; 6826 } 6827 NEXTL(l); 6828 6829 /* 6830 * OK the buffer is to be consumed as cdata. 6831 */ 6832 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6833 if (ctxt->sax->cdataBlock != NULL) 6834 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6835 else if (ctxt->sax->characters != NULL) 6836 ctxt->sax->characters(ctxt->userData, buf, len); 6837 } 6838 xmlFree(buf); 6839} 6840 6841/** 6842 * xmlParseContent: 6843 * @ctxt: an XML parser context 6844 * 6845 * Parse a content: 6846 * 6847 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6848 */ 6849 6850void 6851xmlParseContent(xmlParserCtxtPtr ctxt) { 6852 GROW; 6853 while ((RAW != 0) && 6854 ((RAW != '<') || (NXT(1) != '/'))) { 6855 const xmlChar *test = CUR_PTR; 6856 int cons = ctxt->input->consumed; 6857 const xmlChar *cur = ctxt->input->cur; 6858 6859 /* 6860 * First case : a Processing Instruction. 
6861 */ 6862 if ((*cur == '<') && (cur[1] == '?')) { 6863 xmlParsePI(ctxt); 6864 } 6865 6866 /* 6867 * Second case : a CDSection 6868 */ 6869 else if ((*cur == '<') && (NXT(1) == '!') && 6870 (NXT(2) == '[') && (NXT(3) == 'C') && 6871 (NXT(4) == 'D') && (NXT(5) == 'A') && 6872 (NXT(6) == 'T') && (NXT(7) == 'A') && 6873 (NXT(8) == '[')) { 6874 xmlParseCDSect(ctxt); 6875 } 6876 6877 /* 6878 * Third case : a comment 6879 */ 6880 else if ((*cur == '<') && (NXT(1) == '!') && 6881 (NXT(2) == '-') && (NXT(3) == '-')) { 6882 xmlParseComment(ctxt); 6883 ctxt->instate = XML_PARSER_CONTENT; 6884 } 6885 6886 /* 6887 * Fourth case : a sub-element. 6888 */ 6889 else if (*cur == '<') { 6890 xmlParseElement(ctxt); 6891 } 6892 6893 /* 6894 * Fifth case : a reference. If if has not been resolved, 6895 * parsing returns it's Name, create the node 6896 */ 6897 6898 else if (*cur == '&') { 6899 xmlParseReference(ctxt); 6900 } 6901 6902 /* 6903 * Last case, text. Note that References are handled directly. 6904 */ 6905 else { 6906 xmlParseCharData(ctxt, 0); 6907 } 6908 6909 GROW; 6910 /* 6911 * Pop-up of finished entities. 6912 */ 6913 while ((RAW == 0) && (ctxt->inputNr > 1)) 6914 xmlPopInput(ctxt); 6915 SHRINK; 6916 6917 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 6918 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6920 ctxt->sax->error(ctxt->userData, 6921 "detected an error in element content\n"); 6922 ctxt->wellFormed = 0; 6923 ctxt->disableSAX = 1; 6924 ctxt->instate = XML_PARSER_EOF; 6925 break; 6926 } 6927 } 6928} 6929 6930/** 6931 * xmlParseElement: 6932 * @ctxt: an XML parser context 6933 * 6934 * parse an XML element, this is highly recursive 6935 * 6936 * [39] element ::= EmptyElemTag | STag content ETag 6937 * 6938 * [ WFC: Element Type Match ] 6939 * The Name in an element's end-tag must match the element type in the 6940 * start-tag. 
6941 * 6942 * [ VC: Element Valid ] 6943 * An element is valid if there is a declaration matching elementdecl 6944 * where the Name matches the element type and one of the following holds: 6945 * - The declaration matches EMPTY and the element has no content. 6946 * - The declaration matches children and the sequence of child elements 6947 * belongs to the language generated by the regular expression in the 6948 * content model, with optional white space (characters matching the 6949 * nonterminal S) between each pair of child elements. 6950 * - The declaration matches Mixed and the content consists of character 6951 * data and child elements whose types match names in the content model. 6952 * - The declaration matches ANY, and the types of any child elements have 6953 * been declared. 6954 */ 6955 6956void 6957xmlParseElement(xmlParserCtxtPtr ctxt) { 6958 xmlChar *name; 6959 xmlChar *oldname; 6960 xmlParserNodeInfo node_info; 6961 xmlNodePtr ret; 6962 6963 /* Capture start position */ 6964 if (ctxt->record_info) { 6965 node_info.begin_pos = ctxt->input->consumed + 6966 (CUR_PTR - ctxt->input->base); 6967 node_info.begin_line = ctxt->input->line; 6968 } 6969 6970 if (ctxt->spaceNr == 0) 6971 spacePush(ctxt, -1); 6972 else 6973 spacePush(ctxt, *ctxt->space); 6974 6975 name = xmlParseStartTag(ctxt); 6976 if (name == NULL) { 6977 spacePop(ctxt); 6978 return; 6979 } 6980 namePush(ctxt, name); 6981 ret = ctxt->node; 6982 6983 /* 6984 * [ VC: Root Element Type ] 6985 * The Name in the document type declaration must match the element 6986 * type of the root element. 6987 */ 6988 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6989 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6990 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6991 6992 /* 6993 * Check for an Empty Element. 
6994 */ 6995 if ((RAW == '/') && (NXT(1) == '>')) { 6996 SKIP(2); 6997 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6998 (!ctxt->disableSAX)) 6999 ctxt->sax->endElement(ctxt->userData, name); 7000 oldname = namePop(ctxt); 7001 spacePop(ctxt); 7002 if (oldname != NULL) { 7003#ifdef DEBUG_STACK 7004 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7005#endif 7006 xmlFree(oldname); 7007 } 7008 if ( ret != NULL && ctxt->record_info ) { 7009 node_info.end_pos = ctxt->input->consumed + 7010 (CUR_PTR - ctxt->input->base); 7011 node_info.end_line = ctxt->input->line; 7012 node_info.node = ret; 7013 xmlParserAddNodeInfo(ctxt, &node_info); 7014 } 7015 return; 7016 } 7017 if (RAW == '>') { 7018 NEXT1; 7019 } else { 7020 ctxt->errNo = XML_ERR_GT_REQUIRED; 7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7022 ctxt->sax->error(ctxt->userData, 7023 "Couldn't find end of Start Tag %s\n", 7024 name); 7025 ctxt->wellFormed = 0; 7026 ctxt->disableSAX = 1; 7027 7028 /* 7029 * end of parsing of this node. 
7030 */ 7031 nodePop(ctxt); 7032 oldname = namePop(ctxt); 7033 spacePop(ctxt); 7034 if (oldname != NULL) { 7035#ifdef DEBUG_STACK 7036 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7037#endif 7038 xmlFree(oldname); 7039 } 7040 7041 /* 7042 * Capture end position and add node 7043 */ 7044 if ( ret != NULL && ctxt->record_info ) { 7045 node_info.end_pos = ctxt->input->consumed + 7046 (CUR_PTR - ctxt->input->base); 7047 node_info.end_line = ctxt->input->line; 7048 node_info.node = ret; 7049 xmlParserAddNodeInfo(ctxt, &node_info); 7050 } 7051 return; 7052 } 7053 7054 /* 7055 * Parse the content of the element: 7056 */ 7057 xmlParseContent(ctxt); 7058 if (!IS_CHAR(RAW)) { 7059 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED; 7060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7061 ctxt->sax->error(ctxt->userData, 7062 "Premature end of data in tag %s\n", name); 7063 ctxt->wellFormed = 0; 7064 ctxt->disableSAX = 1; 7065 7066 /* 7067 * end of parsing of this node. 7068 */ 7069 nodePop(ctxt); 7070 oldname = namePop(ctxt); 7071 spacePop(ctxt); 7072 if (oldname != NULL) { 7073#ifdef DEBUG_STACK 7074 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7075#endif 7076 xmlFree(oldname); 7077 } 7078 return; 7079 } 7080 7081 /* 7082 * parse the end of tag: '</' should be here. 7083 */ 7084 xmlParseEndTag(ctxt); 7085 7086 /* 7087 * Capture end position and add node 7088 */ 7089 if ( ret != NULL && ctxt->record_info ) { 7090 node_info.end_pos = ctxt->input->consumed + 7091 (CUR_PTR - ctxt->input->base); 7092 node_info.end_line = ctxt->input->line; 7093 node_info.node = ret; 7094 xmlParserAddNodeInfo(ctxt, &node_info); 7095 } 7096} 7097 7098/** 7099 * xmlParseVersionNum: 7100 * @ctxt: an XML parser context 7101 * 7102 * parse the XML version value. 
7103 * 7104 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 7105 * 7106 * Returns the string giving the XML version number, or NULL 7107 */ 7108xmlChar * 7109xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 7110 xmlChar *buf = NULL; 7111 int len = 0; 7112 int size = 10; 7113 xmlChar cur; 7114 7115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 7116 if (buf == NULL) { 7117 xmlGenericError(xmlGenericErrorContext, 7118 "malloc of %d byte failed\n", size); 7119 return(NULL); 7120 } 7121 cur = CUR; 7122 while (((cur >= 'a') && (cur <= 'z')) || 7123 ((cur >= 'A') && (cur <= 'Z')) || 7124 ((cur >= '0') && (cur <= '9')) || 7125 (cur == '_') || (cur == '.') || 7126 (cur == ':') || (cur == '-')) { 7127 if (len + 1 >= size) { 7128 size *= 2; 7129 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7130 if (buf == NULL) { 7131 xmlGenericError(xmlGenericErrorContext, 7132 "realloc of %d byte failed\n", size); 7133 return(NULL); 7134 } 7135 } 7136 buf[len++] = cur; 7137 NEXT; 7138 cur=CUR; 7139 } 7140 buf[len] = 0; 7141 return(buf); 7142} 7143 7144/** 7145 * xmlParseVersionInfo: 7146 * @ctxt: an XML parser context 7147 * 7148 * parse the XML version. 7149 * 7150 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 7151 * 7152 * [25] Eq ::= S? '=' S? 7153 * 7154 * Returns the version string, e.g. 
"1.0" 7155 */ 7156 7157xmlChar * 7158xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 7159 xmlChar *version = NULL; 7160 const xmlChar *q; 7161 7162 if ((RAW == 'v') && (NXT(1) == 'e') && 7163 (NXT(2) == 'r') && (NXT(3) == 's') && 7164 (NXT(4) == 'i') && (NXT(5) == 'o') && 7165 (NXT(6) == 'n')) { 7166 SKIP(7); 7167 SKIP_BLANKS; 7168 if (RAW != '=') { 7169 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7171 ctxt->sax->error(ctxt->userData, 7172 "xmlParseVersionInfo : expected '='\n"); 7173 ctxt->wellFormed = 0; 7174 ctxt->disableSAX = 1; 7175 return(NULL); 7176 } 7177 NEXT; 7178 SKIP_BLANKS; 7179 if (RAW == '"') { 7180 NEXT; 7181 q = CUR_PTR; 7182 version = xmlParseVersionNum(ctxt); 7183 if (RAW != '"') { 7184 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7186 ctxt->sax->error(ctxt->userData, 7187 "String not closed\n%.50s\n", q); 7188 ctxt->wellFormed = 0; 7189 ctxt->disableSAX = 1; 7190 } else 7191 NEXT; 7192 } else if (RAW == '\''){ 7193 NEXT; 7194 q = CUR_PTR; 7195 version = xmlParseVersionNum(ctxt); 7196 if (RAW != '\'') { 7197 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7199 ctxt->sax->error(ctxt->userData, 7200 "String not closed\n%.50s\n", q); 7201 ctxt->wellFormed = 0; 7202 ctxt->disableSAX = 1; 7203 } else 7204 NEXT; 7205 } else { 7206 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7208 ctxt->sax->error(ctxt->userData, 7209 "xmlParseVersionInfo : expected ' or \"\n"); 7210 ctxt->wellFormed = 0; 7211 ctxt->disableSAX = 1; 7212 } 7213 } 7214 return(version); 7215} 7216 7217/** 7218 * xmlParseEncName: 7219 * @ctxt: an XML parser context 7220 * 7221 * parse the XML encoding name 7222 * 7223 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 7224 * 7225 * Returns the encoding name value or NULL 7226 */ 7227xmlChar * 
7228xmlParseEncName(xmlParserCtxtPtr ctxt) { 7229 xmlChar *buf = NULL; 7230 int len = 0; 7231 int size = 10; 7232 xmlChar cur; 7233 7234 cur = CUR; 7235 if (((cur >= 'a') && (cur <= 'z')) || 7236 ((cur >= 'A') && (cur <= 'Z'))) { 7237 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 7238 if (buf == NULL) { 7239 xmlGenericError(xmlGenericErrorContext, 7240 "malloc of %d byte failed\n", size); 7241 return(NULL); 7242 } 7243 7244 buf[len++] = cur; 7245 NEXT; 7246 cur = CUR; 7247 while (((cur >= 'a') && (cur <= 'z')) || 7248 ((cur >= 'A') && (cur <= 'Z')) || 7249 ((cur >= '0') && (cur <= '9')) || 7250 (cur == '.') || (cur == '_') || 7251 (cur == '-')) { 7252 if (len + 1 >= size) { 7253 size *= 2; 7254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7255 if (buf == NULL) { 7256 xmlGenericError(xmlGenericErrorContext, 7257 "realloc of %d byte failed\n", size); 7258 return(NULL); 7259 } 7260 } 7261 buf[len++] = cur; 7262 NEXT; 7263 cur = CUR; 7264 if (cur == 0) { 7265 SHRINK; 7266 GROW; 7267 cur = CUR; 7268 } 7269 } 7270 buf[len] = 0; 7271 } else { 7272 ctxt->errNo = XML_ERR_ENCODING_NAME; 7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7274 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 7275 ctxt->wellFormed = 0; 7276 ctxt->disableSAX = 1; 7277 } 7278 return(buf); 7279} 7280 7281/** 7282 * xmlParseEncodingDecl: 7283 * @ctxt: an XML parser context 7284 * 7285 * parse the XML encoding declaration 7286 * 7287 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 7288 * 7289 * this setups the conversion filters. 
7290 * 7291 * Returns the encoding value or NULL 7292 */ 7293 7294xmlChar * 7295xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 7296 xmlChar *encoding = NULL; 7297 const xmlChar *q; 7298 7299 SKIP_BLANKS; 7300 if ((RAW == 'e') && (NXT(1) == 'n') && 7301 (NXT(2) == 'c') && (NXT(3) == 'o') && 7302 (NXT(4) == 'd') && (NXT(5) == 'i') && 7303 (NXT(6) == 'n') && (NXT(7) == 'g')) { 7304 SKIP(8); 7305 SKIP_BLANKS; 7306 if (RAW != '=') { 7307 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7309 ctxt->sax->error(ctxt->userData, 7310 "xmlParseEncodingDecl : expected '='\n"); 7311 ctxt->wellFormed = 0; 7312 ctxt->disableSAX = 1; 7313 return(NULL); 7314 } 7315 NEXT; 7316 SKIP_BLANKS; 7317 if (RAW == '"') { 7318 NEXT; 7319 q = CUR_PTR; 7320 encoding = xmlParseEncName(ctxt); 7321 if (RAW != '"') { 7322 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7324 ctxt->sax->error(ctxt->userData, 7325 "String not closed\n%.50s\n", q); 7326 ctxt->wellFormed = 0; 7327 ctxt->disableSAX = 1; 7328 } else 7329 NEXT; 7330 } else if (RAW == '\''){ 7331 NEXT; 7332 q = CUR_PTR; 7333 encoding = xmlParseEncName(ctxt); 7334 if (RAW != '\'') { 7335 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7337 ctxt->sax->error(ctxt->userData, 7338 "String not closed\n%.50s\n", q); 7339 ctxt->wellFormed = 0; 7340 ctxt->disableSAX = 1; 7341 } else 7342 NEXT; 7343 } else { 7344 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7346 ctxt->sax->error(ctxt->userData, 7347 "xmlParseEncodingDecl : expected ' or \"\n"); 7348 ctxt->wellFormed = 0; 7349 ctxt->disableSAX = 1; 7350 } 7351 if (encoding != NULL) { 7352 xmlCharEncoding enc; 7353 xmlCharEncodingHandlerPtr handler; 7354 7355 if (ctxt->input->encoding != NULL) 7356 xmlFree((xmlChar *) ctxt->input->encoding); 7357 ctxt->input->encoding = encoding; 7358 7359 enc = 
xmlParseCharEncoding((const char *) encoding); 7360 /* 7361 * registered set of known encodings 7362 */ 7363 if (enc != XML_CHAR_ENCODING_ERROR) { 7364 xmlSwitchEncoding(ctxt, enc); 7365 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7366 ctxt->input->encoding = NULL; 7367 xmlFree(encoding); 7368 return(NULL); 7369 } 7370 } else { 7371 /* 7372 * fallback for unknown encodings 7373 */ 7374 handler = xmlFindCharEncodingHandler((const char *) encoding); 7375 if (handler != NULL) { 7376 xmlSwitchToEncoding(ctxt, handler); 7377 } else { 7378 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 7379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7380 ctxt->sax->error(ctxt->userData, 7381 "Unsupported encoding %s\n", encoding); 7382 return(NULL); 7383 } 7384 } 7385 } 7386 } 7387 return(encoding); 7388} 7389 7390/** 7391 * xmlParseSDDecl: 7392 * @ctxt: an XML parser context 7393 * 7394 * parse the XML standalone declaration 7395 * 7396 * [32] SDDecl ::= S 'standalone' Eq 7397 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 7398 * 7399 * [ VC: Standalone Document Declaration ] 7400 * TODO The standalone document declaration must have the value "no" 7401 * if any external markup declarations contain declarations of: 7402 * - attributes with default values, if elements to which these 7403 * attributes apply appear in the document without specifications 7404 * of values for these attributes, or 7405 * - entities (other than amp, lt, gt, apos, quot), if references 7406 * to those entities appear in the document, or 7407 * - attributes with values subject to normalization, where the 7408 * attribute appears in the document with a value which will change 7409 * as a result of normalization, or 7410 * - element types with element content, if white space occurs directly 7411 * within any instance of those types. 
7412 * 7413 * Returns 1 if standalone="yes", 0 if standalone="no", -1 if the declaration is absent or malformed 7414 */ 7415 7416int 7417xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 7418 int standalone = -1; 7419 7420 SKIP_BLANKS; 7421 if ((RAW == 's') && (NXT(1) == 't') && 7422 (NXT(2) == 'a') && (NXT(3) == 'n') && 7423 (NXT(4) == 'd') && (NXT(5) == 'a') && 7424 (NXT(6) == 'l') && (NXT(7) == 'o') && 7425 (NXT(8) == 'n') && (NXT(9) == 'e')) { 7426 SKIP(10); 7427 SKIP_BLANKS; 7428 if (RAW != '=') { 7429 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7431 ctxt->sax->error(ctxt->userData, 7432 "XML standalone declaration : expected '='\n"); 7433 ctxt->wellFormed = 0; 7434 ctxt->disableSAX = 1; 7435 return(standalone); 7436 } 7437 NEXT; 7438 SKIP_BLANKS; 7439 if (RAW == '\''){ 7440 NEXT; 7441 if ((RAW == 'n') && (NXT(1) == 'o')) { 7442 standalone = 0; 7443 SKIP(2); 7444 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7445 (NXT(2) == 's')) { 7446 standalone = 1; 7447 SKIP(3); 7448 } else { 7449 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7451 ctxt->sax->error(ctxt->userData, 7452 "standalone accepts only 'yes' or 'no'\n"); 7453 ctxt->wellFormed = 0; 7454 ctxt->disableSAX = 1; 7455 } 7456 if (RAW != '\'') { 7457 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7459 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7460 ctxt->wellFormed = 0; 7461 ctxt->disableSAX = 1; 7462 } else 7463 NEXT; 7464 } else if (RAW == '"'){ 7465 NEXT; 7466 if ((RAW == 'n') && (NXT(1) == 'o')) { 7467 standalone = 0; 7468 SKIP(2); 7469 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7470 (NXT(2) == 's')) { 7471 standalone = 1; 7472 SKIP(3); 7473 } else { 7474 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7476 ctxt->sax->error(ctxt->userData, 7477 "standalone accepts only 'yes' or 'no'\n"); 7478 ctxt->wellFormed = 0; 7479 ctxt->disableSAX = 1; 7480 
} 7481 if (RAW != '"') { 7482 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7484 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7485 ctxt->wellFormed = 0; 7486 ctxt->disableSAX = 1; 7487 } else 7488 NEXT; 7489 } else { 7490 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7492 ctxt->sax->error(ctxt->userData, 7493 "Standalone value not found\n"); 7494 ctxt->wellFormed = 0; 7495 ctxt->disableSAX = 1; 7496 } 7497 } 7498 return(standalone); 7499} 7500 7501/** 7502 * xmlParseXMLDecl: 7503 * @ctxt: an XML parser context 7504 * 7505 * parse an XML declaration header 7506 * 7507 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 7508 */ 7509 7510void 7511xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7512 xmlChar *version; 7513 7514 /* 7515 * We know that '<?xml' is here. 7516 */ 7517 SKIP(5); 7518 7519 if (!IS_BLANK(RAW)) { 7520 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7522 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7523 ctxt->wellFormed = 0; 7524 ctxt->disableSAX = 1; 7525 } 7526 SKIP_BLANKS; 7527 7528 /* 7529 * We must have the VersionInfo here. 
7530 */ 7531 version = xmlParseVersionInfo(ctxt); 7532 if (version == NULL) { 7533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7534 ctxt->sax->error(ctxt->userData, 7535 "Malformed declaration expecting version\n"); 7536 ctxt->wellFormed = 0; 7537 ctxt->disableSAX = 1; 7538 } else { 7539 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 7540 /* 7541 * TODO: Blueberry should be detected here 7542 */ 7543 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 7544 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", 7545 version); 7546 } 7547 if (ctxt->version != NULL) 7548 xmlFree((void *) ctxt->version); 7549 ctxt->version = version; 7550 } 7551 7552 /* 7553 * We may have the encoding declaration 7554 */ 7555 if (!IS_BLANK(RAW)) { 7556 if ((RAW == '?') && (NXT(1) == '>')) { 7557 SKIP(2); 7558 return; 7559 } 7560 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7562 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7563 ctxt->wellFormed = 0; 7564 ctxt->disableSAX = 1; 7565 } 7566 xmlParseEncodingDecl(ctxt); 7567 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7568 /* 7569 * The XML REC instructs us to stop parsing right here 7570 */ 7571 return; 7572 } 7573 7574 /* 7575 * We may have the standalone status. 7576 */ 7577 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7578 if ((RAW == '?') && (NXT(1) == '>')) { 7579 SKIP(2); 7580 return; 7581 } 7582 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7584 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7585 ctxt->wellFormed = 0; 7586 ctxt->disableSAX = 1; 7587 } 7588 SKIP_BLANKS; 7589 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7590 7591 SKIP_BLANKS; 7592 if ((RAW == '?') && (NXT(1) == '>')) { 7593 SKIP(2); 7594 } else if (RAW == '>') { 7595 /* Deprecated old WD ... 
*/ 7596 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7598 ctxt->sax->error(ctxt->userData, 7599 "XML declaration must end-up with '?>'\n"); 7600 ctxt->wellFormed = 0; 7601 ctxt->disableSAX = 1; 7602 NEXT; 7603 } else { 7604 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7606 ctxt->sax->error(ctxt->userData, 7607 "parsing XML declaration: '?>' expected\n"); 7608 ctxt->wellFormed = 0; 7609 ctxt->disableSAX = 1; 7610 MOVETO_ENDTAG(CUR_PTR); 7611 NEXT; 7612 } 7613} 7614 7615/** 7616 * xmlParseMisc: 7617 * @ctxt: an XML parser context 7618 * 7619 * parse an XML Misc* optional field. 7620 * 7621 * [27] Misc ::= Comment | PI | S 7622 */ 7623 7624void 7625xmlParseMisc(xmlParserCtxtPtr ctxt) { 7626 while (((RAW == '<') && (NXT(1) == '?')) || 7627 ((RAW == '<') && (NXT(1) == '!') && 7628 (NXT(2) == '-') && (NXT(3) == '-')) || 7629 IS_BLANK(CUR)) { 7630 if ((RAW == '<') && (NXT(1) == '?')) { 7631 xmlParsePI(ctxt); 7632 } else if (IS_BLANK(CUR)) { 7633 NEXT; 7634 } else 7635 xmlParseComment(ctxt); 7636 } 7637} 7638 7639/** 7640 * xmlParseDocument: 7641 * @ctxt: an XML parser context 7642 * 7643 * parse an XML document (and build a tree if using the standard SAX 7644 * interface). 7645 * 7646 * [1] document ::= prolog element Misc* 7647 * 7648 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7649 * 7650 * Returns 0, -1 in case of error. the parser context is augmented 7651 * as a result of the parsing. 7652 */ 7653 7654int 7655xmlParseDocument(xmlParserCtxtPtr ctxt) { 7656 xmlChar start[4]; 7657 xmlCharEncoding enc; 7658 7659 xmlInitParser(); 7660 7661 GROW; 7662 7663 /* 7664 * SAX: beginning of the document processing. 
7665 */ 7666 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7667 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7668 7669 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 7670 /* 7671 * Get the 4 first bytes and decode the charset 7672 * if enc != XML_CHAR_ENCODING_NONE 7673 * plug some encoding conversion routines. 7674 */ 7675 start[0] = RAW; 7676 start[1] = NXT(1); 7677 start[2] = NXT(2); 7678 start[3] = NXT(3); 7679 enc = xmlDetectCharEncoding(start, 4); 7680 if (enc != XML_CHAR_ENCODING_NONE) { 7681 xmlSwitchEncoding(ctxt, enc); 7682 } 7683 } 7684 7685 7686 if (CUR == 0) { 7687 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7689 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7690 ctxt->wellFormed = 0; 7691 ctxt->disableSAX = 1; 7692 } 7693 7694 /* 7695 * Check for the XMLDecl in the Prolog. 7696 */ 7697 GROW; 7698 if ((RAW == '<') && (NXT(1) == '?') && 7699 (NXT(2) == 'x') && (NXT(3) == 'm') && 7700 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7701 7702 /* 7703 * Note that we will switch encoding on the fly. 7704 */ 7705 xmlParseXMLDecl(ctxt); 7706 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7707 /* 7708 * The XML REC instructs us to stop parsing right here 7709 */ 7710 return(-1); 7711 } 7712 ctxt->standalone = ctxt->input->standalone; 7713 SKIP_BLANKS; 7714 } else { 7715 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7716 } 7717 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7718 ctxt->sax->startDocument(ctxt->userData); 7719 7720 /* 7721 * The Misc part of the Prolog 7722 */ 7723 GROW; 7724 xmlParseMisc(ctxt); 7725 7726 /* 7727 * Then possibly doc type declaration(s) and more Misc 7728 * (doctypedecl Misc*)? 
7729 */ 7730 GROW; 7731 if ((RAW == '<') && (NXT(1) == '!') && 7732 (NXT(2) == 'D') && (NXT(3) == 'O') && 7733 (NXT(4) == 'C') && (NXT(5) == 'T') && 7734 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7735 (NXT(8) == 'E')) { 7736 7737 ctxt->inSubset = 1; 7738 xmlParseDocTypeDecl(ctxt); 7739 if (RAW == '[') { 7740 ctxt->instate = XML_PARSER_DTD; 7741 xmlParseInternalSubset(ctxt); 7742 } 7743 7744 /* 7745 * Create and update the external subset. 7746 */ 7747 ctxt->inSubset = 2; 7748 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7749 (!ctxt->disableSAX)) 7750 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7751 ctxt->extSubSystem, ctxt->extSubURI); 7752 ctxt->inSubset = 0; 7753 7754 7755 ctxt->instate = XML_PARSER_PROLOG; 7756 xmlParseMisc(ctxt); 7757 } 7758 7759 /* 7760 * Time to start parsing the tree itself 7761 */ 7762 GROW; 7763 if (RAW != '<') { 7764 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7766 ctxt->sax->error(ctxt->userData, 7767 "Start tag expected, '<' not found\n"); 7768 ctxt->wellFormed = 0; 7769 ctxt->disableSAX = 1; 7770 ctxt->instate = XML_PARSER_EOF; 7771 } else { 7772 ctxt->instate = XML_PARSER_CONTENT; 7773 xmlParseElement(ctxt); 7774 ctxt->instate = XML_PARSER_EPILOG; 7775 7776 7777 /* 7778 * The Misc part at the end 7779 */ 7780 xmlParseMisc(ctxt); 7781 7782 if (RAW != 0) { 7783 ctxt->errNo = XML_ERR_DOCUMENT_END; 7784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7785 ctxt->sax->error(ctxt->userData, 7786 "Extra content at the end of the document\n"); 7787 ctxt->wellFormed = 0; 7788 ctxt->disableSAX = 1; 7789 } 7790 ctxt->instate = XML_PARSER_EOF; 7791 } 7792 7793 /* 7794 * SAX: end of the document processing. 
7795 */ 7796 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7797 ctxt->sax->endDocument(ctxt->userData); 7798 7799 /* 7800 * Remove locally kept entity definitions if the tree was not built 7801 */ 7802 if ((ctxt->myDoc != NULL) && 7803 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 7804 xmlFreeDoc(ctxt->myDoc); 7805 ctxt->myDoc = NULL; 7806 } 7807 7808 if (! ctxt->wellFormed) { 7809 ctxt->valid = 0; 7810 return(-1); 7811 } 7812 return(0); 7813} 7814 7815/** 7816 * xmlParseExtParsedEnt: 7817 * @ctxt: an XML parser context 7818 * 7819 * parse a general parsed entity 7820 * An external general parsed entity is well-formed if it matches the 7821 * production labeled extParsedEnt. 7822 * 7823 * [78] extParsedEnt ::= TextDecl? content 7824 * 7825 * Returns 0, -1 in case of error. the parser context is augmented 7826 * as a result of the parsing. 7827 */ 7828 7829int 7830xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7831 xmlChar start[4]; 7832 xmlCharEncoding enc; 7833 7834 xmlDefaultSAXHandlerInit(); 7835 7836 GROW; 7837 7838 /* 7839 * SAX: beginning of the document processing. 7840 */ 7841 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7842 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7843 7844 /* 7845 * Get the 4 first bytes and decode the charset 7846 * if enc != XML_CHAR_ENCODING_NONE 7847 * plug some encoding conversion routines. 7848 */ 7849 start[0] = RAW; 7850 start[1] = NXT(1); 7851 start[2] = NXT(2); 7852 start[3] = NXT(3); 7853 enc = xmlDetectCharEncoding(start, 4); 7854 if (enc != XML_CHAR_ENCODING_NONE) { 7855 xmlSwitchEncoding(ctxt, enc); 7856 } 7857 7858 7859 if (CUR == 0) { 7860 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7862 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7863 ctxt->wellFormed = 0; 7864 ctxt->disableSAX = 1; 7865 } 7866 7867 /* 7868 * Check for the XMLDecl in the Prolog. 
7869 */ 7870 GROW; 7871 if ((RAW == '<') && (NXT(1) == '?') && 7872 (NXT(2) == 'x') && (NXT(3) == 'm') && 7873 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7874 7875 /* 7876 * Note that we will switch encoding on the fly. 7877 */ 7878 xmlParseXMLDecl(ctxt); 7879 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7880 /* 7881 * The XML REC instructs us to stop parsing right here 7882 */ 7883 return(-1); 7884 } 7885 SKIP_BLANKS; 7886 } else { 7887 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7888 } 7889 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7890 ctxt->sax->startDocument(ctxt->userData); 7891 7892 /* 7893 * Doing validity checking on chunk doesn't make sense 7894 */ 7895 ctxt->instate = XML_PARSER_CONTENT; 7896 ctxt->validate = 0; 7897 ctxt->loadsubset = 0; 7898 ctxt->depth = 0; 7899 7900 xmlParseContent(ctxt); 7901 7902 if ((RAW == '<') && (NXT(1) == '/')) { 7903 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7905 ctxt->sax->error(ctxt->userData, 7906 "chunk is not well balanced\n"); 7907 ctxt->wellFormed = 0; 7908 ctxt->disableSAX = 1; 7909 } else if (RAW != 0) { 7910 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7912 ctxt->sax->error(ctxt->userData, 7913 "extra content at the end of well balanced chunk\n"); 7914 ctxt->wellFormed = 0; 7915 ctxt->disableSAX = 1; 7916 } 7917 7918 /* 7919 * SAX: end of the document processing. 7920 */ 7921 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7922 ctxt->sax->endDocument(ctxt->userData); 7923 7924 if (! 
ctxt->wellFormed) return(-1); 7925 return(0); 7926} 7927 7928/************************************************************************ 7929 * * 7930 * Progressive parsing interfaces * 7931 * * 7932 ************************************************************************/ 7933 7934/** 7935 * xmlParseLookupSequence: 7936 * @ctxt: an XML parser context 7937 * @first: the first char to lookup 7938 * @next: the next char to lookup or zero 7939 * @third: the next char to lookup or zero 7940 * 7941 * Try to find if a sequence (first, next, third) or just (first next) or 7942 * (first) is available in the input stream. 7943 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7944 * to avoid rescanning sequences of bytes, it DOES change the state of the 7945 * parser, do not use liberally. 7946 * 7947 * Returns the index to the current parsing point if the full sequence 7948 * is available, -1 otherwise. 7949 */ 7950static int 7951xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7952 xmlChar next, xmlChar third) { 7953 int base, len; 7954 xmlParserInputPtr in; 7955 const xmlChar *buf; 7956 7957 in = ctxt->input; 7958 if (in == NULL) return(-1); 7959 base = in->cur - in->base; 7960 if (base < 0) return(-1); 7961 if (ctxt->checkIndex > base) 7962 base = ctxt->checkIndex; 7963 if (in->buf == NULL) { 7964 buf = in->base; 7965 len = in->length; 7966 } else { 7967 buf = in->buf->buffer->content; 7968 len = in->buf->buffer->use; 7969 } 7970 /* take into account the sequence length */ 7971 if (third) len -= 2; 7972 else if (next) len --; 7973 for (;base < len;base++) { 7974 if (buf[base] == first) { 7975 if (third != 0) { 7976 if ((buf[base + 1] != next) || 7977 (buf[base + 2] != third)) continue; 7978 } else if (next != 0) { 7979 if (buf[base + 1] != next) continue; 7980 } 7981 ctxt->checkIndex = 0; 7982#ifdef DEBUG_PUSH 7983 if (next == 0) 7984 xmlGenericError(xmlGenericErrorContext, 7985 "PP: lookup '%c' found at %d\n", 7986 first, base); 
7987 else if (third == 0) 7988 xmlGenericError(xmlGenericErrorContext, 7989 "PP: lookup '%c%c' found at %d\n", 7990 first, next, base); 7991 else 7992 xmlGenericError(xmlGenericErrorContext, 7993 "PP: lookup '%c%c%c' found at %d\n", 7994 first, next, third, base); 7995#endif 7996 return(base - (in->cur - in->base)); 7997 } 7998 } 7999 ctxt->checkIndex = base; 8000#ifdef DEBUG_PUSH 8001 if (next == 0) 8002 xmlGenericError(xmlGenericErrorContext, 8003 "PP: lookup '%c' failed\n", first); 8004 else if (third == 0) 8005 xmlGenericError(xmlGenericErrorContext, 8006 "PP: lookup '%c%c' failed\n", first, next); 8007 else 8008 xmlGenericError(xmlGenericErrorContext, 8009 "PP: lookup '%c%c%c' failed\n", first, next, third); 8010#endif 8011 return(-1); 8012} 8013 8014/** 8015 * xmlParseTryOrFinish: 8016 * @ctxt: an XML parser context 8017 * @terminate: last chunk indicator 8018 * 8019 * Try to progress on parsing 8020 * 8021 * Returns zero if no parsing was possible 8022 */ 8023static int 8024xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 8025 int ret = 0; 8026 int avail; 8027 xmlChar cur, next; 8028 8029#ifdef DEBUG_PUSH 8030 switch (ctxt->instate) { 8031 case XML_PARSER_EOF: 8032 xmlGenericError(xmlGenericErrorContext, 8033 "PP: try EOF\n"); break; 8034 case XML_PARSER_START: 8035 xmlGenericError(xmlGenericErrorContext, 8036 "PP: try START\n"); break; 8037 case XML_PARSER_MISC: 8038 xmlGenericError(xmlGenericErrorContext, 8039 "PP: try MISC\n");break; 8040 case XML_PARSER_COMMENT: 8041 xmlGenericError(xmlGenericErrorContext, 8042 "PP: try COMMENT\n");break; 8043 case XML_PARSER_PROLOG: 8044 xmlGenericError(xmlGenericErrorContext, 8045 "PP: try PROLOG\n");break; 8046 case XML_PARSER_START_TAG: 8047 xmlGenericError(xmlGenericErrorContext, 8048 "PP: try START_TAG\n");break; 8049 case XML_PARSER_CONTENT: 8050 xmlGenericError(xmlGenericErrorContext, 8051 "PP: try CONTENT\n");break; 8052 case XML_PARSER_CDATA_SECTION: 8053 xmlGenericError(xmlGenericErrorContext, 8054 
"PP: try CDATA_SECTION\n");break; 8055 case XML_PARSER_END_TAG: 8056 xmlGenericError(xmlGenericErrorContext, 8057 "PP: try END_TAG\n");break; 8058 case XML_PARSER_ENTITY_DECL: 8059 xmlGenericError(xmlGenericErrorContext, 8060 "PP: try ENTITY_DECL\n");break; 8061 case XML_PARSER_ENTITY_VALUE: 8062 xmlGenericError(xmlGenericErrorContext, 8063 "PP: try ENTITY_VALUE\n");break; 8064 case XML_PARSER_ATTRIBUTE_VALUE: 8065 xmlGenericError(xmlGenericErrorContext, 8066 "PP: try ATTRIBUTE_VALUE\n");break; 8067 case XML_PARSER_DTD: 8068 xmlGenericError(xmlGenericErrorContext, 8069 "PP: try DTD\n");break; 8070 case XML_PARSER_EPILOG: 8071 xmlGenericError(xmlGenericErrorContext, 8072 "PP: try EPILOG\n");break; 8073 case XML_PARSER_PI: 8074 xmlGenericError(xmlGenericErrorContext, 8075 "PP: try PI\n");break; 8076 case XML_PARSER_IGNORE: 8077 xmlGenericError(xmlGenericErrorContext, 8078 "PP: try IGNORE\n");break; 8079 } 8080#endif 8081 8082 while (1) { 8083 SHRINK; 8084 8085 /* 8086 * Pop-up of finished entities. 8087 */ 8088 while ((RAW == 0) && (ctxt->inputNr > 1)) 8089 xmlPopInput(ctxt); 8090 8091 if (ctxt->input ==NULL) break; 8092 if (ctxt->input->buf == NULL) 8093 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8094 else { 8095 /* 8096 * If we are operating on converted input, try to flush 8097 * remainng chars to avoid them stalling in the non-converted 8098 * buffer. 
8099 */ 8100 if ((ctxt->input->buf->raw != NULL) && 8101 (ctxt->input->buf->raw->use > 0)) { 8102 int base = ctxt->input->base - 8103 ctxt->input->buf->buffer->content; 8104 int current = ctxt->input->cur - ctxt->input->base; 8105 8106 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 8107 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8108 ctxt->input->cur = ctxt->input->base + current; 8109 ctxt->input->end = 8110 &ctxt->input->buf->buffer->content[ 8111 ctxt->input->buf->buffer->use]; 8112 } 8113 avail = ctxt->input->buf->buffer->use - 8114 (ctxt->input->cur - ctxt->input->base); 8115 } 8116 if (avail < 1) 8117 goto done; 8118 switch (ctxt->instate) { 8119 case XML_PARSER_EOF: 8120 /* 8121 * Document parsing is done ! 8122 */ 8123 goto done; 8124 case XML_PARSER_START: 8125 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 8126 xmlChar start[4]; 8127 xmlCharEncoding enc; 8128 8129 /* 8130 * Very first chars read from the document flow. 8131 */ 8132 if (avail < 4) 8133 goto done; 8134 8135 /* 8136 * Get the 4 first bytes and decode the charset 8137 * if enc != XML_CHAR_ENCODING_NONE 8138 * plug some encoding conversion routines. 
8139 */ 8140 start[0] = RAW; 8141 start[1] = NXT(1); 8142 start[2] = NXT(2); 8143 start[3] = NXT(3); 8144 enc = xmlDetectCharEncoding(start, 4); 8145 if (enc != XML_CHAR_ENCODING_NONE) { 8146 xmlSwitchEncoding(ctxt, enc); 8147 } 8148 break; 8149 } 8150 8151 cur = ctxt->input->cur[0]; 8152 next = ctxt->input->cur[1]; 8153 if (cur == 0) { 8154 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8155 ctxt->sax->setDocumentLocator(ctxt->userData, 8156 &xmlDefaultSAXLocator); 8157 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8159 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 8160 ctxt->wellFormed = 0; 8161 ctxt->disableSAX = 1; 8162 ctxt->instate = XML_PARSER_EOF; 8163#ifdef DEBUG_PUSH 8164 xmlGenericError(xmlGenericErrorContext, 8165 "PP: entering EOF\n"); 8166#endif 8167 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8168 ctxt->sax->endDocument(ctxt->userData); 8169 goto done; 8170 } 8171 if ((cur == '<') && (next == '?')) { 8172 /* PI or XML decl */ 8173 if (avail < 5) return(ret); 8174 if ((!terminate) && 8175 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8176 return(ret); 8177 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8178 ctxt->sax->setDocumentLocator(ctxt->userData, 8179 &xmlDefaultSAXLocator); 8180 if ((ctxt->input->cur[2] == 'x') && 8181 (ctxt->input->cur[3] == 'm') && 8182 (ctxt->input->cur[4] == 'l') && 8183 (IS_BLANK(ctxt->input->cur[5]))) { 8184 ret += 5; 8185#ifdef DEBUG_PUSH 8186 xmlGenericError(xmlGenericErrorContext, 8187 "PP: Parsing XML Decl\n"); 8188#endif 8189 xmlParseXMLDecl(ctxt); 8190 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8191 /* 8192 * The XML REC instructs us to stop parsing right 8193 * here 8194 */ 8195 ctxt->instate = XML_PARSER_EOF; 8196 return(0); 8197 } 8198 ctxt->standalone = ctxt->input->standalone; 8199 if ((ctxt->encoding == NULL) && 8200 (ctxt->input->encoding != NULL)) 8201 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 8202 if 
((ctxt->sax) && (ctxt->sax->startDocument) && 8203 (!ctxt->disableSAX)) 8204 ctxt->sax->startDocument(ctxt->userData); 8205 ctxt->instate = XML_PARSER_MISC; 8206#ifdef DEBUG_PUSH 8207 xmlGenericError(xmlGenericErrorContext, 8208 "PP: entering MISC\n"); 8209#endif 8210 } else { 8211 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8212 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8213 (!ctxt->disableSAX)) 8214 ctxt->sax->startDocument(ctxt->userData); 8215 ctxt->instate = XML_PARSER_MISC; 8216#ifdef DEBUG_PUSH 8217 xmlGenericError(xmlGenericErrorContext, 8218 "PP: entering MISC\n"); 8219#endif 8220 } 8221 } else { 8222 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8223 ctxt->sax->setDocumentLocator(ctxt->userData, 8224 &xmlDefaultSAXLocator); 8225 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8226 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8227 (!ctxt->disableSAX)) 8228 ctxt->sax->startDocument(ctxt->userData); 8229 ctxt->instate = XML_PARSER_MISC; 8230#ifdef DEBUG_PUSH 8231 xmlGenericError(xmlGenericErrorContext, 8232 "PP: entering MISC\n"); 8233#endif 8234 } 8235 break; 8236 case XML_PARSER_MISC: 8237 SKIP_BLANKS; 8238 if (ctxt->input->buf == NULL) 8239 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8240 else 8241 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8242 if (avail < 2) 8243 goto done; 8244 cur = ctxt->input->cur[0]; 8245 next = ctxt->input->cur[1]; 8246 if ((cur == '<') && (next == '?')) { 8247 if ((!terminate) && 8248 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8249 goto done; 8250#ifdef DEBUG_PUSH 8251 xmlGenericError(xmlGenericErrorContext, 8252 "PP: Parsing PI\n"); 8253#endif 8254 xmlParsePI(ctxt); 8255 } else if ((cur == '<') && (next == '!') && 8256 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8257 if ((!terminate) && 8258 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8259 goto done; 8260#ifdef DEBUG_PUSH 8261 
xmlGenericError(xmlGenericErrorContext, 8262 "PP: Parsing Comment\n"); 8263#endif 8264 xmlParseComment(ctxt); 8265 ctxt->instate = XML_PARSER_MISC; 8266 } else if ((cur == '<') && (next == '!') && 8267 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 8268 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 8269 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 8270 (ctxt->input->cur[8] == 'E')) { 8271 if ((!terminate) && 8272 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8273 goto done; 8274#ifdef DEBUG_PUSH 8275 xmlGenericError(xmlGenericErrorContext, 8276 "PP: Parsing internal subset\n"); 8277#endif 8278 ctxt->inSubset = 1; 8279 xmlParseDocTypeDecl(ctxt); 8280 if (RAW == '[') { 8281 ctxt->instate = XML_PARSER_DTD; 8282#ifdef DEBUG_PUSH 8283 xmlGenericError(xmlGenericErrorContext, 8284 "PP: entering DTD\n"); 8285#endif 8286 } else { 8287 /* 8288 * Create and update the external subset. 8289 */ 8290 ctxt->inSubset = 2; 8291 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8292 (ctxt->sax->externalSubset != NULL)) 8293 ctxt->sax->externalSubset(ctxt->userData, 8294 ctxt->intSubName, ctxt->extSubSystem, 8295 ctxt->extSubURI); 8296 ctxt->inSubset = 0; 8297 ctxt->instate = XML_PARSER_PROLOG; 8298#ifdef DEBUG_PUSH 8299 xmlGenericError(xmlGenericErrorContext, 8300 "PP: entering PROLOG\n"); 8301#endif 8302 } 8303 } else if ((cur == '<') && (next == '!') && 8304 (avail < 9)) { 8305 goto done; 8306 } else { 8307 ctxt->instate = XML_PARSER_START_TAG; 8308#ifdef DEBUG_PUSH 8309 xmlGenericError(xmlGenericErrorContext, 8310 "PP: entering START_TAG\n"); 8311#endif 8312 } 8313 break; 8314 case XML_PARSER_IGNORE: 8315 xmlGenericError(xmlGenericErrorContext, 8316 "PP: internal error, state == IGNORE"); 8317 ctxt->instate = XML_PARSER_DTD; 8318#ifdef DEBUG_PUSH 8319 xmlGenericError(xmlGenericErrorContext, 8320 "PP: entering DTD\n"); 8321#endif 8322 break; 8323 case XML_PARSER_PROLOG: 8324 SKIP_BLANKS; 8325 if (ctxt->input->buf == NULL) 8326 
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8327 else 8328 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8329 if (avail < 2) 8330 goto done; 8331 cur = ctxt->input->cur[0]; 8332 next = ctxt->input->cur[1]; 8333 if ((cur == '<') && (next == '?')) { 8334 if ((!terminate) && 8335 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8336 goto done; 8337#ifdef DEBUG_PUSH 8338 xmlGenericError(xmlGenericErrorContext, 8339 "PP: Parsing PI\n"); 8340#endif 8341 xmlParsePI(ctxt); 8342 } else if ((cur == '<') && (next == '!') && 8343 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8344 if ((!terminate) && 8345 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8346 goto done; 8347#ifdef DEBUG_PUSH 8348 xmlGenericError(xmlGenericErrorContext, 8349 "PP: Parsing Comment\n"); 8350#endif 8351 xmlParseComment(ctxt); 8352 ctxt->instate = XML_PARSER_PROLOG; 8353 } else if ((cur == '<') && (next == '!') && 8354 (avail < 4)) { 8355 goto done; 8356 } else { 8357 ctxt->instate = XML_PARSER_START_TAG; 8358#ifdef DEBUG_PUSH 8359 xmlGenericError(xmlGenericErrorContext, 8360 "PP: entering START_TAG\n"); 8361#endif 8362 } 8363 break; 8364 case XML_PARSER_EPILOG: 8365 SKIP_BLANKS; 8366 if (ctxt->input->buf == NULL) 8367 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8368 else 8369 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8370 if (avail < 2) 8371 goto done; 8372 cur = ctxt->input->cur[0]; 8373 next = ctxt->input->cur[1]; 8374 if ((cur == '<') && (next == '?')) { 8375 if ((!terminate) && 8376 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8377 goto done; 8378#ifdef DEBUG_PUSH 8379 xmlGenericError(xmlGenericErrorContext, 8380 "PP: Parsing PI\n"); 8381#endif 8382 xmlParsePI(ctxt); 8383 ctxt->instate = XML_PARSER_EPILOG; 8384 } else if ((cur == '<') && (next == '!') && 8385 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8386 if ((!terminate) && 8387 
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8388 goto done; 8389#ifdef DEBUG_PUSH 8390 xmlGenericError(xmlGenericErrorContext, 8391 "PP: Parsing Comment\n"); 8392#endif 8393 xmlParseComment(ctxt); 8394 ctxt->instate = XML_PARSER_EPILOG; 8395 } else if ((cur == '<') && (next == '!') && 8396 (avail < 4)) { 8397 goto done; 8398 } else { 8399 ctxt->errNo = XML_ERR_DOCUMENT_END; 8400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8401 ctxt->sax->error(ctxt->userData, 8402 "Extra content at the end of the document\n"); 8403 ctxt->wellFormed = 0; 8404 ctxt->disableSAX = 1; 8405 ctxt->instate = XML_PARSER_EOF; 8406#ifdef DEBUG_PUSH 8407 xmlGenericError(xmlGenericErrorContext, 8408 "PP: entering EOF\n"); 8409#endif 8410 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8411 ctxt->sax->endDocument(ctxt->userData); 8412 goto done; 8413 } 8414 break; 8415 case XML_PARSER_START_TAG: { 8416 xmlChar *name, *oldname; 8417 8418 if ((avail < 2) && (ctxt->inputNr == 1)) 8419 goto done; 8420 cur = ctxt->input->cur[0]; 8421 if (cur != '<') { 8422 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8424 ctxt->sax->error(ctxt->userData, 8425 "Start tag expect, '<' not found\n"); 8426 ctxt->wellFormed = 0; 8427 ctxt->disableSAX = 1; 8428 ctxt->instate = XML_PARSER_EOF; 8429#ifdef DEBUG_PUSH 8430 xmlGenericError(xmlGenericErrorContext, 8431 "PP: entering EOF\n"); 8432#endif 8433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8434 ctxt->sax->endDocument(ctxt->userData); 8435 goto done; 8436 } 8437 if ((!terminate) && 8438 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8439 goto done; 8440 if (ctxt->spaceNr == 0) 8441 spacePush(ctxt, -1); 8442 else 8443 spacePush(ctxt, *ctxt->space); 8444 name = xmlParseStartTag(ctxt); 8445 if (name == NULL) { 8446 spacePop(ctxt); 8447 ctxt->instate = XML_PARSER_EOF; 8448#ifdef DEBUG_PUSH 8449 xmlGenericError(xmlGenericErrorContext, 8450 "PP: entering EOF\n"); 8451#endif 8452 if 
((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8453 ctxt->sax->endDocument(ctxt->userData); 8454 goto done; 8455 } 8456 namePush(ctxt, xmlStrdup(name)); 8457 8458 /* 8459 * [ VC: Root Element Type ] 8460 * The Name in the document type declaration must match 8461 * the element type of the root element. 8462 */ 8463 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8464 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8465 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8466 8467 /* 8468 * Check for an Empty Element. 8469 */ 8470 if ((RAW == '/') && (NXT(1) == '>')) { 8471 SKIP(2); 8472 if ((ctxt->sax != NULL) && 8473 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 8474 ctxt->sax->endElement(ctxt->userData, name); 8475 xmlFree(name); 8476 oldname = namePop(ctxt); 8477 spacePop(ctxt); 8478 if (oldname != NULL) { 8479#ifdef DEBUG_STACK 8480 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8481#endif 8482 xmlFree(oldname); 8483 } 8484 if (ctxt->name == NULL) { 8485 ctxt->instate = XML_PARSER_EPILOG; 8486#ifdef DEBUG_PUSH 8487 xmlGenericError(xmlGenericErrorContext, 8488 "PP: entering EPILOG\n"); 8489#endif 8490 } else { 8491 ctxt->instate = XML_PARSER_CONTENT; 8492#ifdef DEBUG_PUSH 8493 xmlGenericError(xmlGenericErrorContext, 8494 "PP: entering CONTENT\n"); 8495#endif 8496 } 8497 break; 8498 } 8499 if (RAW == '>') { 8500 NEXT; 8501 } else { 8502 ctxt->errNo = XML_ERR_GT_REQUIRED; 8503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8504 ctxt->sax->error(ctxt->userData, 8505 "Couldn't find end of Start Tag %s\n", 8506 name); 8507 ctxt->wellFormed = 0; 8508 ctxt->disableSAX = 1; 8509 8510 /* 8511 * end of parsing of this node. 
8512 */ 8513 nodePop(ctxt); 8514 oldname = namePop(ctxt); 8515 spacePop(ctxt); 8516 if (oldname != NULL) { 8517#ifdef DEBUG_STACK 8518 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8519#endif 8520 xmlFree(oldname); 8521 } 8522 } 8523 xmlFree(name); 8524 ctxt->instate = XML_PARSER_CONTENT; 8525#ifdef DEBUG_PUSH 8526 xmlGenericError(xmlGenericErrorContext, 8527 "PP: entering CONTENT\n"); 8528#endif 8529 break; 8530 } 8531 case XML_PARSER_CONTENT: { 8532 const xmlChar *test; 8533 int cons; 8534 if ((avail < 2) && (ctxt->inputNr == 1)) 8535 goto done; 8536 cur = ctxt->input->cur[0]; 8537 next = ctxt->input->cur[1]; 8538 8539 test = CUR_PTR; 8540 cons = ctxt->input->consumed; 8541 if ((cur == '<') && (next == '?')) { 8542 if ((!terminate) && 8543 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8544 goto done; 8545#ifdef DEBUG_PUSH 8546 xmlGenericError(xmlGenericErrorContext, 8547 "PP: Parsing PI\n"); 8548#endif 8549 xmlParsePI(ctxt); 8550 } else if ((cur == '<') && (next == '!') && 8551 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8552 if ((!terminate) && 8553 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8554 goto done; 8555#ifdef DEBUG_PUSH 8556 xmlGenericError(xmlGenericErrorContext, 8557 "PP: Parsing Comment\n"); 8558#endif 8559 xmlParseComment(ctxt); 8560 ctxt->instate = XML_PARSER_CONTENT; 8561 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8562 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8563 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8564 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8565 (ctxt->input->cur[8] == '[')) { 8566 SKIP(9); 8567 ctxt->instate = XML_PARSER_CDATA_SECTION; 8568#ifdef DEBUG_PUSH 8569 xmlGenericError(xmlGenericErrorContext, 8570 "PP: entering CDATA_SECTION\n"); 8571#endif 8572 break; 8573 } else if ((cur == '<') && (next == '!') && 8574 (avail < 9)) { 8575 goto done; 8576 } else if ((cur == '<') && (next == '/')) { 8577 ctxt->instate = XML_PARSER_END_TAG; 
8578#ifdef DEBUG_PUSH 8579 xmlGenericError(xmlGenericErrorContext, 8580 "PP: entering END_TAG\n"); 8581#endif 8582 break; 8583 } else if (cur == '<') { 8584 ctxt->instate = XML_PARSER_START_TAG; 8585#ifdef DEBUG_PUSH 8586 xmlGenericError(xmlGenericErrorContext, 8587 "PP: entering START_TAG\n"); 8588#endif 8589 break; 8590 } else if (cur == '&') { 8591 if ((!terminate) && 8592 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8593 goto done; 8594#ifdef DEBUG_PUSH 8595 xmlGenericError(xmlGenericErrorContext, 8596 "PP: Parsing Reference\n"); 8597#endif 8598 xmlParseReference(ctxt); 8599 } else { 8600 /* TODO Avoid the extra copy, handle directly !!! */ 8601 /* 8602 * Goal of the following test is: 8603 * - minimize calls to the SAX 'character' callback 8604 * when they are mergeable 8605 * - handle an problem for isBlank when we only parse 8606 * a sequence of blank chars and the next one is 8607 * not available to check against '<' presence. 8608 * - tries to homogenize the differences in SAX 8609 * callbacks between the push and pull versions 8610 * of the parser. 8611 */ 8612 if ((ctxt->inputNr == 1) && 8613 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8614 if ((!terminate) && 8615 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8616 goto done; 8617 } 8618 ctxt->checkIndex = 0; 8619#ifdef DEBUG_PUSH 8620 xmlGenericError(xmlGenericErrorContext, 8621 "PP: Parsing char data\n"); 8622#endif 8623 xmlParseCharData(ctxt, 0); 8624 } 8625 /* 8626 * Pop-up of finished entities. 
8627 */ 8628 while ((RAW == 0) && (ctxt->inputNr > 1)) 8629 xmlPopInput(ctxt); 8630 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8631 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8633 ctxt->sax->error(ctxt->userData, 8634 "detected an error in element content\n"); 8635 ctxt->wellFormed = 0; 8636 ctxt->disableSAX = 1; 8637 ctxt->instate = XML_PARSER_EOF; 8638 break; 8639 } 8640 break; 8641 } 8642 case XML_PARSER_CDATA_SECTION: { 8643 /* 8644 * The Push mode need to have the SAX callback for 8645 * cdataBlock merge back contiguous callbacks. 8646 */ 8647 int base; 8648 8649 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8650 if (base < 0) { 8651 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8652 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8653 if (ctxt->sax->cdataBlock != NULL) 8654 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8655 XML_PARSER_BIG_BUFFER_SIZE); 8656 } 8657 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8658 ctxt->checkIndex = 0; 8659 } 8660 goto done; 8661 } else { 8662 if ((ctxt->sax != NULL) && (base > 0) && 8663 (!ctxt->disableSAX)) { 8664 if (ctxt->sax->cdataBlock != NULL) 8665 ctxt->sax->cdataBlock(ctxt->userData, 8666 ctxt->input->cur, base); 8667 } 8668 SKIP(base + 3); 8669 ctxt->checkIndex = 0; 8670 ctxt->instate = XML_PARSER_CONTENT; 8671#ifdef DEBUG_PUSH 8672 xmlGenericError(xmlGenericErrorContext, 8673 "PP: entering CONTENT\n"); 8674#endif 8675 } 8676 break; 8677 } 8678 case XML_PARSER_END_TAG: 8679 if (avail < 2) 8680 goto done; 8681 if ((!terminate) && 8682 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8683 goto done; 8684 xmlParseEndTag(ctxt); 8685 if (ctxt->name == NULL) { 8686 ctxt->instate = XML_PARSER_EPILOG; 8687#ifdef DEBUG_PUSH 8688 xmlGenericError(xmlGenericErrorContext, 8689 "PP: entering EPILOG\n"); 8690#endif 8691 } else { 8692 ctxt->instate = XML_PARSER_CONTENT; 8693#ifdef DEBUG_PUSH 8694 xmlGenericError(xmlGenericErrorContext, 8695 "PP: entering 
CONTENT\n"); 8696#endif 8697 } 8698 break; 8699 case XML_PARSER_DTD: { 8700 /* 8701 * Sorry but progressive parsing of the internal subset 8702 * is not expected to be supported. We first check that 8703 * the full content of the internal subset is available and 8704 * the parsing is launched only at that point. 8705 * Internal subset ends up with "']' S? '>'" in an unescaped 8706 * section and not in a ']]>' sequence which are conditional 8707 * sections (whoever argued to keep that crap in XML deserve 8708 * a place in hell !). 8709 */ 8710 int base, i; 8711 xmlChar *buf; 8712 xmlChar quote = 0; 8713 8714 base = ctxt->input->cur - ctxt->input->base; 8715 if (base < 0) return(0); 8716 if (ctxt->checkIndex > base) 8717 base = ctxt->checkIndex; 8718 buf = ctxt->input->buf->buffer->content; 8719 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8720 base++) { 8721 if (quote != 0) { 8722 if (buf[base] == quote) 8723 quote = 0; 8724 continue; 8725 } 8726 if (buf[base] == '"') { 8727 quote = '"'; 8728 continue; 8729 } 8730 if (buf[base] == '\'') { 8731 quote = '\''; 8732 continue; 8733 } 8734 if (buf[base] == ']') { 8735 if ((unsigned int) base +1 >= 8736 ctxt->input->buf->buffer->use) 8737 break; 8738 if (buf[base + 1] == ']') { 8739 /* conditional crap, skip both ']' ! 
*/ 8740 base++; 8741 continue; 8742 } 8743 for (i = 0; 8744 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8745 i++) { 8746 if (buf[base + i] == '>') 8747 goto found_end_int_subset; 8748 } 8749 break; 8750 } 8751 } 8752 /* 8753 * We didn't found the end of the Internal subset 8754 */ 8755 if (quote == 0) 8756 ctxt->checkIndex = base; 8757#ifdef DEBUG_PUSH 8758 if (next == 0) 8759 xmlGenericError(xmlGenericErrorContext, 8760 "PP: lookup of int subset end filed\n"); 8761#endif 8762 goto done; 8763 8764found_end_int_subset: 8765 xmlParseInternalSubset(ctxt); 8766 ctxt->inSubset = 2; 8767 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8768 (ctxt->sax->externalSubset != NULL)) 8769 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8770 ctxt->extSubSystem, ctxt->extSubURI); 8771 ctxt->inSubset = 0; 8772 ctxt->instate = XML_PARSER_PROLOG; 8773 ctxt->checkIndex = 0; 8774#ifdef DEBUG_PUSH 8775 xmlGenericError(xmlGenericErrorContext, 8776 "PP: entering PROLOG\n"); 8777#endif 8778 break; 8779 } 8780 case XML_PARSER_COMMENT: 8781 xmlGenericError(xmlGenericErrorContext, 8782 "PP: internal error, state == COMMENT\n"); 8783 ctxt->instate = XML_PARSER_CONTENT; 8784#ifdef DEBUG_PUSH 8785 xmlGenericError(xmlGenericErrorContext, 8786 "PP: entering CONTENT\n"); 8787#endif 8788 break; 8789 case XML_PARSER_PI: 8790 xmlGenericError(xmlGenericErrorContext, 8791 "PP: internal error, state == PI\n"); 8792 ctxt->instate = XML_PARSER_CONTENT; 8793#ifdef DEBUG_PUSH 8794 xmlGenericError(xmlGenericErrorContext, 8795 "PP: entering CONTENT\n"); 8796#endif 8797 break; 8798 case XML_PARSER_ENTITY_DECL: 8799 xmlGenericError(xmlGenericErrorContext, 8800 "PP: internal error, state == ENTITY_DECL\n"); 8801 ctxt->instate = XML_PARSER_DTD; 8802#ifdef DEBUG_PUSH 8803 xmlGenericError(xmlGenericErrorContext, 8804 "PP: entering DTD\n"); 8805#endif 8806 break; 8807 case XML_PARSER_ENTITY_VALUE: 8808 xmlGenericError(xmlGenericErrorContext, 8809 "PP: internal error, state == 
ENTITY_VALUE\n"); 8810 ctxt->instate = XML_PARSER_CONTENT; 8811#ifdef DEBUG_PUSH 8812 xmlGenericError(xmlGenericErrorContext, 8813 "PP: entering DTD\n"); 8814#endif 8815 break; 8816 case XML_PARSER_ATTRIBUTE_VALUE: 8817 xmlGenericError(xmlGenericErrorContext, 8818 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8819 ctxt->instate = XML_PARSER_START_TAG; 8820#ifdef DEBUG_PUSH 8821 xmlGenericError(xmlGenericErrorContext, 8822 "PP: entering START_TAG\n"); 8823#endif 8824 break; 8825 case XML_PARSER_SYSTEM_LITERAL: 8826 xmlGenericError(xmlGenericErrorContext, 8827 "PP: internal error, state == SYSTEM_LITERAL\n"); 8828 ctxt->instate = XML_PARSER_START_TAG; 8829#ifdef DEBUG_PUSH 8830 xmlGenericError(xmlGenericErrorContext, 8831 "PP: entering START_TAG\n"); 8832#endif 8833 break; 8834 case XML_PARSER_PUBLIC_LITERAL: 8835 xmlGenericError(xmlGenericErrorContext, 8836 "PP: internal error, state == PUBLIC_LITERAL\n"); 8837 ctxt->instate = XML_PARSER_START_TAG; 8838#ifdef DEBUG_PUSH 8839 xmlGenericError(xmlGenericErrorContext, 8840 "PP: entering START_TAG\n"); 8841#endif 8842 break; 8843 } 8844 } 8845done: 8846#ifdef DEBUG_PUSH 8847 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8848#endif 8849 return(ret); 8850} 8851 8852/** 8853 * xmlParseChunk: 8854 * @ctxt: an XML parser context 8855 * @chunk: an char array 8856 * @size: the size in byte of the chunk 8857 * @terminate: last chunk indicator 8858 * 8859 * Parse a Chunk of memory 8860 * 8861 * Returns zero if no error, the xmlParserErrors otherwise. 
8862 */ 8863int 8864xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8865 int terminate) { 8866 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8867 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8868 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8869 int cur = ctxt->input->cur - ctxt->input->base; 8870 8871 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8872 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8873 ctxt->input->cur = ctxt->input->base + cur; 8874 ctxt->input->end = 8875 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8876#ifdef DEBUG_PUSH 8877 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8878#endif 8879 8880 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8881 xmlParseTryOrFinish(ctxt, terminate); 8882 } else if (ctxt->instate != XML_PARSER_EOF) { 8883 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8884 xmlParserInputBufferPtr in = ctxt->input->buf; 8885 if ((in->encoder != NULL) && (in->buffer != NULL) && 8886 (in->raw != NULL)) { 8887 int nbchars; 8888 8889 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8890 if (nbchars < 0) { 8891 xmlGenericError(xmlGenericErrorContext, 8892 "xmlParseChunk: encoder error\n"); 8893 return(XML_ERR_INVALID_ENCODING); 8894 } 8895 } 8896 } 8897 } 8898 xmlParseTryOrFinish(ctxt, terminate); 8899 if (terminate) { 8900 /* 8901 * Check for termination 8902 */ 8903 int avail = 0; 8904 if (ctxt->input->buf == NULL) 8905 avail = ctxt->input->length - 8906 (ctxt->input->cur - ctxt->input->base); 8907 else 8908 avail = ctxt->input->buf->buffer->use - 8909 (ctxt->input->cur - ctxt->input->base); 8910 8911 if ((ctxt->instate != XML_PARSER_EOF) && 8912 (ctxt->instate != XML_PARSER_EPILOG)) { 8913 ctxt->errNo = XML_ERR_DOCUMENT_END; 8914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8915 ctxt->sax->error(ctxt->userData, 8916 "Extra content at the end of the 
document\n"); 8917 ctxt->wellFormed = 0; 8918 ctxt->disableSAX = 1; 8919 } 8920 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 8921 ctxt->errNo = XML_ERR_DOCUMENT_END; 8922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8923 ctxt->sax->error(ctxt->userData, 8924 "Extra content at the end of the document\n"); 8925 ctxt->wellFormed = 0; 8926 ctxt->disableSAX = 1; 8927 8928 } 8929 if (ctxt->instate != XML_PARSER_EOF) { 8930 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8931 ctxt->sax->endDocument(ctxt->userData); 8932 } 8933 ctxt->instate = XML_PARSER_EOF; 8934 } 8935 return((xmlParserErrors) ctxt->errNo); 8936} 8937 8938/************************************************************************ 8939 * * 8940 * I/O front end functions to the parser * 8941 * * 8942 ************************************************************************/ 8943 8944/** 8945 * xmlStopParser: 8946 * @ctxt: an XML parser context 8947 * 8948 * Blocks further parser processing 8949 */ 8950void 8951xmlStopParser(xmlParserCtxtPtr ctxt) { 8952 ctxt->instate = XML_PARSER_EOF; 8953 if (ctxt->input != NULL) 8954 ctxt->input->cur = BAD_CAST""; 8955} 8956 8957/** 8958 * xmlCreatePushParserCtxt: 8959 * @sax: a SAX handler 8960 * @user_data: The user data returned on SAX callbacks 8961 * @chunk: a pointer to an array of chars 8962 * @size: number of chars in the array 8963 * @filename: an optional file name or URI 8964 * 8965 * Create a parser context for using the XML parser in push mode. 8966 * If @buffer and @size are non-NULL, the data is used to detect 8967 * the encoding. The remaining characters will be parsed so they 8968 * don't need to be fed in again through xmlParseChunk. 8969 * To allow content encoding detection, @size should be >= 4 8970 * The value of @filename is used for fetching external entities 8971 * and error/warning reports. 
8972 * 8973 * Returns the new parser context or NULL 8974 */ 8975 8976xmlParserCtxtPtr 8977xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8978 const char *chunk, int size, const char *filename) { 8979 xmlParserCtxtPtr ctxt; 8980 xmlParserInputPtr inputStream; 8981 xmlParserInputBufferPtr buf; 8982 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8983 8984 /* 8985 * plug some encoding conversion routines 8986 */ 8987 if ((chunk != NULL) && (size >= 4)) 8988 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8989 8990 buf = xmlAllocParserInputBuffer(enc); 8991 if (buf == NULL) return(NULL); 8992 8993 ctxt = xmlNewParserCtxt(); 8994 if (ctxt == NULL) { 8995 xmlFree(buf); 8996 return(NULL); 8997 } 8998 if (sax != NULL) { 8999 if (ctxt->sax != &xmlDefaultSAXHandler) 9000 xmlFree(ctxt->sax); 9001 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9002 if (ctxt->sax == NULL) { 9003 xmlFree(buf); 9004 xmlFree(ctxt); 9005 return(NULL); 9006 } 9007 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9008 if (user_data != NULL) 9009 ctxt->userData = user_data; 9010 } 9011 if (filename == NULL) { 9012 ctxt->directory = NULL; 9013 } else { 9014 ctxt->directory = xmlParserGetDirectory(filename); 9015 } 9016 9017 inputStream = xmlNewInputStream(ctxt); 9018 if (inputStream == NULL) { 9019 xmlFreeParserCtxt(ctxt); 9020 return(NULL); 9021 } 9022 9023 if (filename == NULL) 9024 inputStream->filename = NULL; 9025 else 9026 inputStream->filename = (char *) 9027 xmlNormalizeWindowsPath((const xmlChar *) filename); 9028 inputStream->buf = buf; 9029 inputStream->base = inputStream->buf->buffer->content; 9030 inputStream->cur = inputStream->buf->buffer->content; 9031 inputStream->end = 9032 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 9033 9034 inputPush(ctxt, inputStream); 9035 9036 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9037 (ctxt->input->buf != NULL)) { 9038 int base = ctxt->input->base - 
ctxt->input->buf->buffer->content; 9039 int cur = ctxt->input->cur - ctxt->input->base; 9040 9041 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9042 9043 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9044 ctxt->input->cur = ctxt->input->base + cur; 9045 ctxt->input->end = 9046 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9047#ifdef DEBUG_PUSH 9048 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9049#endif 9050 } 9051 9052 if (enc != XML_CHAR_ENCODING_NONE) { 9053 xmlSwitchEncoding(ctxt, enc); 9054 } 9055 9056 return(ctxt); 9057} 9058 9059/** 9060 * xmlCreateIOParserCtxt: 9061 * @sax: a SAX handler 9062 * @user_data: The user data returned on SAX callbacks 9063 * @ioread: an I/O read function 9064 * @ioclose: an I/O close function 9065 * @ioctx: an I/O handler 9066 * @enc: the charset encoding if known 9067 * 9068 * Create a parser context for using the XML parser with an existing 9069 * I/O stream 9070 * 9071 * Returns the new parser context or NULL 9072 */ 9073xmlParserCtxtPtr 9074xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9075 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 9076 void *ioctx, xmlCharEncoding enc) { 9077 xmlParserCtxtPtr ctxt; 9078 xmlParserInputPtr inputStream; 9079 xmlParserInputBufferPtr buf; 9080 9081 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 9082 if (buf == NULL) return(NULL); 9083 9084 ctxt = xmlNewParserCtxt(); 9085 if (ctxt == NULL) { 9086 xmlFree(buf); 9087 return(NULL); 9088 } 9089 if (sax != NULL) { 9090 if (ctxt->sax != &xmlDefaultSAXHandler) 9091 xmlFree(ctxt->sax); 9092 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9093 if (ctxt->sax == NULL) { 9094 xmlFree(buf); 9095 xmlFree(ctxt); 9096 return(NULL); 9097 } 9098 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9099 if (user_data != NULL) 9100 ctxt->userData = user_data; 9101 } 9102 9103 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 9104 if 
(inputStream == NULL) { 9105 xmlFreeParserCtxt(ctxt); 9106 return(NULL); 9107 } 9108 inputPush(ctxt, inputStream); 9109 9110 return(ctxt); 9111} 9112 9113/************************************************************************ 9114 * * 9115 * Front ends when parsing a DTD * 9116 * * 9117 ************************************************************************/ 9118 9119/** 9120 * xmlIOParseDTD: 9121 * @sax: the SAX handler block or NULL 9122 * @input: an Input Buffer 9123 * @enc: the charset encoding if known 9124 * 9125 * Load and parse a DTD 9126 * 9127 * Returns the resulting xmlDtdPtr or NULL in case of error. 9128 * @input will be freed at parsing end. 9129 */ 9130 9131xmlDtdPtr 9132xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 9133 xmlCharEncoding enc) { 9134 xmlDtdPtr ret = NULL; 9135 xmlParserCtxtPtr ctxt; 9136 xmlParserInputPtr pinput = NULL; 9137 xmlChar start[4]; 9138 9139 if (input == NULL) 9140 return(NULL); 9141 9142 ctxt = xmlNewParserCtxt(); 9143 if (ctxt == NULL) { 9144 return(NULL); 9145 } 9146 9147 /* 9148 * Set-up the SAX context 9149 */ 9150 if (sax != NULL) { 9151 if (ctxt->sax != NULL) 9152 xmlFree(ctxt->sax); 9153 ctxt->sax = sax; 9154 ctxt->userData = NULL; 9155 } 9156 9157 /* 9158 * generate a parser input from the I/O handler 9159 */ 9160 9161 pinput = xmlNewIOInputStream(ctxt, input, enc); 9162 if (pinput == NULL) { 9163 if (sax != NULL) ctxt->sax = NULL; 9164 xmlFreeParserCtxt(ctxt); 9165 return(NULL); 9166 } 9167 9168 /* 9169 * plug some encoding conversion routines here. 9170 */ 9171 xmlPushInput(ctxt, pinput); 9172 9173 pinput->filename = NULL; 9174 pinput->line = 1; 9175 pinput->col = 1; 9176 pinput->base = ctxt->input->cur; 9177 pinput->cur = ctxt->input->cur; 9178 pinput->free = NULL; 9179 9180 /* 9181 * let's parse that entity knowing it's an external subset. 
9182 */ 9183 ctxt->inSubset = 2; 9184 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9185 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9186 BAD_CAST "none", BAD_CAST "none"); 9187 9188 if (enc == XML_CHAR_ENCODING_NONE) { 9189 /* 9190 * Get the 4 first bytes and decode the charset 9191 * if enc != XML_CHAR_ENCODING_NONE 9192 * plug some encoding conversion routines. 9193 */ 9194 start[0] = RAW; 9195 start[1] = NXT(1); 9196 start[2] = NXT(2); 9197 start[3] = NXT(3); 9198 enc = xmlDetectCharEncoding(start, 4); 9199 if (enc != XML_CHAR_ENCODING_NONE) { 9200 xmlSwitchEncoding(ctxt, enc); 9201 } 9202 } 9203 9204 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 9205 9206 if (ctxt->myDoc != NULL) { 9207 if (ctxt->wellFormed) { 9208 ret = ctxt->myDoc->extSubset; 9209 ctxt->myDoc->extSubset = NULL; 9210 } else { 9211 ret = NULL; 9212 } 9213 xmlFreeDoc(ctxt->myDoc); 9214 ctxt->myDoc = NULL; 9215 } 9216 if (sax != NULL) ctxt->sax = NULL; 9217 xmlFreeParserCtxt(ctxt); 9218 9219 return(ret); 9220} 9221 9222/** 9223 * xmlSAXParseDTD: 9224 * @sax: the SAX handler block 9225 * @ExternalID: a NAME* containing the External ID of the DTD 9226 * @SystemID: a NAME* containing the URL to the DTD 9227 * 9228 * Load and parse an external subset. 9229 * 9230 * Returns the resulting xmlDtdPtr or NULL in case of error. 
9231 */ 9232 9233xmlDtdPtr 9234xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 9235 const xmlChar *SystemID) { 9236 xmlDtdPtr ret = NULL; 9237 xmlParserCtxtPtr ctxt; 9238 xmlParserInputPtr input = NULL; 9239 xmlCharEncoding enc; 9240 9241 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 9242 9243 ctxt = xmlNewParserCtxt(); 9244 if (ctxt == NULL) { 9245 return(NULL); 9246 } 9247 9248 /* 9249 * Set-up the SAX context 9250 */ 9251 if (sax != NULL) { 9252 if (ctxt->sax != NULL) 9253 xmlFree(ctxt->sax); 9254 ctxt->sax = sax; 9255 ctxt->userData = NULL; 9256 } 9257 9258 /* 9259 * Ask the Entity resolver to load the damn thing 9260 */ 9261 9262 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 9263 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 9264 if (input == NULL) { 9265 if (sax != NULL) ctxt->sax = NULL; 9266 xmlFreeParserCtxt(ctxt); 9267 return(NULL); 9268 } 9269 9270 /* 9271 * plug some encoding conversion routines here. 9272 */ 9273 xmlPushInput(ctxt, input); 9274 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 9275 xmlSwitchEncoding(ctxt, enc); 9276 9277 if (input->filename == NULL) 9278 input->filename = (char *) xmlStrdup(SystemID); 9279 input->line = 1; 9280 input->col = 1; 9281 input->base = ctxt->input->cur; 9282 input->cur = ctxt->input->cur; 9283 input->free = NULL; 9284 9285 /* 9286 * let's parse that entity knowing it's an external subset. 
9287 */ 9288 ctxt->inSubset = 2; 9289 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9290 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9291 ExternalID, SystemID); 9292 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 9293 9294 if (ctxt->myDoc != NULL) { 9295 if (ctxt->wellFormed) { 9296 ret = ctxt->myDoc->extSubset; 9297 ctxt->myDoc->extSubset = NULL; 9298 } else { 9299 ret = NULL; 9300 } 9301 xmlFreeDoc(ctxt->myDoc); 9302 ctxt->myDoc = NULL; 9303 } 9304 if (sax != NULL) ctxt->sax = NULL; 9305 xmlFreeParserCtxt(ctxt); 9306 9307 return(ret); 9308} 9309 9310/** 9311 * xmlParseDTD: 9312 * @ExternalID: a NAME* containing the External ID of the DTD 9313 * @SystemID: a NAME* containing the URL to the DTD 9314 * 9315 * Load and parse an external subset. 9316 * 9317 * Returns the resulting xmlDtdPtr or NULL in case of error. 9318 */ 9319 9320xmlDtdPtr 9321xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 9322 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 9323} 9324 9325/************************************************************************ 9326 * * 9327 * Front ends when parsing an Entity * 9328 * * 9329 ************************************************************************/ 9330 9331/** 9332 * xmlParseCtxtExternalEntity: 9333 * @ctx: the existing parsing context 9334 * @URL: the URL for the entity to load 9335 * @ID: the System ID for the entity to load 9336 * @lst: the return value for the set of parsed nodes 9337 * 9338 * Parse an external general entity within an existing parsing context 9339 * An external general parsed entity is well-formed if it matches the 9340 * production labeled extParsedEnt. 9341 * 9342 * [78] extParsedEnt ::= TextDecl? 
content 9343 * 9344 * Returns 0 if the entity is well formed, -1 in case of args problem and 9345 * the parser error code otherwise 9346 */ 9347 9348int 9349xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 9350 const xmlChar *ID, xmlNodePtr *lst) { 9351 xmlParserCtxtPtr ctxt; 9352 xmlDocPtr newDoc; 9353 xmlSAXHandlerPtr oldsax = NULL; 9354 int ret = 0; 9355 xmlChar start[4]; 9356 xmlCharEncoding enc; 9357 9358 if (ctx->depth > 40) { 9359 return(XML_ERR_ENTITY_LOOP); 9360 } 9361 9362 if (lst != NULL) 9363 *lst = NULL; 9364 if ((URL == NULL) && (ID == NULL)) 9365 return(-1); 9366 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 9367 return(-1); 9368 9369 9370 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9371 if (ctxt == NULL) return(-1); 9372 ctxt->userData = ctxt; 9373 oldsax = ctxt->sax; 9374 ctxt->sax = ctx->sax; 9375 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9376 if (newDoc == NULL) { 9377 xmlFreeParserCtxt(ctxt); 9378 return(-1); 9379 } 9380 if (ctx->myDoc != NULL) { 9381 newDoc->intSubset = ctx->myDoc->intSubset; 9382 newDoc->extSubset = ctx->myDoc->extSubset; 9383 } 9384 if (ctx->myDoc->URL != NULL) { 9385 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 9386 } 9387 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9388 if (newDoc->children == NULL) { 9389 ctxt->sax = oldsax; 9390 xmlFreeParserCtxt(ctxt); 9391 newDoc->intSubset = NULL; 9392 newDoc->extSubset = NULL; 9393 xmlFreeDoc(newDoc); 9394 return(-1); 9395 } 9396 nodePush(ctxt, newDoc->children); 9397 if (ctx->myDoc == NULL) { 9398 ctxt->myDoc = newDoc; 9399 } else { 9400 ctxt->myDoc = ctx->myDoc; 9401 newDoc->children->doc = ctx->myDoc; 9402 } 9403 9404 /* 9405 * Get the 4 first bytes and decode the charset 9406 * if enc != XML_CHAR_ENCODING_NONE 9407 * plug some encoding conversion routines. 
9408 */ 9409 GROW 9410 start[0] = RAW; 9411 start[1] = NXT(1); 9412 start[2] = NXT(2); 9413 start[3] = NXT(3); 9414 enc = xmlDetectCharEncoding(start, 4); 9415 if (enc != XML_CHAR_ENCODING_NONE) { 9416 xmlSwitchEncoding(ctxt, enc); 9417 } 9418 9419 /* 9420 * Parse a possible text declaration first 9421 */ 9422 if ((RAW == '<') && (NXT(1) == '?') && 9423 (NXT(2) == 'x') && (NXT(3) == 'm') && 9424 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9425 xmlParseTextDecl(ctxt); 9426 } 9427 9428 /* 9429 * Doing validity checking on chunk doesn't make sense 9430 */ 9431 ctxt->instate = XML_PARSER_CONTENT; 9432 ctxt->validate = ctx->validate; 9433 ctxt->loadsubset = ctx->loadsubset; 9434 ctxt->depth = ctx->depth + 1; 9435 ctxt->replaceEntities = ctx->replaceEntities; 9436 if (ctxt->validate) { 9437 ctxt->vctxt.error = ctx->vctxt.error; 9438 ctxt->vctxt.warning = ctx->vctxt.warning; 9439 } else { 9440 ctxt->vctxt.error = NULL; 9441 ctxt->vctxt.warning = NULL; 9442 } 9443 ctxt->vctxt.nodeTab = NULL; 9444 ctxt->vctxt.nodeNr = 0; 9445 ctxt->vctxt.nodeMax = 0; 9446 ctxt->vctxt.node = NULL; 9447 9448 xmlParseContent(ctxt); 9449 9450 if ((RAW == '<') && (NXT(1) == '/')) { 9451 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9453 ctxt->sax->error(ctxt->userData, 9454 "chunk is not well balanced\n"); 9455 ctxt->wellFormed = 0; 9456 ctxt->disableSAX = 1; 9457 } else if (RAW != 0) { 9458 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9460 ctxt->sax->error(ctxt->userData, 9461 "extra content at the end of well balanced chunk\n"); 9462 ctxt->wellFormed = 0; 9463 ctxt->disableSAX = 1; 9464 } 9465 if (ctxt->node != newDoc->children) { 9466 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9468 ctxt->sax->error(ctxt->userData, 9469 "chunk is not well balanced\n"); 9470 ctxt->wellFormed = 0; 9471 ctxt->disableSAX = 1; 9472 } 9473 9474 if 
(!ctxt->wellFormed) { 9475 if (ctxt->errNo == 0) 9476 ret = 1; 9477 else 9478 ret = ctxt->errNo; 9479 } else { 9480 if (lst != NULL) { 9481 xmlNodePtr cur; 9482 9483 /* 9484 * Return the newly created nodeset after unlinking it from 9485 * they pseudo parent. 9486 */ 9487 cur = newDoc->children->children; 9488 *lst = cur; 9489 while (cur != NULL) { 9490 cur->parent = NULL; 9491 cur = cur->next; 9492 } 9493 newDoc->children->children = NULL; 9494 } 9495 ret = 0; 9496 } 9497 ctxt->sax = oldsax; 9498 xmlFreeParserCtxt(ctxt); 9499 newDoc->intSubset = NULL; 9500 newDoc->extSubset = NULL; 9501 xmlFreeDoc(newDoc); 9502 9503 return(ret); 9504} 9505 9506/** 9507 * xmlParseExternalEntityPrivate: 9508 * @doc: the document the chunk pertains to 9509 * @oldctxt: the previous parser context if available 9510 * @sax: the SAX handler bloc (possibly NULL) 9511 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9512 * @depth: Used for loop detection, use 0 9513 * @URL: the URL for the entity to load 9514 * @ID: the System ID for the entity to load 9515 * @list: the return value for the set of parsed nodes 9516 * 9517 * Private version of xmlParseExternalEntity() 9518 * 9519 * Returns 0 if the entity is well formed, -1 in case of args problem and 9520 * the parser error code otherwise 9521 */ 9522 9523static int 9524xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 9525 xmlSAXHandlerPtr sax, 9526 void *user_data, int depth, const xmlChar *URL, 9527 const xmlChar *ID, xmlNodePtr *list) { 9528 xmlParserCtxtPtr ctxt; 9529 xmlDocPtr newDoc; 9530 xmlSAXHandlerPtr oldsax = NULL; 9531 int ret = 0; 9532 xmlChar start[4]; 9533 xmlCharEncoding enc; 9534 9535 if (depth > 40) { 9536 return(XML_ERR_ENTITY_LOOP); 9537 } 9538 9539 9540 9541 if (list != NULL) 9542 *list = NULL; 9543 if ((URL == NULL) && (ID == NULL)) 9544 return(-1); 9545 if (doc == NULL) /* @@ relax but check for dereferences */ 9546 return(-1); 9547 9548 9549 ctxt = 
xmlCreateEntityParserCtxt(URL, ID, NULL); 9550 if (ctxt == NULL) return(-1); 9551 ctxt->userData = ctxt; 9552 if (oldctxt != NULL) { 9553 ctxt->_private = oldctxt->_private; 9554 ctxt->loadsubset = oldctxt->loadsubset; 9555 ctxt->validate = oldctxt->validate; 9556 ctxt->external = oldctxt->external; 9557 } else { 9558 /* 9559 * Doing validity checking on chunk without context 9560 * doesn't make sense 9561 */ 9562 ctxt->_private = NULL; 9563 ctxt->validate = 0; 9564 ctxt->external = 2; 9565 ctxt->loadsubset = 0; 9566 } 9567 if (sax != NULL) { 9568 oldsax = ctxt->sax; 9569 ctxt->sax = sax; 9570 if (user_data != NULL) 9571 ctxt->userData = user_data; 9572 } 9573 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9574 if (newDoc == NULL) { 9575 xmlFreeParserCtxt(ctxt); 9576 return(-1); 9577 } 9578 if (doc != NULL) { 9579 newDoc->intSubset = doc->intSubset; 9580 newDoc->extSubset = doc->extSubset; 9581 } 9582 if (doc->URL != NULL) { 9583 newDoc->URL = xmlStrdup(doc->URL); 9584 } 9585 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9586 if (newDoc->children == NULL) { 9587 if (sax != NULL) 9588 ctxt->sax = oldsax; 9589 xmlFreeParserCtxt(ctxt); 9590 newDoc->intSubset = NULL; 9591 newDoc->extSubset = NULL; 9592 xmlFreeDoc(newDoc); 9593 return(-1); 9594 } 9595 nodePush(ctxt, newDoc->children); 9596 if (doc == NULL) { 9597 ctxt->myDoc = newDoc; 9598 } else { 9599 ctxt->myDoc = doc; 9600 newDoc->children->doc = doc; 9601 } 9602 9603 /* 9604 * Get the 4 first bytes and decode the charset 9605 * if enc != XML_CHAR_ENCODING_NONE 9606 * plug some encoding conversion routines. 
9607 */ 9608 GROW; 9609 start[0] = RAW; 9610 start[1] = NXT(1); 9611 start[2] = NXT(2); 9612 start[3] = NXT(3); 9613 enc = xmlDetectCharEncoding(start, 4); 9614 if (enc != XML_CHAR_ENCODING_NONE) { 9615 xmlSwitchEncoding(ctxt, enc); 9616 } 9617 9618 /* 9619 * Parse a possible text declaration first 9620 */ 9621 if ((RAW == '<') && (NXT(1) == '?') && 9622 (NXT(2) == 'x') && (NXT(3) == 'm') && 9623 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9624 xmlParseTextDecl(ctxt); 9625 } 9626 9627 ctxt->instate = XML_PARSER_CONTENT; 9628 ctxt->depth = depth; 9629 9630 xmlParseContent(ctxt); 9631 9632 if ((RAW == '<') && (NXT(1) == '/')) { 9633 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9635 ctxt->sax->error(ctxt->userData, 9636 "chunk is not well balanced\n"); 9637 ctxt->wellFormed = 0; 9638 ctxt->disableSAX = 1; 9639 } else if (RAW != 0) { 9640 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9642 ctxt->sax->error(ctxt->userData, 9643 "extra content at the end of well balanced chunk\n"); 9644 ctxt->wellFormed = 0; 9645 ctxt->disableSAX = 1; 9646 } 9647 if (ctxt->node != newDoc->children) { 9648 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9650 ctxt->sax->error(ctxt->userData, 9651 "chunk is not well balanced\n"); 9652 ctxt->wellFormed = 0; 9653 ctxt->disableSAX = 1; 9654 } 9655 9656 if (!ctxt->wellFormed) { 9657 if (ctxt->errNo == 0) 9658 ret = 1; 9659 else 9660 ret = ctxt->errNo; 9661 } else { 9662 if (list != NULL) { 9663 xmlNodePtr cur; 9664 9665 /* 9666 * Return the newly created nodeset after unlinking it from 9667 * they pseudo parent. 
9668 */ 9669 cur = newDoc->children->children; 9670 *list = cur; 9671 while (cur != NULL) { 9672 cur->parent = NULL; 9673 cur = cur->next; 9674 } 9675 newDoc->children->children = NULL; 9676 } 9677 ret = 0; 9678 } 9679 if (sax != NULL) 9680 ctxt->sax = oldsax; 9681 xmlFreeParserCtxt(ctxt); 9682 newDoc->intSubset = NULL; 9683 newDoc->extSubset = NULL; 9684 xmlFreeDoc(newDoc); 9685 9686 return(ret); 9687} 9688 9689/** 9690 * xmlParseExternalEntity: 9691 * @doc: the document the chunk pertains to 9692 * @sax: the SAX handler bloc (possibly NULL) 9693 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9694 * @depth: Used for loop detection, use 0 9695 * @URL: the URL for the entity to load 9696 * @ID: the System ID for the entity to load 9697 * @lst: the return value for the set of parsed nodes 9698 * 9699 * Parse an external general entity 9700 * An external general parsed entity is well-formed if it matches the 9701 * production labeled extParsedEnt. 9702 * 9703 * [78] extParsedEnt ::= TextDecl? 
content 9704 * 9705 * Returns 0 if the entity is well formed, -1 in case of args problem and 9706 * the parser error code otherwise 9707 */ 9708 9709int 9710xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 9711 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 9712 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 9713 ID, lst)); 9714} 9715 9716/** 9717 * xmlParseBalancedChunkMemory: 9718 * @doc: the document the chunk pertains to 9719 * @sax: the SAX handler bloc (possibly NULL) 9720 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9721 * @depth: Used for loop detection, use 0 9722 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9723 * @lst: the return value for the set of parsed nodes 9724 * 9725 * Parse a well-balanced chunk of an XML document 9726 * called by the parser 9727 * The allowed sequence for the Well Balanced Chunk is the one defined by 9728 * the content production in the XML grammar: 9729 * 9730 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9731 * 9732 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9733 * the parser error code otherwise 9734 */ 9735 9736int 9737xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9738 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 9739 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 9740 depth, string, lst, 0 ); 9741} 9742 9743/** 9744 * xmlParseBalancedChunkMemoryRecover: 9745 * @doc: the document the chunk pertains to 9746 * @sax: the SAX handler bloc (possibly NULL) 9747 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9748 * @depth: Used for loop detection, use 0 9749 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9750 * @lst: the return value for the set of parsed nodes 9751 * @recover: return nodes even if the data is broken (use 0) 9752 * 9753 * 9754 
* Parse a well-balanced chunk of an XML document 9755 * called by the parser 9756 * The allowed sequence for the Well Balanced Chunk is the one defined by 9757 * the content production in the XML grammar: 9758 * 9759 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9760 * 9761 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9762 * the parser error code otherwise 9763 * 9764 * In case recover is set to 1, the nodelist will not be empty even if 9765 * the parsed chunk is not well balanced. 9766 */ 9767int 9768xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9769 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 9770 int recover) { 9771 xmlParserCtxtPtr ctxt; 9772 xmlDocPtr newDoc; 9773 xmlSAXHandlerPtr oldsax = NULL; 9774 xmlNodePtr content; 9775 int size; 9776 int ret = 0; 9777 9778 if (depth > 40) { 9779 return(XML_ERR_ENTITY_LOOP); 9780 } 9781 9782 9783 if (lst != NULL) 9784 *lst = NULL; 9785 if (string == NULL) 9786 return(-1); 9787 9788 size = xmlStrlen(string); 9789 9790 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9791 if (ctxt == NULL) return(-1); 9792 ctxt->userData = ctxt; 9793 if (sax != NULL) { 9794 oldsax = ctxt->sax; 9795 ctxt->sax = sax; 9796 if (user_data != NULL) 9797 ctxt->userData = user_data; 9798 } 9799 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9800 if (newDoc == NULL) { 9801 xmlFreeParserCtxt(ctxt); 9802 return(-1); 9803 } 9804 if (doc != NULL) { 9805 newDoc->intSubset = doc->intSubset; 9806 newDoc->extSubset = doc->extSubset; 9807 } 9808 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9809 if (newDoc->children == NULL) { 9810 if (sax != NULL) 9811 ctxt->sax = oldsax; 9812 xmlFreeParserCtxt(ctxt); 9813 newDoc->intSubset = NULL; 9814 newDoc->extSubset = NULL; 9815 xmlFreeDoc(newDoc); 9816 return(-1); 9817 } 9818 nodePush(ctxt, newDoc->children); 9819 if (doc == NULL) { 9820 ctxt->myDoc = newDoc; 9821 } else { 9822 
ctxt->myDoc = newDoc; 9823 newDoc->children->doc = doc; 9824 } 9825 ctxt->instate = XML_PARSER_CONTENT; 9826 ctxt->depth = depth; 9827 9828 /* 9829 * Doing validity checking on chunk doesn't make sense 9830 */ 9831 ctxt->validate = 0; 9832 ctxt->loadsubset = 0; 9833 9834 if ( doc != NULL ){ 9835 content = doc->children; 9836 doc->children = NULL; 9837 xmlParseContent(ctxt); 9838 doc->children = content; 9839 } 9840 else { 9841 xmlParseContent(ctxt); 9842 } 9843 if ((RAW == '<') && (NXT(1) == '/')) { 9844 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9846 ctxt->sax->error(ctxt->userData, 9847 "chunk is not well balanced\n"); 9848 ctxt->wellFormed = 0; 9849 ctxt->disableSAX = 1; 9850 } else if (RAW != 0) { 9851 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9853 ctxt->sax->error(ctxt->userData, 9854 "extra content at the end of well balanced chunk\n"); 9855 ctxt->wellFormed = 0; 9856 ctxt->disableSAX = 1; 9857 } 9858 if (ctxt->node != newDoc->children) { 9859 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9861 ctxt->sax->error(ctxt->userData, 9862 "chunk is not well balanced\n"); 9863 ctxt->wellFormed = 0; 9864 ctxt->disableSAX = 1; 9865 } 9866 9867 if (!ctxt->wellFormed) { 9868 if (ctxt->errNo == 0) 9869 ret = 1; 9870 else 9871 ret = ctxt->errNo; 9872 } else { 9873 ret = 0; 9874 } 9875 9876 if (lst != NULL && (ret == 0 || recover == 1)) { 9877 xmlNodePtr cur; 9878 9879 /* 9880 * Return the newly created nodeset after unlinking it from 9881 * they pseudo parent. 
9882 */ 9883 cur = newDoc->children->children; 9884 *lst = cur; 9885 while (cur != NULL) { 9886 cur->parent = NULL; 9887 cur = cur->next; 9888 } 9889 newDoc->children->children = NULL; 9890 } 9891 9892 if (sax != NULL) 9893 ctxt->sax = oldsax; 9894 xmlFreeParserCtxt(ctxt); 9895 newDoc->intSubset = NULL; 9896 newDoc->extSubset = NULL; 9897 xmlFreeDoc(newDoc); 9898 9899 return(ret); 9900} 9901 9902/** 9903 * xmlSAXParseEntity: 9904 * @sax: the SAX handler block 9905 * @filename: the filename 9906 * 9907 * parse an XML external entity out of context and build a tree. 9908 * It use the given SAX function block to handle the parsing callback. 9909 * If sax is NULL, fallback to the default DOM tree building routines. 9910 * 9911 * [78] extParsedEnt ::= TextDecl? content 9912 * 9913 * This correspond to a "Well Balanced" chunk 9914 * 9915 * Returns the resulting document tree 9916 */ 9917 9918xmlDocPtr 9919xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9920 xmlDocPtr ret; 9921 xmlParserCtxtPtr ctxt; 9922 char *directory = NULL; 9923 9924 ctxt = xmlCreateFileParserCtxt(filename); 9925 if (ctxt == NULL) { 9926 return(NULL); 9927 } 9928 if (sax != NULL) { 9929 if (ctxt->sax != NULL) 9930 xmlFree(ctxt->sax); 9931 ctxt->sax = sax; 9932 ctxt->userData = NULL; 9933 } 9934 9935 if ((ctxt->directory == NULL) && (directory == NULL)) 9936 directory = xmlParserGetDirectory(filename); 9937 9938 xmlParseExtParsedEnt(ctxt); 9939 9940 if (ctxt->wellFormed) 9941 ret = ctxt->myDoc; 9942 else { 9943 ret = NULL; 9944 xmlFreeDoc(ctxt->myDoc); 9945 ctxt->myDoc = NULL; 9946 } 9947 if (sax != NULL) 9948 ctxt->sax = NULL; 9949 xmlFreeParserCtxt(ctxt); 9950 9951 return(ret); 9952} 9953 9954/** 9955 * xmlParseEntity: 9956 * @filename: the filename 9957 * 9958 * parse an XML external entity out of context and build a tree. 9959 * 9960 * [78] extParsedEnt ::= TextDecl? 
content 9961 * 9962 * This correspond to a "Well Balanced" chunk 9963 * 9964 * Returns the resulting document tree 9965 */ 9966 9967xmlDocPtr 9968xmlParseEntity(const char *filename) { 9969 return(xmlSAXParseEntity(NULL, filename)); 9970} 9971 9972/** 9973 * xmlCreateEntityParserCtxt: 9974 * @URL: the entity URL 9975 * @ID: the entity PUBLIC ID 9976 * @base: a possible base for the target URI 9977 * 9978 * Create a parser context for an external entity 9979 * Automatic support for ZLIB/Compress compressed document is provided 9980 * by default if found at compile-time. 9981 * 9982 * Returns the new parser context or NULL 9983 */ 9984xmlParserCtxtPtr 9985xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9986 const xmlChar *base) { 9987 xmlParserCtxtPtr ctxt; 9988 xmlParserInputPtr inputStream; 9989 char *directory = NULL; 9990 xmlChar *uri; 9991 9992 ctxt = xmlNewParserCtxt(); 9993 if (ctxt == NULL) { 9994 return(NULL); 9995 } 9996 9997 uri = xmlBuildURI(URL, base); 9998 9999 if (uri == NULL) { 10000 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 10001 if (inputStream == NULL) { 10002 xmlFreeParserCtxt(ctxt); 10003 return(NULL); 10004 } 10005 10006 inputPush(ctxt, inputStream); 10007 10008 if ((ctxt->directory == NULL) && (directory == NULL)) 10009 directory = xmlParserGetDirectory((char *)URL); 10010 if ((ctxt->directory == NULL) && (directory != NULL)) 10011 ctxt->directory = directory; 10012 } else { 10013 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 10014 if (inputStream == NULL) { 10015 xmlFree(uri); 10016 xmlFreeParserCtxt(ctxt); 10017 return(NULL); 10018 } 10019 10020 inputPush(ctxt, inputStream); 10021 10022 if ((ctxt->directory == NULL) && (directory == NULL)) 10023 directory = xmlParserGetDirectory((char *)uri); 10024 if ((ctxt->directory == NULL) && (directory != NULL)) 10025 ctxt->directory = directory; 10026 xmlFree(uri); 10027 } 10028 10029 return(ctxt); 10030} 10031 
10032/************************************************************************ 10033 * * 10034 * Front ends when parsing from a file * 10035 * * 10036 ************************************************************************/ 10037 10038/** 10039 * xmlCreateFileParserCtxt: 10040 * @filename: the filename 10041 * 10042 * Create a parser context for a file content. 10043 * Automatic support for ZLIB/Compress compressed document is provided 10044 * by default if found at compile-time. 10045 * 10046 * Returns the new parser context or NULL 10047 */ 10048xmlParserCtxtPtr 10049xmlCreateFileParserCtxt(const char *filename) 10050{ 10051 xmlParserCtxtPtr ctxt; 10052 xmlParserInputPtr inputStream; 10053 char *directory = NULL; 10054 xmlChar *normalized; 10055 10056 ctxt = xmlNewParserCtxt(); 10057 if (ctxt == NULL) { 10058 if (xmlDefaultSAXHandler.error != NULL) { 10059 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 10060 } 10061 return(NULL); 10062 } 10063 10064 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename); 10065 if (normalized == NULL) { 10066 xmlFreeParserCtxt(ctxt); 10067 return(NULL); 10068 } 10069 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt); 10070 if (inputStream == NULL) { 10071 xmlFreeParserCtxt(ctxt); 10072 xmlFree(normalized); 10073 return(NULL); 10074 } 10075 10076 inputPush(ctxt, inputStream); 10077 if ((ctxt->directory == NULL) && (directory == NULL)) 10078 directory = xmlParserGetDirectory((char *) normalized); 10079 if ((ctxt->directory == NULL) && (directory != NULL)) 10080 ctxt->directory = directory; 10081 10082 xmlFree(normalized); 10083 10084 return(ctxt); 10085} 10086 10087/** 10088 * xmlSAXParseFileWithData: 10089 * @sax: the SAX handler block 10090 * @filename: the filename 10091 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10092 * documents 10093 * @data: the userdata 10094 * 10095 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 10096 * compressed document is provided by default if found at compile-time. 10097 * It use the given SAX function block to handle the parsing callback. 10098 * If sax is NULL, fallback to the default DOM tree building routines. 10099 * 10100 * User data (void *) is stored within the parser context in the 10101 * context's _private member, so it is available nearly everywhere in libxml 10102 * 10103 * Returns the resulting document tree 10104 */ 10105 10106xmlDocPtr 10107xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 10108 int recovery, void *data) { 10109 xmlDocPtr ret; 10110 xmlParserCtxtPtr ctxt; 10111 char *directory = NULL; 10112 10113 xmlInitParser(); 10114 10115 ctxt = xmlCreateFileParserCtxt(filename); 10116 if (ctxt == NULL) { 10117 return(NULL); 10118 } 10119 if (sax != NULL) { 10120 if (ctxt->sax != NULL) 10121 xmlFree(ctxt->sax); 10122 ctxt->sax = sax; 10123 } 10124 if (data!=NULL) { 10125 ctxt->_private=data; 10126 } 10127 10128 if ((ctxt->directory == NULL) && (directory == NULL)) 10129 directory = xmlParserGetDirectory(filename); 10130 if ((ctxt->directory == NULL) && (directory != NULL)) 10131 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 10132 10133 xmlParseDocument(ctxt); 10134 10135 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10136 else { 10137 ret = NULL; 10138 xmlFreeDoc(ctxt->myDoc); 10139 ctxt->myDoc = NULL; 10140 } 10141 if (sax != NULL) 10142 ctxt->sax = NULL; 10143 xmlFreeParserCtxt(ctxt); 10144 10145 return(ret); 10146} 10147 10148/** 10149 * xmlSAXParseFile: 10150 * @sax: the SAX handler block 10151 * @filename: the filename 10152 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10153 * documents 10154 * 10155 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10156 * compressed document is provided by default if found at compile-time. 10157 * It use the given SAX function block to handle the parsing callback. 
10158 * If sax is NULL, fallback to the default DOM tree building routines. 10159 * 10160 * Returns the resulting document tree 10161 */ 10162 10163xmlDocPtr 10164xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 10165 int recovery) { 10166 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 10167} 10168 10169/** 10170 * xmlRecoverDoc: 10171 * @cur: a pointer to an array of xmlChar 10172 * 10173 * parse an XML in-memory document and build a tree. 10174 * In the case the document is not Well Formed, a tree is built anyway 10175 * 10176 * Returns the resulting document tree 10177 */ 10178 10179xmlDocPtr 10180xmlRecoverDoc(xmlChar *cur) { 10181 return(xmlSAXParseDoc(NULL, cur, 1)); 10182} 10183 10184/** 10185 * xmlParseFile: 10186 * @filename: the filename 10187 * 10188 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10189 * compressed document is provided by default if found at compile-time. 10190 * 10191 * Returns the resulting document tree if the file was wellformed, 10192 * NULL otherwise. 10193 */ 10194 10195xmlDocPtr 10196xmlParseFile(const char *filename) { 10197 return(xmlSAXParseFile(NULL, filename, 0)); 10198} 10199 10200/** 10201 * xmlRecoverFile: 10202 * @filename: the filename 10203 * 10204 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 10205 * compressed document is provided by default if found at compile-time. 10206 * In the case the document is not Well Formed, a tree is built anyway 10207 * 10208 * Returns the resulting document tree 10209 */ 10210 10211xmlDocPtr 10212xmlRecoverFile(const char *filename) { 10213 return(xmlSAXParseFile(NULL, filename, 1)); 10214} 10215 10216 10217/** 10218 * xmlSetupParserForBuffer: 10219 * @ctxt: an XML parser context 10220 * @buffer: a xmlChar * buffer 10221 * @filename: a file name 10222 * 10223 * Setup the parser context to parse a new buffer; Clears any prior 10224 * contents from the parser context. 
The buffer parameter must not be 10225 * NULL, but the filename parameter can be 10226 */ 10227void 10228xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 10229 const char* filename) 10230{ 10231 xmlParserInputPtr input; 10232 10233 input = xmlNewInputStream(ctxt); 10234 if (input == NULL) { 10235 xmlGenericError(xmlGenericErrorContext, 10236 "malloc"); 10237 xmlFree(ctxt); 10238 return; 10239 } 10240 10241 xmlClearParserCtxt(ctxt); 10242 if (filename != NULL) 10243 input->filename = xmlMemStrdup(filename); 10244 input->base = buffer; 10245 input->cur = buffer; 10246 input->end = &buffer[xmlStrlen(buffer)]; 10247 inputPush(ctxt, input); 10248} 10249 10250/** 10251 * xmlSAXUserParseFile: 10252 * @sax: a SAX handler 10253 * @user_data: The user data returned on SAX callbacks 10254 * @filename: a file name 10255 * 10256 * parse an XML file and call the given SAX handler routines. 10257 * Automatic support for ZLIB/Compress compressed document is provided 10258 * 10259 * Returns 0 in case of success or a error number otherwise 10260 */ 10261int 10262xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 10263 const char *filename) { 10264 int ret = 0; 10265 xmlParserCtxtPtr ctxt; 10266 10267 ctxt = xmlCreateFileParserCtxt(filename); 10268 if (ctxt == NULL) return -1; 10269 if (ctxt->sax != &xmlDefaultSAXHandler) 10270 xmlFree(ctxt->sax); 10271 ctxt->sax = sax; 10272 if (user_data != NULL) 10273 ctxt->userData = user_data; 10274 10275 xmlParseDocument(ctxt); 10276 10277 if (ctxt->wellFormed) 10278 ret = 0; 10279 else { 10280 if (ctxt->errNo != 0) 10281 ret = ctxt->errNo; 10282 else 10283 ret = -1; 10284 } 10285 if (sax != NULL) 10286 ctxt->sax = NULL; 10287 xmlFreeParserCtxt(ctxt); 10288 10289 return ret; 10290} 10291 10292/************************************************************************ 10293 * * 10294 * Front ends when parsing from memory * 10295 * * 10296 ************************************************************************/ 10297 
10298/** 10299 * xmlCreateMemoryParserCtxt: 10300 * @buffer: a pointer to a char array 10301 * @size: the size of the array 10302 * 10303 * Create a parser context for an XML in-memory document. 10304 * 10305 * Returns the new parser context or NULL 10306 */ 10307xmlParserCtxtPtr 10308xmlCreateMemoryParserCtxt(const char *buffer, int size) { 10309 xmlParserCtxtPtr ctxt; 10310 xmlParserInputPtr input; 10311 xmlParserInputBufferPtr buf; 10312 10313 if (buffer == NULL) 10314 return(NULL); 10315 if (size <= 0) 10316 return(NULL); 10317 10318 ctxt = xmlNewParserCtxt(); 10319 if (ctxt == NULL) 10320 return(NULL); 10321 10322 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 10323 if (buf == NULL) return(NULL); 10324 10325 input = xmlNewInputStream(ctxt); 10326 if (input == NULL) { 10327 xmlFreeParserCtxt(ctxt); 10328 return(NULL); 10329 } 10330 10331 input->filename = NULL; 10332 input->buf = buf; 10333 input->base = input->buf->buffer->content; 10334 input->cur = input->buf->buffer->content; 10335 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 10336 10337 inputPush(ctxt, input); 10338 return(ctxt); 10339} 10340 10341/** 10342 * xmlSAXParseMemoryWithData: 10343 * @sax: the SAX handler block 10344 * @buffer: an pointer to a char array 10345 * @size: the size of the array 10346 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10347 * documents 10348 * @data: the userdata 10349 * 10350 * parse an XML in-memory block and use the given SAX function block 10351 * to handle the parsing callback. If sax is NULL, fallback to the default 10352 * DOM tree building routines. 
10353 * 10354 * User data (void *) is stored within the parser context in the 10355 * context's _private member, so it is available nearly everywhere in libxml 10356 * 10357 * Returns the resulting document tree 10358 */ 10359 10360xmlDocPtr 10361xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 10362 int size, int recovery, void *data) { 10363 xmlDocPtr ret; 10364 xmlParserCtxtPtr ctxt; 10365 10366 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10367 if (ctxt == NULL) return(NULL); 10368 if (sax != NULL) { 10369 if (ctxt->sax != NULL) 10370 xmlFree(ctxt->sax); 10371 ctxt->sax = sax; 10372 } 10373 if (data!=NULL) { 10374 ctxt->_private=data; 10375 } 10376 10377 xmlParseDocument(ctxt); 10378 10379 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10380 else { 10381 ret = NULL; 10382 xmlFreeDoc(ctxt->myDoc); 10383 ctxt->myDoc = NULL; 10384 } 10385 if (sax != NULL) 10386 ctxt->sax = NULL; 10387 xmlFreeParserCtxt(ctxt); 10388 10389 return(ret); 10390} 10391 10392/** 10393 * xmlSAXParseMemory: 10394 * @sax: the SAX handler block 10395 * @buffer: an pointer to a char array 10396 * @size: the size of the array 10397 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 10398 * documents 10399 * 10400 * parse an XML in-memory block and use the given SAX function block 10401 * to handle the parsing callback. If sax is NULL, fallback to the default 10402 * DOM tree building routines. 10403 * 10404 * Returns the resulting document tree 10405 */ 10406xmlDocPtr 10407xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 10408 int size, int recovery) { 10409 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 10410} 10411 10412/** 10413 * xmlParseMemory: 10414 * @buffer: an pointer to a char array 10415 * @size: the size of the array 10416 * 10417 * parse an XML in-memory block and build a tree. 
10418 * 10419 * Returns the resulting document tree 10420 */ 10421 10422xmlDocPtr xmlParseMemory(const char *buffer, int size) { 10423 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 10424} 10425 10426/** 10427 * xmlRecoverMemory: 10428 * @buffer: an pointer to a char array 10429 * @size: the size of the array 10430 * 10431 * parse an XML in-memory block and build a tree. 10432 * In the case the document is not Well Formed, a tree is built anyway 10433 * 10434 * Returns the resulting document tree 10435 */ 10436 10437xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 10438 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 10439} 10440 10441/** 10442 * xmlSAXUserParseMemory: 10443 * @sax: a SAX handler 10444 * @user_data: The user data returned on SAX callbacks 10445 * @buffer: an in-memory XML document input 10446 * @size: the length of the XML document in bytes 10447 * 10448 * A better SAX parsing routine. 10449 * parse an XML in-memory buffer and call the given SAX handler routines. 10450 * 10451 * Returns 0 in case of success or a error number otherwise 10452 */ 10453int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 10454 const char *buffer, int size) { 10455 int ret = 0; 10456 xmlParserCtxtPtr ctxt; 10457 xmlSAXHandlerPtr oldsax = NULL; 10458 10459 if (sax == NULL) return -1; 10460 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10461 if (ctxt == NULL) return -1; 10462 oldsax = ctxt->sax; 10463 ctxt->sax = sax; 10464 if (user_data != NULL) 10465 ctxt->userData = user_data; 10466 10467 xmlParseDocument(ctxt); 10468 10469 if (ctxt->wellFormed) 10470 ret = 0; 10471 else { 10472 if (ctxt->errNo != 0) 10473 ret = ctxt->errNo; 10474 else 10475 ret = -1; 10476 } 10477 ctxt->sax = oldsax; 10478 xmlFreeParserCtxt(ctxt); 10479 10480 return ret; 10481} 10482 10483/** 10484 * xmlCreateDocParserCtxt: 10485 * @cur: a pointer to an array of xmlChar 10486 * 10487 * Creates a parser context for an XML in-memory document. 
10488 * 10489 * Returns the new parser context or NULL 10490 */ 10491xmlParserCtxtPtr 10492xmlCreateDocParserCtxt(xmlChar *cur) { 10493 int len; 10494 10495 if (cur == NULL) 10496 return(NULL); 10497 len = xmlStrlen(cur); 10498 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 10499} 10500 10501/** 10502 * xmlSAXParseDoc: 10503 * @sax: the SAX handler block 10504 * @cur: a pointer to an array of xmlChar 10505 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10506 * documents 10507 * 10508 * parse an XML in-memory document and build a tree. 10509 * It use the given SAX function block to handle the parsing callback. 10510 * If sax is NULL, fallback to the default DOM tree building routines. 10511 * 10512 * Returns the resulting document tree 10513 */ 10514 10515xmlDocPtr 10516xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 10517 xmlDocPtr ret; 10518 xmlParserCtxtPtr ctxt; 10519 10520 if (cur == NULL) return(NULL); 10521 10522 10523 ctxt = xmlCreateDocParserCtxt(cur); 10524 if (ctxt == NULL) return(NULL); 10525 if (sax != NULL) { 10526 ctxt->sax = sax; 10527 ctxt->userData = NULL; 10528 } 10529 10530 xmlParseDocument(ctxt); 10531 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10532 else { 10533 ret = NULL; 10534 xmlFreeDoc(ctxt->myDoc); 10535 ctxt->myDoc = NULL; 10536 } 10537 if (sax != NULL) 10538 ctxt->sax = NULL; 10539 xmlFreeParserCtxt(ctxt); 10540 10541 return(ret); 10542} 10543 10544/** 10545 * xmlParseDoc: 10546 * @cur: a pointer to an array of xmlChar 10547 * 10548 * parse an XML in-memory document and build a tree. 
10549 * 10550 * Returns the resulting document tree 10551 */ 10552 10553xmlDocPtr 10554xmlParseDoc(xmlChar *cur) { 10555 return(xmlSAXParseDoc(NULL, cur, 0)); 10556} 10557 10558/************************************************************************ 10559 * * 10560 * Specific function to keep track of entities references * 10561 * and used by the XSLT debugger * 10562 * * 10563 ************************************************************************/ 10564 10565static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 10566 10567/** 10568 * xmlAddEntityReference: 10569 * @ent : A valid entity 10570 * @firstNode : A valid first node for children of entity 10571 * @lastNode : A valid last node of children entity 10572 * 10573 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 10574 */ 10575static void 10576xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 10577 xmlNodePtr lastNode) 10578{ 10579 if (xmlEntityRefFunc != NULL) { 10580 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 10581 } 10582} 10583 10584 10585/** 10586 * xmlSetEntityReferenceFunc: 10587 * @func : A valid function 10588 * 10589 * Set the function to call call back when a xml reference has been made 10590 */ 10591void 10592xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 10593{ 10594 xmlEntityRefFunc = func; 10595} 10596 10597/************************************************************************ 10598 * * 10599 * Miscellaneous * 10600 * * 10601 ************************************************************************/ 10602 10603#ifdef LIBXML_XPATH_ENABLED 10604#include <libxml/xpath.h> 10605#endif 10606 10607extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 10608static int xmlParserInitialized = 0; 10609 10610/** 10611 * xmlInitParser: 10612 * 10613 * Initialization function for the XML parser. 10614 * This is not reentrant. Call once before processing in case of 10615 * use in multithreaded programs. 
10616 */ 10617 10618void 10619xmlInitParser(void) { 10620 if (xmlParserInitialized != 0) 10621 return; 10622 10623 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 10624 (xmlGenericError == NULL)) 10625 initGenericErrorDefaultFunc(NULL); 10626 xmlInitThreads(); 10627 xmlInitMemory(); 10628 xmlInitCharEncodingHandlers(); 10629 xmlInitializePredefinedEntities(); 10630 xmlDefaultSAXHandlerInit(); 10631 xmlRegisterDefaultInputCallbacks(); 10632 xmlRegisterDefaultOutputCallbacks(); 10633#ifdef LIBXML_HTML_ENABLED 10634 htmlInitAutoClose(); 10635 htmlDefaultSAXHandlerInit(); 10636#endif 10637#ifdef LIBXML_XPATH_ENABLED 10638 xmlXPathInit(); 10639#endif 10640 xmlParserInitialized = 1; 10641} 10642 10643/** 10644 * xmlCleanupParser: 10645 * 10646 * Cleanup function for the XML parser. It tries to reclaim all 10647 * parsing related global memory allocated for the parser processing. 10648 * It doesn't deallocate any document related memory. Calling this 10649 * function should not prevent reusing the parser. 10650 */ 10651 10652void 10653xmlCleanupParser(void) { 10654 xmlCleanupCharEncodingHandlers(); 10655 xmlCleanupPredefinedEntities(); 10656#ifdef LIBXML_CATALOG_ENABLED 10657 xmlCatalogCleanup(); 10658#endif 10659 xmlCleanupThreads(); 10660 xmlParserInitialized = 0; 10661} 10662