parser.c revision 9f7b84bb07ab4f748ba981a38c7566cd48af60fa
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implemented either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#include "libxml.h" 34 35#ifdef WIN32 36#define XML_DIR_SEP '\\' 37#else 38#define XML_DIR_SEP '/' 39#endif 40 41#include <stdlib.h> 42#include <string.h> 43#include <libxml/xmlmemory.h> 44#include <libxml/tree.h> 45#include <libxml/parser.h> 46#include <libxml/parserInternals.h> 47#include <libxml/valid.h> 48#include <libxml/entities.h> 49#include <libxml/xmlerror.h> 50#include <libxml/encoding.h> 51#include <libxml/xmlIO.h> 52#include <libxml/uri.h> 53#ifdef LIBXML_CATALOG_ENABLED 54#include <libxml/catalog.h> 55#endif 56 57#ifdef HAVE_CTYPE_H 58#include <ctype.h> 59#endif 60#ifdef HAVE_STDLIB_H 61#include <stdlib.h> 62#endif 63#ifdef HAVE_SYS_STAT_H 64#include <sys/stat.h> 65#endif 66#ifdef HAVE_FCNTL_H 67#include <fcntl.h> 68#endif 69#ifdef HAVE_UNISTD_H 70#include <unistd.h> 71#endif 72#ifdef HAVE_ZLIB_H 73#include <zlib.h> 74#endif 75 76 77#define XML_PARSER_BIG_BUFFER_SIZE 300 78#define XML_PARSER_BUFFER_SIZE 100 79 80/* 81 * Various global defaults for parsing 82 */ 83int xmlParserDebugEntities = 0; 84 85/* 86 * List of XML prefixed PI allowed by W3C specs 87 */ 88 89const char *xmlW3CPIs[] = { 90 "xml-stylesheet", 91 NULL 92}; 93 94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 95void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 96xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 97 const xmlChar **str); 98 99static int 100xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 101 xmlSAXHandlerPtr sax, 102 void *user_data, int depth, const xmlChar *URL, 103 const xmlChar *ID, xmlNodePtr *list); 104 105/************************************************************************ 106 * * 107 * Parser stacks related functions and macros * 108 * * 109 ************************************************************************/ 110 111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 112 const xmlChar ** str); 113 114/* 115 * Generic function for accessing stacks in 
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to a pair of functions working
 * on the array ctxt->name##Tab (ctxt->name##Nr entries in use,
 * ctxt->name##Max entries allocated) and on ctxt->name, the shortcut to
 * the entry on top of the stack:
 *
 *   name##Push  stores @value on top of the stack, growing the array when
 *               it is full. Returns the index of the new entry, or 0 when
 *               the array could not be grown.
 *   name##Pop   removes and returns the top entry, 0 if the stack is empty.
 *
 * The array is grown through temporaries: when xmlRealloc() fails the
 * existing table and its recorded capacity are left untouched, so the
 * stack stays usable and nothing is leaked. A capacity of 0 is bumped to
 * a small initial size instead of being doubled to 0 again.
 * NOTE(review): assumes ctxt->name##Max is an int and that xmlRealloc()
 * accepts a NULL table like realloc() does - confirm against parser.h.
 */

#define PUSH_AND_POP(scope, type, name) \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
    if (ctxt->name##Nr >= ctxt->name##Max) { \
        int newMax = (ctxt->name##Max > 0) ? ctxt->name##Max * 2 : 4; \
        type *newTab = (type *) xmlRealloc(ctxt->name##Tab, \
                           newMax * sizeof(ctxt->name##Tab[0])); \
        if (newTab == NULL) { \
            xmlGenericError(xmlGenericErrorContext, \
                            "realloc failed !\n"); \
            return(0); \
        } \
        ctxt->name##Tab = newTab; \
        ctxt->name##Max = newMax; \
    } \
    ctxt->name##Tab[ctxt->name##Nr] = value; \
    ctxt->name = value; \
    return(ctxt->name##Nr++); \
} \
scope type name##Pop(xmlParserCtxtPtr ctxt) { \
    type ret; \
    if (ctxt->name##Nr <= 0) return(0); \
    ctxt->name##Nr--; \
    if (ctxt->name##Nr > 0) \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
    else \
        ctxt->name = NULL; \
    ret = ctxt->name##Tab[ctxt->name##Nr]; \
    ctxt->name##Tab[ctxt->name##Nr] = 0; \
    return(ret); \
}

/**
 * inputPop:
 * @ctxt: an XML parser context
 *
 * Pops the top parser input from the input stack
 *
 * Returns the input just removed
 */
/**
 * inputPush:
 * @ctxt:  an XML parser context
 * @input:  the parser input
 *
 * Pushes a new parser input on top of the input stack
 */
/**
 * namePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack
 *
 * Returns the name just removed
 */
/**
 * namePush:
 * @ctxt:  an XML parser context
 * @name:  the element name
 *
 * Pushes a new element name on top of the name stack
 */
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed
 */
/**
 * nodePush:
 * @ctxt:  an XML parser context
 * @node:  the element node
 *
 * Pushes a new element node on top of the node stack
 */
/*
 * Those macros actually generate the functions
 */
195PUSH_AND_POP(extern, xmlParserInputPtr, input) 196PUSH_AND_POP(extern, xmlNodePtr, node) 197PUSH_AND_POP(extern, xmlChar*, name) 198 199static int spacePush(xmlParserCtxtPtr ctxt, int val) { 200 if (ctxt->spaceNr >= ctxt->spaceMax) { 201 ctxt->spaceMax *= 2; 202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 204 if (ctxt->spaceTab == NULL) { 205 xmlGenericError(xmlGenericErrorContext, 206 "realloc failed !\n"); 207 return(0); 208 } 209 } 210 ctxt->spaceTab[ctxt->spaceNr] = val; 211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 212 return(ctxt->spaceNr++); 213} 214 215static int spacePop(xmlParserCtxtPtr ctxt) { 216 int ret; 217 if (ctxt->spaceNr <= 0) return(0); 218 ctxt->spaceNr--; 219 if (ctxt->spaceNr > 0) 220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 221 else 222 ctxt->space = NULL; 223 ret = ctxt->spaceTab[ctxt->spaceNr]; 224 ctxt->spaceTab[ctxt->spaceNr] = -1; 225 return(ret); 226} 227 228/* 229 * Macros for accessing the content. Those should be used only by the parser, 230 * and not exported. 231 * 232 * Dirty macros, i.e. one often need to make assumption on the context to 233 * use them 234 * 235 * CUR_PTR return the current pointer to the xmlChar to be parsed. 236 * To be used with extreme caution since operations consuming 237 * characters may move the input buffer to a different location ! 238 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 239 * This should be used internally by the parser 240 * only to compare to ASCII values otherwise it would break when 241 * running with UTF-8 encoding. 242 * RAW same as CUR but in the input buffer, bypass any token 243 * extraction that may have been done 244 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 245 * to compare on ASCII based substring. 246 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 247 * strings within the parser. 
248 * 249 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 250 * 251 * NEXT Skip to the next character, this does the proper decoding 252 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 253 * NEXTL(l) Skip l xmlChars in the input buffer 254 * CUR_CHAR(l) returns the current unicode character (int), set l 255 * to the number of xmlChars used for the encoding [0-5]. 256 * CUR_SCHAR same but operate on a string instead of the context 257 * COPY_BUF copy the current unicode char to the target buffer, increment 258 * the index 259 * GROW, SHRINK handling of input buffers 260 */ 261 262#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) 263#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur)) 264#define NXT(val) ctxt->input->cur[(val)] 265#define CUR_PTR ctxt->input->cur 266 267#define SKIP(val) do { \ 268 ctxt->nbChars += (val),ctxt->input->cur += (val); \ 269 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 270 if ((*ctxt->input->cur == 0) && \ 271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 272 xmlPopInput(ctxt); \ 273 } while (0) 274 275#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\ 276 xmlParserInputShrink(ctxt->input); \ 277 if ((*ctxt->input->cur == 0) && \ 278 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 279 xmlPopInput(ctxt); \ 280 } 281 282#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \ 283 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 284 if ((*ctxt->input->cur == 0) && \ 285 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 286 xmlPopInput(ctxt); \ 287 } 288 289#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 290 291#define NEXT xmlNextChar(ctxt) 292 293#define NEXT1 { \ 294 ctxt->input->cur++; \ 295 ctxt->nbChars++; \ 296 if (*ctxt->input->cur == 0) \ 297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 298 } 299 300#define NEXTL(l) do { \ 301 if (*(ctxt->input->cur) == '\n') { \ 302 ctxt->input->line++; ctxt->input->col 
= 1; \ 303 } else ctxt->input->col++; \ 304 ctxt->token = 0; ctxt->input->cur += l; \ 305 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 306 } while (0) 307 308#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 309#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 310 311#define COPY_BUF(l,b,i,v) \ 312 if (l == 1) b[i++] = (xmlChar) v; \ 313 else i += xmlCopyCharMultiByte(&b[i],v) 314 315/** 316 * xmlSkipBlankChars: 317 * @ctxt: the XML parser context 318 * 319 * skip all blanks character found at that point in the input streams. 320 * It pops up finished entities in the process if allowable at that point. 321 * 322 * Returns the number of space chars skipped 323 */ 324 325int 326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 327 int res = 0; 328 329 if (ctxt->token != 0) { 330 if (!IS_BLANK(ctxt->token)) 331 return(0); 332 ctxt->token = 0; 333 res++; 334 } 335 /* 336 * It's Okay to use CUR/NEXT here since all the blanks are on 337 * the ASCII range. 338 */ 339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 340 const xmlChar *cur; 341 /* 342 * if we are in the document content, go really fast 343 */ 344 cur = ctxt->input->cur; 345 while (IS_BLANK(*cur)) { 346 if (*cur == '\n') { 347 ctxt->input->line++; ctxt->input->col = 1; 348 } 349 cur++; 350 res++; 351 if (*cur == 0) { 352 ctxt->input->cur = cur; 353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 354 cur = ctxt->input->cur; 355 } 356 } 357 ctxt->input->cur = cur; 358 } else { 359 int cur; 360 do { 361 cur = CUR; 362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 363 NEXT; 364 cur = CUR; 365 res++; 366 } 367 while ((cur == 0) && (ctxt->inputNr > 1) && 368 (ctxt->instate != XML_PARSER_COMMENT)) { 369 xmlPopInput(ctxt); 370 cur = CUR; 371 } 372 /* 373 * Need to handle support of entities branching here 374 */ 375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 377 } 378 return(res); 379} 380 
381/************************************************************************ 382 * * 383 * Commodity functions to handle entities * 384 * * 385 ************************************************************************/ 386 387/** 388 * xmlPopInput: 389 * @ctxt: an XML parser context 390 * 391 * xmlPopInput: the current input pointed by ctxt->input came to an end 392 * pop it and return the next char. 393 * 394 * Returns the current xmlChar in the parser context 395 */ 396xmlChar 397xmlPopInput(xmlParserCtxtPtr ctxt) { 398 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 399 if (xmlParserDebugEntities) 400 xmlGenericError(xmlGenericErrorContext, 401 "Popping input %d\n", ctxt->inputNr); 402 xmlFreeInputStream(inputPop(ctxt)); 403 if ((*ctxt->input->cur == 0) && 404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 405 return(xmlPopInput(ctxt)); 406 return(CUR); 407} 408 409/** 410 * xmlPushInput: 411 * @ctxt: an XML parser context 412 * @input: an XML parser input fragment (entity, XML fragment ...). 413 * 414 * xmlPushInput: switch to a new input stream which is stacked on top 415 * of the previous one(s). 416 */ 417void 418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 419 if (input == NULL) return; 420 421 if (xmlParserDebugEntities) { 422 if ((ctxt->input != NULL) && (ctxt->input->filename)) 423 xmlGenericError(xmlGenericErrorContext, 424 "%s(%d): ", ctxt->input->filename, 425 ctxt->input->line); 426 xmlGenericError(xmlGenericErrorContext, 427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 428 } 429 inputPush(ctxt, input); 430 GROW; 431} 432 433/** 434 * xmlParseCharRef: 435 * @ctxt: an XML parser context 436 * 437 * parse Reference declarations 438 * 439 * [66] CharRef ::= '&#' [0-9]+ ';' | 440 * '&#x' [0-9a-fA-F]+ ';' 441 * 442 * [ WFC: Legal Character ] 443 * Characters referred to using character references must match the 444 * production for Char. 
445 * 446 * Returns the value parsed (as an int), 0 in case of error 447 */ 448int 449xmlParseCharRef(xmlParserCtxtPtr ctxt) { 450 unsigned int val = 0; 451 int count = 0; 452 453 if (ctxt->token != 0) { 454 val = ctxt->token; 455 ctxt->token = 0; 456 return(val); 457 } 458 /* 459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 460 */ 461 if ((RAW == '&') && (NXT(1) == '#') && 462 (NXT(2) == 'x')) { 463 SKIP(3); 464 GROW; 465 while (RAW != ';') { /* loop blocked by count */ 466 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 467 val = val * 16 + (CUR - '0'); 468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 469 val = val * 16 + (CUR - 'a') + 10; 470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 471 val = val * 16 + (CUR - 'A') + 10; 472 else { 473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 475 ctxt->sax->error(ctxt->userData, 476 "xmlParseCharRef: invalid hexadecimal value\n"); 477 ctxt->wellFormed = 0; 478 ctxt->disableSAX = 1; 479 val = 0; 480 break; 481 } 482 NEXT; 483 count++; 484 } 485 if (RAW == ';') { 486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 487 ctxt->nbChars ++; 488 ctxt->input->cur++; 489 } 490 } else if ((RAW == '&') && (NXT(1) == '#')) { 491 SKIP(2); 492 GROW; 493 while (RAW != ';') { /* loop blocked by count */ 494 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 495 val = val * 10 + (CUR - '0'); 496 else { 497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 499 ctxt->sax->error(ctxt->userData, 500 "xmlParseCharRef: invalid decimal value\n"); 501 ctxt->wellFormed = 0; 502 ctxt->disableSAX = 1; 503 val = 0; 504 break; 505 } 506 NEXT; 507 count++; 508 } 509 if (RAW == ';') { 510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 511 ctxt->nbChars ++; 512 ctxt->input->cur++; 513 } 514 } else { 515 ctxt->errNo = XML_ERR_INVALID_CHARREF; 516 if ((ctxt->sax != 
NULL) && (ctxt->sax->error != NULL)) 517 ctxt->sax->error(ctxt->userData, 518 "xmlParseCharRef: invalid value\n"); 519 ctxt->wellFormed = 0; 520 ctxt->disableSAX = 1; 521 } 522 523 /* 524 * [ WFC: Legal Character ] 525 * Characters referred to using character references must match the 526 * production for Char. 527 */ 528 if (IS_CHAR(val)) { 529 return(val); 530 } else { 531 ctxt->errNo = XML_ERR_INVALID_CHAR; 532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 534 val); 535 ctxt->wellFormed = 0; 536 ctxt->disableSAX = 1; 537 } 538 return(0); 539} 540 541/** 542 * xmlParseStringCharRef: 543 * @ctxt: an XML parser context 544 * @str: a pointer to an index in the string 545 * 546 * parse Reference declarations, variant parsing from a string rather 547 * than an an input flow. 548 * 549 * [66] CharRef ::= '&#' [0-9]+ ';' | 550 * '&#x' [0-9a-fA-F]+ ';' 551 * 552 * [ WFC: Legal Character ] 553 * Characters referred to using character references must match the 554 * production for Char. 
555 * 556 * Returns the value parsed (as an int), 0 in case of error, str will be 557 * updated to the current value of the index 558 */ 559static int 560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 561 const xmlChar *ptr; 562 xmlChar cur; 563 int val = 0; 564 565 if ((str == NULL) || (*str == NULL)) return(0); 566 ptr = *str; 567 cur = *ptr; 568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 569 ptr += 3; 570 cur = *ptr; 571 while (cur != ';') { /* Non input consuming loop */ 572 if ((cur >= '0') && (cur <= '9')) 573 val = val * 16 + (cur - '0'); 574 else if ((cur >= 'a') && (cur <= 'f')) 575 val = val * 16 + (cur - 'a') + 10; 576 else if ((cur >= 'A') && (cur <= 'F')) 577 val = val * 16 + (cur - 'A') + 10; 578 else { 579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 581 ctxt->sax->error(ctxt->userData, 582 "xmlParseStringCharRef: invalid hexadecimal value\n"); 583 ctxt->wellFormed = 0; 584 ctxt->disableSAX = 1; 585 val = 0; 586 break; 587 } 588 ptr++; 589 cur = *ptr; 590 } 591 if (cur == ';') 592 ptr++; 593 } else if ((cur == '&') && (ptr[1] == '#')){ 594 ptr += 2; 595 cur = *ptr; 596 while (cur != ';') { /* Non input consuming loops */ 597 if ((cur >= '0') && (cur <= '9')) 598 val = val * 10 + (cur - '0'); 599 else { 600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 602 ctxt->sax->error(ctxt->userData, 603 "xmlParseStringCharRef: invalid decimal value\n"); 604 ctxt->wellFormed = 0; 605 ctxt->disableSAX = 1; 606 val = 0; 607 break; 608 } 609 ptr++; 610 cur = *ptr; 611 } 612 if (cur == ';') 613 ptr++; 614 } else { 615 ctxt->errNo = XML_ERR_INVALID_CHARREF; 616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 617 ctxt->sax->error(ctxt->userData, 618 "xmlParseCharRef: invalid value\n"); 619 ctxt->wellFormed = 0; 620 ctxt->disableSAX = 1; 621 return(0); 622 } 623 *str = ptr; 624 625 /* 626 * [ WFC: Legal Character ] 
627 * Characters referred to using character references must match the 628 * production for Char. 629 */ 630 if (IS_CHAR(val)) { 631 return(val); 632 } else { 633 ctxt->errNo = XML_ERR_INVALID_CHAR; 634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 635 ctxt->sax->error(ctxt->userData, 636 "CharRef: invalid xmlChar value %d\n", val); 637 ctxt->wellFormed = 0; 638 ctxt->disableSAX = 1; 639 } 640 return(0); 641} 642 643/** 644 * xmlParserHandlePEReference: 645 * @ctxt: the parser context 646 * 647 * [69] PEReference ::= '%' Name ';' 648 * 649 * [ WFC: No Recursion ] 650 * A parsed entity must not contain a recursive 651 * reference to itself, either directly or indirectly. 652 * 653 * [ WFC: Entity Declared ] 654 * In a document without any DTD, a document with only an internal DTD 655 * subset which contains no parameter entity references, or a document 656 * with "standalone='yes'", ... ... The declaration of a parameter 657 * entity must precede any reference to it... 658 * 659 * [ VC: Entity Declared ] 660 * In a document with an external subset or external parameter entities 661 * with "standalone='no'", ... ... The declaration of a parameter entity 662 * must precede any reference to it... 663 * 664 * [ WFC: In DTD ] 665 * Parameter-entity references may only appear in the DTD. 666 * NOTE: misleading but this is handled. 667 * 668 * A PEReference may have been detected in the current input stream 669 * the handling is done accordingly to 670 * http://www.w3.org/TR/REC-xml#entproc 671 * i.e. 
672 * - Included in literal in entity values 673 * - Included as Paraemeter Entity reference within DTDs 674 */ 675void 676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 677 xmlChar *name; 678 xmlEntityPtr entity = NULL; 679 xmlParserInputPtr input; 680 681 if (ctxt->token != 0) { 682 return; 683 } 684 if (RAW != '%') return; 685 switch(ctxt->instate) { 686 case XML_PARSER_CDATA_SECTION: 687 return; 688 case XML_PARSER_COMMENT: 689 return; 690 case XML_PARSER_START_TAG: 691 return; 692 case XML_PARSER_END_TAG: 693 return; 694 case XML_PARSER_EOF: 695 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 698 ctxt->wellFormed = 0; 699 ctxt->disableSAX = 1; 700 return; 701 case XML_PARSER_PROLOG: 702 case XML_PARSER_START: 703 case XML_PARSER_MISC: 704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 707 ctxt->wellFormed = 0; 708 ctxt->disableSAX = 1; 709 return; 710 case XML_PARSER_ENTITY_DECL: 711 case XML_PARSER_CONTENT: 712 case XML_PARSER_ATTRIBUTE_VALUE: 713 case XML_PARSER_PI: 714 case XML_PARSER_SYSTEM_LITERAL: 715 /* we just ignore it there */ 716 return; 717 case XML_PARSER_EPILOG: 718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 721 ctxt->wellFormed = 0; 722 ctxt->disableSAX = 1; 723 return; 724 case XML_PARSER_ENTITY_VALUE: 725 /* 726 * NOTE: in the case of entity values, we don't do the 727 * substitution here since we need the literal 728 * entity value to be able to save the internal 729 * subset of the document. 
730 * This will be handled by xmlStringDecodeEntities 731 */ 732 return; 733 case XML_PARSER_DTD: 734 /* 735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 736 * In the internal DTD subset, parameter-entity references 737 * can occur only where markup declarations can occur, not 738 * within markup declarations. 739 * In that case this is handled in xmlParseMarkupDecl 740 */ 741 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 742 return; 743 break; 744 case XML_PARSER_IGNORE: 745 return; 746 } 747 748 NEXT; 749 name = xmlParseName(ctxt); 750 if (xmlParserDebugEntities) 751 xmlGenericError(xmlGenericErrorContext, 752 "PE Reference: %s\n", name); 753 if (name == NULL) { 754 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 757 ctxt->wellFormed = 0; 758 ctxt->disableSAX = 1; 759 } else { 760 if (RAW == ';') { 761 NEXT; 762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 764 if (entity == NULL) { 765 766 /* 767 * [ WFC: Entity Declared ] 768 * In a document without any DTD, a document with only an 769 * internal DTD subset which contains no parameter entity 770 * references, or a document with "standalone='yes'", ... 771 * ... The declaration of a parameter entity must precede 772 * any reference to it... 773 */ 774 if ((ctxt->standalone == 1) || 775 ((ctxt->hasExternalSubset == 0) && 776 (ctxt->hasPErefs == 0))) { 777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 778 ctxt->sax->error(ctxt->userData, 779 "PEReference: %%%s; not found\n", name); 780 ctxt->wellFormed = 0; 781 ctxt->disableSAX = 1; 782 } else { 783 /* 784 * [ VC: Entity Declared ] 785 * In a document with an external subset or external 786 * parameter entities with "standalone='no'", ... 787 * ... The declaration of a parameter entity must precede 788 * any reference to it... 
789 */ 790 if ((!ctxt->disableSAX) && 791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 792 ctxt->vctxt.error(ctxt->vctxt.userData, 793 "PEReference: %%%s; not found\n", name); 794 } else if ((!ctxt->disableSAX) && 795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 796 ctxt->sax->warning(ctxt->userData, 797 "PEReference: %%%s; not found\n", name); 798 ctxt->valid = 0; 799 } 800 } else { 801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 803 xmlChar start[4]; 804 xmlCharEncoding enc; 805 806 /* 807 * handle the extra spaces added before and after 808 * c.f. http://www.w3.org/TR/REC-xml#as-PE 809 * this is done independantly. 810 */ 811 input = xmlNewEntityInputStream(ctxt, entity); 812 xmlPushInput(ctxt, input); 813 814 /* 815 * Get the 4 first bytes and decode the charset 816 * if enc != XML_CHAR_ENCODING_NONE 817 * plug some encoding conversion routines. 818 */ 819 GROW 820 start[0] = RAW; 821 start[1] = NXT(1); 822 start[2] = NXT(2); 823 start[3] = NXT(3); 824 enc = xmlDetectCharEncoding(start, 4); 825 if (enc != XML_CHAR_ENCODING_NONE) { 826 xmlSwitchEncoding(ctxt, enc); 827 } 828 829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 830 (RAW == '<') && (NXT(1) == '?') && 831 (NXT(2) == 'x') && (NXT(3) == 'm') && 832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 833 xmlParseTextDecl(ctxt); 834 } 835 if (ctxt->token == 0) 836 ctxt->token = ' '; 837 } else { 838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 839 ctxt->sax->error(ctxt->userData, 840 "xmlHandlePEReference: %s is not a parameter entity\n", 841 name); 842 ctxt->wellFormed = 0; 843 ctxt->disableSAX = 1; 844 } 845 } 846 } else { 847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 849 ctxt->sax->error(ctxt->userData, 850 "xmlHandlePEReference: expecting ';'\n"); 851 ctxt->wellFormed = 0; 852 ctxt->disableSAX = 1; 853 } 854 xmlFree(name); 855 } 856} 857 858/* 859 * 
Macro used to grow the current buffer. 860 */ 861#define growBuffer(buffer) { \ 862 buffer##_size *= 2; \ 863 buffer = (xmlChar *) \ 864 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 865 if (buffer == NULL) { \ 866 perror("realloc failed"); \ 867 return(NULL); \ 868 } \ 869} 870 871/** 872 * xmlStringDecodeEntities: 873 * @ctxt: the parser context 874 * @str: the input string 875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 876 * @end: an end marker xmlChar, 0 if none 877 * @end2: an end marker xmlChar, 0 if none 878 * @end3: an end marker xmlChar, 0 if none 879 * 880 * Takes a entity string content and process to do the adequate subtitutions. 881 * 882 * [67] Reference ::= EntityRef | CharRef 883 * 884 * [69] PEReference ::= '%' Name ';' 885 * 886 * Returns A newly allocated string with the substitution done. The caller 887 * must deallocate it ! 888 */ 889xmlChar * 890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 891 xmlChar end, xmlChar end2, xmlChar end3) { 892 xmlChar *buffer = NULL; 893 int buffer_size = 0; 894 895 xmlChar *current = NULL; 896 xmlEntityPtr ent; 897 int c,l; 898 int nbchars = 0; 899 900 if (str == NULL) 901 return(NULL); 902 903 if (ctxt->depth > 40) { 904 ctxt->errNo = XML_ERR_ENTITY_LOOP; 905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 906 ctxt->sax->error(ctxt->userData, 907 "Detected entity reference loop\n"); 908 ctxt->wellFormed = 0; 909 ctxt->disableSAX = 1; 910 return(NULL); 911 } 912 913 /* 914 * allocate a translation buffer. 915 */ 916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 918 if (buffer == NULL) { 919 perror("xmlDecodeEntities: malloc failed"); 920 return(NULL); 921 } 922 923 /* 924 * Ok loop until we reach one of the ending char or a size limit. 925 * we are operating on already parsed values. 
926 */ 927 c = CUR_SCHAR(str, l); 928 while ((c != 0) && (c != end) && /* non input consuming loop */ 929 (c != end2) && (c != end3)) { 930 931 if (c == 0) break; 932 if ((c == '&') && (str[1] == '#')) { 933 int val = xmlParseStringCharRef(ctxt, &str); 934 if (val != 0) { 935 COPY_BUF(0,buffer,nbchars,val); 936 } 937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 938 if (xmlParserDebugEntities) 939 xmlGenericError(xmlGenericErrorContext, 940 "String decoding Entity Reference: %.30s\n", 941 str); 942 ent = xmlParseStringEntityRef(ctxt, &str); 943 if ((ent != NULL) && 944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 945 if (ent->content != NULL) { 946 COPY_BUF(0,buffer,nbchars,ent->content[0]); 947 } else { 948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 949 ctxt->sax->error(ctxt->userData, 950 "internal error entity has no content\n"); 951 } 952 } else if ((ent != NULL) && (ent->content != NULL)) { 953 xmlChar *rep; 954 955 ctxt->depth++; 956 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 957 0, 0, 0); 958 ctxt->depth--; 959 if (rep != NULL) { 960 current = rep; 961 while (*current != 0) { /* non input consuming loop */ 962 buffer[nbchars++] = *current++; 963 if (nbchars > 964 buffer_size - XML_PARSER_BUFFER_SIZE) { 965 growBuffer(buffer); 966 } 967 } 968 xmlFree(rep); 969 } 970 } else if (ent != NULL) { 971 int i = xmlStrlen(ent->name); 972 const xmlChar *cur = ent->name; 973 974 buffer[nbchars++] = '&'; 975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 976 growBuffer(buffer); 977 } 978 for (;i > 0;i--) 979 buffer[nbchars++] = *cur++; 980 buffer[nbchars++] = ';'; 981 } 982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 983 if (xmlParserDebugEntities) 984 xmlGenericError(xmlGenericErrorContext, 985 "String decoding PE Reference: %.30s\n", str); 986 ent = xmlParseStringPEReference(ctxt, &str); 987 if (ent != NULL) { 988 xmlChar *rep; 989 990 ctxt->depth++; 991 rep = xmlStringDecodeEntities(ctxt, ent->content, 
what, 992 0, 0, 0); 993 ctxt->depth--; 994 if (rep != NULL) { 995 current = rep; 996 while (*current != 0) { /* non input consuming loop */ 997 buffer[nbchars++] = *current++; 998 if (nbchars > 999 buffer_size - XML_PARSER_BUFFER_SIZE) { 1000 growBuffer(buffer); 1001 } 1002 } 1003 xmlFree(rep); 1004 } 1005 } 1006 } else { 1007 COPY_BUF(l,buffer,nbchars,c); 1008 str += l; 1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1010 growBuffer(buffer); 1011 } 1012 } 1013 c = CUR_SCHAR(str, l); 1014 } 1015 buffer[nbchars++] = 0; 1016 return(buffer); 1017} 1018 1019 1020/************************************************************************ 1021 * * 1022 * Commodity functions to handle xmlChars * 1023 * * 1024 ************************************************************************/ 1025 1026/** 1027 * xmlStrndup: 1028 * @cur: the input xmlChar * 1029 * @len: the len of @cur 1030 * 1031 * a strndup for array of xmlChar's 1032 * 1033 * Returns a new xmlChar * or NULL 1034 */ 1035xmlChar * 1036xmlStrndup(const xmlChar *cur, int len) { 1037 xmlChar *ret; 1038 1039 if ((cur == NULL) || (len < 0)) return(NULL); 1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1041 if (ret == NULL) { 1042 xmlGenericError(xmlGenericErrorContext, 1043 "malloc of %ld byte failed\n", 1044 (len + 1) * (long)sizeof(xmlChar)); 1045 return(NULL); 1046 } 1047 memcpy(ret, cur, len * sizeof(xmlChar)); 1048 ret[len] = 0; 1049 return(ret); 1050} 1051 1052/** 1053 * xmlStrdup: 1054 * @cur: the input xmlChar * 1055 * 1056 * a strdup for array of xmlChar's. Since they are supposed to be 1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1058 * a termination mark of '0'. 
1059 * 1060 * Returns a new xmlChar * or NULL 1061 */ 1062xmlChar * 1063xmlStrdup(const xmlChar *cur) { 1064 const xmlChar *p = cur; 1065 1066 if (cur == NULL) return(NULL); 1067 while (*p != 0) p++; /* non input consuming */ 1068 return(xmlStrndup(cur, p - cur)); 1069} 1070 1071/** 1072 * xmlCharStrndup: 1073 * @cur: the input char * 1074 * @len: the len of @cur 1075 * 1076 * a strndup for char's to xmlChar's 1077 * 1078 * Returns a new xmlChar * or NULL 1079 */ 1080 1081xmlChar * 1082xmlCharStrndup(const char *cur, int len) { 1083 int i; 1084 xmlChar *ret; 1085 1086 if ((cur == NULL) || (len < 0)) return(NULL); 1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1088 if (ret == NULL) { 1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1090 (len + 1) * (long)sizeof(xmlChar)); 1091 return(NULL); 1092 } 1093 for (i = 0;i < len;i++) 1094 ret[i] = (xmlChar) cur[i]; 1095 ret[len] = 0; 1096 return(ret); 1097} 1098 1099/** 1100 * xmlCharStrdup: 1101 * @cur: the input char * 1102 * @len: the len of @cur 1103 * 1104 * a strdup for char's to xmlChar's 1105 * 1106 * Returns a new xmlChar * or NULL 1107 */ 1108 1109xmlChar * 1110xmlCharStrdup(const char *cur) { 1111 const char *p = cur; 1112 1113 if (cur == NULL) return(NULL); 1114 while (*p != '\0') p++; /* non input consuming */ 1115 return(xmlCharStrndup(cur, p - cur)); 1116} 1117 1118/** 1119 * xmlStrcmp: 1120 * @str1: the first xmlChar * 1121 * @str2: the second xmlChar * 1122 * 1123 * a strcmp for xmlChar's 1124 * 1125 * Returns the integer result of the comparison 1126 */ 1127 1128int 1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1130 register int tmp; 1131 1132 if (str1 == str2) return(0); 1133 if (str1 == NULL) return(-1); 1134 if (str2 == NULL) return(1); 1135 do { 1136 tmp = *str1++ - *str2; 1137 if (tmp != 0) return(tmp); 1138 } while (*str2++ != 0); 1139 return 0; 1140} 1141 1142/** 1143 * xmlStrEqual: 1144 * @str1: the first xmlChar * 1145 * @str2: the second 
xmlChar * 1146 * 1147 * Check if both string are equal of have same content 1148 * Should be a bit more readable and faster than xmlStrEqual() 1149 * 1150 * Returns 1 if they are equal, 0 if they are different 1151 */ 1152 1153int 1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1155 if (str1 == str2) return(1); 1156 if (str1 == NULL) return(0); 1157 if (str2 == NULL) return(0); 1158 do { 1159 if (*str1++ != *str2) return(0); 1160 } while (*str2++); 1161 return(1); 1162} 1163 1164/** 1165 * xmlStrncmp: 1166 * @str1: the first xmlChar * 1167 * @str2: the second xmlChar * 1168 * @len: the max comparison length 1169 * 1170 * a strncmp for xmlChar's 1171 * 1172 * Returns the integer result of the comparison 1173 */ 1174 1175int 1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1177 register int tmp; 1178 1179 if (len <= 0) return(0); 1180 if (str1 == str2) return(0); 1181 if (str1 == NULL) return(-1); 1182 if (str2 == NULL) return(1); 1183 do { 1184 tmp = *str1++ - *str2; 1185 if (tmp != 0 || --len == 0) return(tmp); 1186 } while (*str2++ != 0); 1187 return 0; 1188} 1189 1190static xmlChar casemap[256] = { 1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1209 
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1223}; 1224 1225/** 1226 * xmlStrcasecmp: 1227 * @str1: the first xmlChar * 1228 * @str2: the second xmlChar * 1229 * 1230 * a strcasecmp for xmlChar's 1231 * 1232 * Returns the integer result of the comparison 1233 */ 1234 1235int 1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1237 register int tmp; 1238 1239 if (str1 == str2) return(0); 1240 if (str1 == NULL) return(-1); 1241 if (str2 == NULL) return(1); 1242 do { 1243 tmp = casemap[*str1++] - casemap[*str2]; 1244 if (tmp != 0) return(tmp); 1245 } while (*str2++ != 0); 1246 return 0; 1247} 1248 1249/** 1250 * xmlStrncasecmp: 1251 * @str1: the first xmlChar * 1252 * @str2: the second xmlChar * 1253 * @len: the max comparison length 1254 * 1255 * a strncasecmp for xmlChar's 1256 * 1257 * Returns the integer result of the comparison 1258 */ 1259 1260int 1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1262 register int tmp; 1263 1264 if (len <= 0) return(0); 1265 if (str1 == str2) return(0); 1266 if (str1 == NULL) return(-1); 1267 if (str2 == NULL) return(1); 1268 do { 1269 tmp = casemap[*str1++] - casemap[*str2]; 1270 if (tmp != 0 || --len == 0) return(tmp); 1271 } while (*str2++ != 0); 1272 return 0; 1273} 1274 1275/** 1276 * xmlStrchr: 1277 * @str: the xmlChar * array 1278 * @val: the xmlChar to search 1279 * 1280 * a strchr for xmlChar's 1281 * 1282 
* Returns the xmlChar * for the first occurence or NULL. 1283 */ 1284 1285const xmlChar * 1286xmlStrchr(const xmlChar *str, xmlChar val) { 1287 if (str == NULL) return(NULL); 1288 while (*str != 0) { /* non input consuming */ 1289 if (*str == val) return((xmlChar *) str); 1290 str++; 1291 } 1292 return(NULL); 1293} 1294 1295/** 1296 * xmlStrstr: 1297 * @str: the xmlChar * array (haystack) 1298 * @val: the xmlChar to search (needle) 1299 * 1300 * a strstr for xmlChar's 1301 * 1302 * Returns the xmlChar * for the first occurence or NULL. 1303 */ 1304 1305const xmlChar * 1306xmlStrstr(const xmlChar *str, const xmlChar *val) { 1307 int n; 1308 1309 if (str == NULL) return(NULL); 1310 if (val == NULL) return(NULL); 1311 n = xmlStrlen(val); 1312 1313 if (n == 0) return(str); 1314 while (*str != 0) { /* non input consuming */ 1315 if (*str == *val) { 1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1317 } 1318 str++; 1319 } 1320 return(NULL); 1321} 1322 1323/** 1324 * xmlStrcasestr: 1325 * @str: the xmlChar * array (haystack) 1326 * @val: the xmlChar to search (needle) 1327 * 1328 * a case-ignoring strstr for xmlChar's 1329 * 1330 * Returns the xmlChar * for the first occurence or NULL. 1331 */ 1332 1333const xmlChar * 1334xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1335 int n; 1336 1337 if (str == NULL) return(NULL); 1338 if (val == NULL) return(NULL); 1339 n = xmlStrlen(val); 1340 1341 if (n == 0) return(str); 1342 while (*str != 0) { /* non input consuming */ 1343 if (casemap[*str] == casemap[*val]) 1344 if (!xmlStrncasecmp(str, val, n)) return(str); 1345 str++; 1346 } 1347 return(NULL); 1348} 1349 1350/** 1351 * xmlStrsub: 1352 * @str: the xmlChar * array (haystack) 1353 * @start: the index of the first char (zero based) 1354 * @len: the length of the substring 1355 * 1356 * Extract a substring of a given string 1357 * 1358 * Returns the xmlChar * for the first occurence or NULL. 
1359 */ 1360 1361xmlChar * 1362xmlStrsub(const xmlChar *str, int start, int len) { 1363 int i; 1364 1365 if (str == NULL) return(NULL); 1366 if (start < 0) return(NULL); 1367 if (len < 0) return(NULL); 1368 1369 for (i = 0;i < start;i++) { 1370 if (*str == 0) return(NULL); 1371 str++; 1372 } 1373 if (*str == 0) return(NULL); 1374 return(xmlStrndup(str, len)); 1375} 1376 1377/** 1378 * xmlStrlen: 1379 * @str: the xmlChar * array 1380 * 1381 * length of a xmlChar's string 1382 * 1383 * Returns the number of xmlChar contained in the ARRAY. 1384 */ 1385 1386int 1387xmlStrlen(const xmlChar *str) { 1388 int len = 0; 1389 1390 if (str == NULL) return(0); 1391 while (*str != 0) { /* non input consuming */ 1392 str++; 1393 len++; 1394 } 1395 return(len); 1396} 1397 1398/** 1399 * xmlStrncat: 1400 * @cur: the original xmlChar * array 1401 * @add: the xmlChar * array added 1402 * @len: the length of @add 1403 * 1404 * a strncat for array of xmlChar's, it will extend cur with the len 1405 * first bytes of @add. 1406 * 1407 * Returns a new xmlChar *, the original @cur is reallocated if needed 1408 * and should not be freed 1409 */ 1410 1411xmlChar * 1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1413 int size; 1414 xmlChar *ret; 1415 1416 if ((add == NULL) || (len == 0)) 1417 return(cur); 1418 if (cur == NULL) 1419 return(xmlStrndup(add, len)); 1420 1421 size = xmlStrlen(cur); 1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1423 if (ret == NULL) { 1424 xmlGenericError(xmlGenericErrorContext, 1425 "xmlStrncat: realloc of %ld byte failed\n", 1426 (size + len + 1) * (long)sizeof(xmlChar)); 1427 return(cur); 1428 } 1429 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1430 ret[size + len] = 0; 1431 return(ret); 1432} 1433 1434/** 1435 * xmlStrcat: 1436 * @cur: the original xmlChar * array 1437 * @add: the xmlChar * array added 1438 * 1439 * a strcat for array of xmlChar's. 
Since they are supposed to be 1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1441 * a termination mark of '0'. 1442 * 1443 * Returns a new xmlChar * containing the concatenated string. 1444 */ 1445xmlChar * 1446xmlStrcat(xmlChar *cur, const xmlChar *add) { 1447 const xmlChar *p = add; 1448 1449 if (add == NULL) return(cur); 1450 if (cur == NULL) 1451 return(xmlStrdup(add)); 1452 1453 while (*p != 0) p++; /* non input consuming */ 1454 return(xmlStrncat(cur, add, p - add)); 1455} 1456 1457/************************************************************************ 1458 * * 1459 * Commodity functions, cleanup needed ? * 1460 * * 1461 ************************************************************************/ 1462 1463/** 1464 * areBlanks: 1465 * @ctxt: an XML parser context 1466 * @str: a xmlChar * 1467 * @len: the size of @str 1468 * 1469 * Is this a sequence of blank chars that one can ignore ? 1470 * 1471 * Returns 1 if ignorable 0 otherwise. 1472 */ 1473 1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1475 int i, ret; 1476 xmlNodePtr lastChild; 1477 1478 if (ctxt->keepBlanks) 1479 return(0); 1480 1481 /* 1482 * Check for xml:space value. 
1483 */ 1484 if (*(ctxt->space) == 1) 1485 return(0); 1486 1487 /* 1488 * Check that the string is made of blanks 1489 */ 1490 for (i = 0;i < len;i++) 1491 if (!(IS_BLANK(str[i]))) return(0); 1492 1493 /* 1494 * Look if the element is mixed content in the Dtd if available 1495 */ 1496 if (ctxt->node == NULL) return(0); 1497 if (ctxt->myDoc != NULL) { 1498 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1499 if (ret == 0) return(1); 1500 if (ret == 1) return(0); 1501 } 1502 1503 /* 1504 * Otherwise, heuristic :-\ 1505 */ 1506 if (RAW != '<') return(0); 1507 if ((ctxt->node->children == NULL) && 1508 (RAW == '<') && (NXT(1) == '/')) return(0); 1509 1510 lastChild = xmlGetLastChild(ctxt->node); 1511 if (lastChild == NULL) { 1512 if ((ctxt->node->type != XML_ELEMENT_NODE) && 1513 (ctxt->node->content != NULL)) return(0); 1514 } else if (xmlNodeIsText(lastChild)) 1515 return(0); 1516 else if ((ctxt->node->children != NULL) && 1517 (xmlNodeIsText(ctxt->node->children))) 1518 return(0); 1519 return(1); 1520} 1521 1522/* 1523 * Forward definition for recusive behaviour. 1524 */ 1525void xmlParsePEReference(xmlParserCtxtPtr ctxt); 1526void xmlParseReference(xmlParserCtxtPtr ctxt); 1527 1528/************************************************************************ 1529 * * 1530 * Extra stuff for namespace support * 1531 * Relates to http://www.w3.org/TR/WD-xml-names * 1532 * * 1533 ************************************************************************/ 1534 1535/** 1536 * xmlSplitQName: 1537 * @ctxt: an XML parser context 1538 * @name: an XML parser context 1539 * @prefix: a xmlChar ** 1540 * 1541 * parse an UTF8 encoded XML qualified name string 1542 * 1543 * [NS 5] QName ::= (Prefix ':')? LocalPart 1544 * 1545 * [NS 6] Prefix ::= NCName 1546 * 1547 * [NS 7] LocalPart ::= NCName 1548 * 1549 * Returns the local part, and prefix is updated 1550 * to get the Prefix if any. 
1551 */ 1552 1553xmlChar * 1554xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1555 xmlChar buf[XML_MAX_NAMELEN + 5]; 1556 xmlChar *buffer = NULL; 1557 int len = 0; 1558 int max = XML_MAX_NAMELEN; 1559 xmlChar *ret = NULL; 1560 const xmlChar *cur = name; 1561 int c; 1562 1563 *prefix = NULL; 1564 1565#ifndef XML_XML_NAMESPACE 1566 /* xml: prefix is not really a namespace */ 1567 if ((cur[0] == 'x') && (cur[1] == 'm') && 1568 (cur[2] == 'l') && (cur[3] == ':')) 1569 return(xmlStrdup(name)); 1570#endif 1571 1572 /* nasty but valid */ 1573 if (cur[0] == ':') 1574 return(xmlStrdup(name)); 1575 1576 c = *cur++; 1577 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1578 buf[len++] = c; 1579 c = *cur++; 1580 } 1581 if (len >= max) { 1582 /* 1583 * Okay someone managed to make a huge name, so he's ready to pay 1584 * for the processing speed. 1585 */ 1586 max = len * 2; 1587 1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1589 if (buffer == NULL) { 1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1591 ctxt->sax->error(ctxt->userData, 1592 "xmlSplitQName: out of memory\n"); 1593 return(NULL); 1594 } 1595 memcpy(buffer, buf, len); 1596 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1597 if (len + 10 > max) { 1598 max *= 2; 1599 buffer = (xmlChar *) xmlRealloc(buffer, 1600 max * sizeof(xmlChar)); 1601 if (buffer == NULL) { 1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1603 ctxt->sax->error(ctxt->userData, 1604 "xmlSplitQName: out of memory\n"); 1605 return(NULL); 1606 } 1607 } 1608 buffer[len++] = c; 1609 c = *cur++; 1610 } 1611 buffer[len] = 0; 1612 } 1613 1614 if (buffer == NULL) 1615 ret = xmlStrndup(buf, len); 1616 else { 1617 ret = buffer; 1618 buffer = NULL; 1619 max = XML_MAX_NAMELEN; 1620 } 1621 1622 1623 if (c == ':') { 1624 c = *cur++; 1625 if (c == 0) return(ret); 1626 *prefix = ret; 1627 len = 0; 1628 1629 while ((c != 0) && (len < max)) { /* tested bigname2.xml 
*/ 1630 buf[len++] = c; 1631 c = *cur++; 1632 } 1633 if (len >= max) { 1634 /* 1635 * Okay someone managed to make a huge name, so he's ready to pay 1636 * for the processing speed. 1637 */ 1638 max = len * 2; 1639 1640 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1641 if (buffer == NULL) { 1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1643 ctxt->sax->error(ctxt->userData, 1644 "xmlSplitQName: out of memory\n"); 1645 return(NULL); 1646 } 1647 memcpy(buffer, buf, len); 1648 while (c != 0) { /* tested bigname2.xml */ 1649 if (len + 10 > max) { 1650 max *= 2; 1651 buffer = (xmlChar *) xmlRealloc(buffer, 1652 max * sizeof(xmlChar)); 1653 if (buffer == NULL) { 1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1655 ctxt->sax->error(ctxt->userData, 1656 "xmlSplitQName: out of memory\n"); 1657 return(NULL); 1658 } 1659 } 1660 buffer[len++] = c; 1661 c = *cur++; 1662 } 1663 buffer[len] = 0; 1664 } 1665 1666 if (buffer == NULL) 1667 ret = xmlStrndup(buf, len); 1668 else { 1669 ret = buffer; 1670 } 1671 } 1672 1673 return(ret); 1674} 1675 1676/************************************************************************ 1677 * * 1678 * The parser itself * 1679 * Relates to http://www.w3.org/TR/REC-xml * 1680 * * 1681 ************************************************************************/ 1682 1683static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1684/** 1685 * xmlParseName: 1686 * @ctxt: an XML parser context 1687 * 1688 * parse an XML name. 1689 * 1690 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1691 * CombiningChar | Extender 1692 * 1693 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1694 * 1695 * [6] Names ::= Name (S Name)* 1696 * 1697 * Returns the Name parsed or NULL 1698 */ 1699 1700xmlChar * 1701xmlParseName(xmlParserCtxtPtr ctxt) { 1702 const xmlChar *in; 1703 xmlChar *ret; 1704 int count = 0; 1705 1706 GROW; 1707 1708 /* 1709 * Accelerator for simple ASCII names 1710 */ 1711 in = ctxt->input->cur; 1712 if (((*in >= 0x61) && (*in <= 0x7A)) || 1713 ((*in >= 0x41) && (*in <= 0x5A)) || 1714 (*in == '_') || (*in == ':')) { 1715 in++; 1716 while (((*in >= 0x61) && (*in <= 0x7A)) || 1717 ((*in >= 0x41) && (*in <= 0x5A)) || 1718 ((*in >= 0x30) && (*in <= 0x39)) || 1719 (*in == '_') || (*in == '-') || 1720 (*in == ':') || (*in == '.')) 1721 in++; 1722 if ((*in > 0) && (*in < 0x80)) { 1723 count = in - ctxt->input->cur; 1724 ret = xmlStrndup(ctxt->input->cur, count); 1725 ctxt->input->cur = in; 1726 return(ret); 1727 } 1728 } 1729 return(xmlParseNameComplex(ctxt)); 1730} 1731 1732static xmlChar * 1733xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1734 xmlChar buf[XML_MAX_NAMELEN + 5]; 1735 int len = 0, l; 1736 int c; 1737 int count = 0; 1738 1739 /* 1740 * Handler for more complex cases 1741 */ 1742 GROW; 1743 c = CUR_CHAR(l); 1744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1745 (!IS_LETTER(c) && (c != '_') && 1746 (c != ':'))) { 1747 return(NULL); 1748 } 1749 1750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1751 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1752 (c == '.') || (c == '-') || 1753 (c == '_') || (c == ':') || 1754 (IS_COMBINING(c)) || 1755 (IS_EXTENDER(c)))) { 1756 if (count++ > 100) { 1757 count = 0; 1758 GROW; 1759 } 1760 COPY_BUF(l,buf,len,c); 1761 NEXTL(l); 1762 c = CUR_CHAR(l); 1763 if (len >= XML_MAX_NAMELEN) { 1764 /* 1765 * Okay someone managed to make a huge name, so he's ready to pay 1766 * for the processing speed. 
1767 */ 1768 xmlChar *buffer; 1769 int max = len * 2; 1770 1771 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1772 if (buffer == NULL) { 1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1774 ctxt->sax->error(ctxt->userData, 1775 "xmlParseNameComplex: out of memory\n"); 1776 return(NULL); 1777 } 1778 memcpy(buffer, buf, len); 1779 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1780 (c == '.') || (c == '-') || 1781 (c == '_') || (c == ':') || 1782 (IS_COMBINING(c)) || 1783 (IS_EXTENDER(c))) { 1784 if (count++ > 100) { 1785 count = 0; 1786 GROW; 1787 } 1788 if (len + 10 > max) { 1789 max *= 2; 1790 buffer = (xmlChar *) xmlRealloc(buffer, 1791 max * sizeof(xmlChar)); 1792 if (buffer == NULL) { 1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1794 ctxt->sax->error(ctxt->userData, 1795 "xmlParseNameComplex: out of memory\n"); 1796 return(NULL); 1797 } 1798 } 1799 COPY_BUF(l,buffer,len,c); 1800 NEXTL(l); 1801 c = CUR_CHAR(l); 1802 } 1803 buffer[len] = 0; 1804 return(buffer); 1805 } 1806 } 1807 return(xmlStrndup(buf, len)); 1808} 1809 1810/** 1811 * xmlParseStringName: 1812 * @ctxt: an XML parser context 1813 * @str: a pointer to the string pointer (IN/OUT) 1814 * 1815 * parse an XML name. 1816 * 1817 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1818 * CombiningChar | Extender 1819 * 1820 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1821 * 1822 * [6] Names ::= Name (S Name)* 1823 * 1824 * Returns the Name parsed or NULL. The str pointer 1825 * is updated to the current location in the string. 
1826 */ 1827 1828static xmlChar * 1829xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1830 xmlChar buf[XML_MAX_NAMELEN + 5]; 1831 const xmlChar *cur = *str; 1832 int len = 0, l; 1833 int c; 1834 1835 c = CUR_SCHAR(cur, l); 1836 if (!IS_LETTER(c) && (c != '_') && 1837 (c != ':')) { 1838 return(NULL); 1839 } 1840 1841 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1842 (c == '.') || (c == '-') || 1843 (c == '_') || (c == ':') || 1844 (IS_COMBINING(c)) || 1845 (IS_EXTENDER(c))) { 1846 COPY_BUF(l,buf,len,c); 1847 cur += l; 1848 c = CUR_SCHAR(cur, l); 1849 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1850 /* 1851 * Okay someone managed to make a huge name, so he's ready to pay 1852 * for the processing speed. 1853 */ 1854 xmlChar *buffer; 1855 int max = len * 2; 1856 1857 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1858 if (buffer == NULL) { 1859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1860 ctxt->sax->error(ctxt->userData, 1861 "xmlParseStringName: out of memory\n"); 1862 return(NULL); 1863 } 1864 memcpy(buffer, buf, len); 1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1866 (c == '.') || (c == '-') || 1867 (c == '_') || (c == ':') || 1868 (IS_COMBINING(c)) || 1869 (IS_EXTENDER(c))) { 1870 if (len + 10 > max) { 1871 max *= 2; 1872 buffer = (xmlChar *) xmlRealloc(buffer, 1873 max * sizeof(xmlChar)); 1874 if (buffer == NULL) { 1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1876 ctxt->sax->error(ctxt->userData, 1877 "xmlParseStringName: out of memory\n"); 1878 return(NULL); 1879 } 1880 } 1881 COPY_BUF(l,buffer,len,c); 1882 cur += l; 1883 c = CUR_SCHAR(cur, l); 1884 } 1885 buffer[len] = 0; 1886 *str = cur; 1887 return(buffer); 1888 } 1889 } 1890 *str = cur; 1891 return(xmlStrndup(buf, len)); 1892} 1893 1894/** 1895 * xmlParseNmtoken: 1896 * @ctxt: an XML parser context 1897 * 1898 * parse an XML Nmtoken. 
1899 * 1900 * [7] Nmtoken ::= (NameChar)+ 1901 * 1902 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1903 * 1904 * Returns the Nmtoken parsed or NULL 1905 */ 1906 1907xmlChar * 1908xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1909 xmlChar buf[XML_MAX_NAMELEN + 5]; 1910 int len = 0, l; 1911 int c; 1912 int count = 0; 1913 1914 GROW; 1915 c = CUR_CHAR(l); 1916 1917 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1918 (c == '.') || (c == '-') || 1919 (c == '_') || (c == ':') || 1920 (IS_COMBINING(c)) || 1921 (IS_EXTENDER(c))) { 1922 if (count++ > 100) { 1923 count = 0; 1924 GROW; 1925 } 1926 COPY_BUF(l,buf,len,c); 1927 NEXTL(l); 1928 c = CUR_CHAR(l); 1929 if (len >= XML_MAX_NAMELEN) { 1930 /* 1931 * Okay someone managed to make a huge token, so he's ready to pay 1932 * for the processing speed. 1933 */ 1934 xmlChar *buffer; 1935 int max = len * 2; 1936 1937 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1938 if (buffer == NULL) { 1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1940 ctxt->sax->error(ctxt->userData, 1941 "xmlParseNmtoken: out of memory\n"); 1942 return(NULL); 1943 } 1944 memcpy(buffer, buf, len); 1945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1946 (c == '.') || (c == '-') || 1947 (c == '_') || (c == ':') || 1948 (IS_COMBINING(c)) || 1949 (IS_EXTENDER(c))) { 1950 if (count++ > 100) { 1951 count = 0; 1952 GROW; 1953 } 1954 if (len + 10 > max) { 1955 max *= 2; 1956 buffer = (xmlChar *) xmlRealloc(buffer, 1957 max * sizeof(xmlChar)); 1958 if (buffer == NULL) { 1959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1960 ctxt->sax->error(ctxt->userData, 1961 "xmlParseNameComplex: out of memory\n"); 1962 return(NULL); 1963 } 1964 } 1965 COPY_BUF(l,buffer,len,c); 1966 NEXTL(l); 1967 c = CUR_CHAR(l); 1968 } 1969 buffer[len] = 0; 1970 return(buffer); 1971 } 1972 } 1973 if (len == 0) 1974 return(NULL); 1975 return(xmlStrndup(buf, len)); 1976} 1977 1978/** 1979 * xmlParseEntityValue: 1980 * @ctxt: an XML 
parser context 1981 * @orig: if non-NULL store a copy of the original entity value 1982 * 1983 * parse a value for ENTITY declarations 1984 * 1985 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 1986 * "'" ([^%&'] | PEReference | Reference)* "'" 1987 * 1988 * Returns the EntityValue parsed with reference substitued or NULL 1989 */ 1990 1991xmlChar * 1992xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 1993 xmlChar *buf = NULL; 1994 int len = 0; 1995 int size = XML_PARSER_BUFFER_SIZE; 1996 int c, l; 1997 xmlChar stop; 1998 xmlChar *ret = NULL; 1999 const xmlChar *cur = NULL; 2000 xmlParserInputPtr input; 2001 2002 if (RAW == '"') stop = '"'; 2003 else if (RAW == '\'') stop = '\''; 2004 else { 2005 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2007 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2008 ctxt->wellFormed = 0; 2009 ctxt->disableSAX = 1; 2010 return(NULL); 2011 } 2012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2013 if (buf == NULL) { 2014 xmlGenericError(xmlGenericErrorContext, 2015 "malloc of %d byte failed\n", size); 2016 return(NULL); 2017 } 2018 2019 /* 2020 * The content of the entity definition is copied in a buffer. 2021 */ 2022 2023 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2024 input = ctxt->input; 2025 GROW; 2026 NEXT; 2027 c = CUR_CHAR(l); 2028 /* 2029 * NOTE: 4.4.5 Included in Literal 2030 * When a parameter entity reference appears in a literal entity 2031 * value, ... a single or double quote character in the replacement 2032 * text is always treated as a normal data character and will not 2033 * terminate the literal. 
2034 * In practice it means we stop the loop only when back at parsing 2035 * the initial entity and the quote is found 2036 */ 2037 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2038 (ctxt->input != input))) { 2039 if (len + 5 >= size) { 2040 size *= 2; 2041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2042 if (buf == NULL) { 2043 xmlGenericError(xmlGenericErrorContext, 2044 "realloc of %d byte failed\n", size); 2045 return(NULL); 2046 } 2047 } 2048 COPY_BUF(l,buf,len,c); 2049 NEXTL(l); 2050 /* 2051 * Pop-up of finished entities. 2052 */ 2053 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2054 xmlPopInput(ctxt); 2055 2056 GROW; 2057 c = CUR_CHAR(l); 2058 if (c == 0) { 2059 GROW; 2060 c = CUR_CHAR(l); 2061 } 2062 } 2063 buf[len] = 0; 2064 2065 /* 2066 * Raise problem w.r.t. '&' and '%' being used in non-entities 2067 * reference constructs. Note Charref will be handled in 2068 * xmlStringDecodeEntities() 2069 */ 2070 cur = buf; 2071 while (*cur != 0) { /* non input consuming */ 2072 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2073 xmlChar *name; 2074 xmlChar tmp = *cur; 2075 2076 cur++; 2077 name = xmlParseStringName(ctxt, &cur); 2078 if ((name == NULL) || (*cur != ';')) { 2079 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2081 ctxt->sax->error(ctxt->userData, 2082 "EntityValue: '%c' forbidden except for entities references\n", 2083 tmp); 2084 ctxt->wellFormed = 0; 2085 ctxt->disableSAX = 1; 2086 } 2087 if ((ctxt->inSubset == 1) && (tmp == '%')) { 2088 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2090 ctxt->sax->error(ctxt->userData, 2091 "EntityValue: PEReferences forbidden in internal subset\n", 2092 tmp); 2093 ctxt->wellFormed = 0; 2094 ctxt->disableSAX = 1; 2095 } 2096 if (name != NULL) 2097 xmlFree(name); 2098 } 2099 cur++; 2100 } 2101 2102 /* 2103 * Then PEReference entities are substituted. 
2104 */ 2105 if (c != stop) { 2106 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2108 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2109 ctxt->wellFormed = 0; 2110 ctxt->disableSAX = 1; 2111 xmlFree(buf); 2112 } else { 2113 NEXT; 2114 /* 2115 * NOTE: 4.4.7 Bypassed 2116 * When a general entity reference appears in the EntityValue in 2117 * an entity declaration, it is bypassed and left as is. 2118 * so XML_SUBSTITUTE_REF is not set here. 2119 */ 2120 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2121 0, 0, 0); 2122 if (orig != NULL) 2123 *orig = buf; 2124 else 2125 xmlFree(buf); 2126 } 2127 2128 return(ret); 2129} 2130 2131/** 2132 * xmlParseAttValue: 2133 * @ctxt: an XML parser context 2134 * 2135 * parse a value for an attribute 2136 * Note: the parser won't do substitution of entities here, this 2137 * will be handled later in xmlStringGetNodeList 2138 * 2139 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2140 * "'" ([^<&'] | Reference)* "'" 2141 * 2142 * 3.3.3 Attribute-Value Normalization: 2143 * Before the value of an attribute is passed to the application or 2144 * checked for validity, the XML processor must normalize it as follows: 2145 * - a character reference is processed by appending the referenced 2146 * character to the attribute value 2147 * - an entity reference is processed by recursively processing the 2148 * replacement text of the entity 2149 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2150 * appending #x20 to the normalized value, except that only a single 2151 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2152 * parsed entity or the literal entity value of an internal parsed entity 2153 * - other characters are processed by appending them to the normalized value 2154 * If the declared value is not CDATA, then the XML processor must further 2155 * process the normalized attribute value by discarding any 
leading and 2156 * trailing space (#x20) characters, and by replacing sequences of space 2157 * (#x20) characters by a single space (#x20) character. 2158 * All attributes for which no declaration has been read should be treated 2159 * by a non-validating parser as if declared CDATA. 2160 * 2161 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2162 */ 2163 2164xmlChar * 2165xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2166 xmlChar limit = 0; 2167 xmlChar *buf = NULL; 2168 int len = 0; 2169 int buf_size = 0; 2170 int c, l; 2171 xmlChar *current = NULL; 2172 xmlEntityPtr ent; 2173 2174 2175 SHRINK; 2176 if (NXT(0) == '"') { 2177 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2178 limit = '"'; 2179 NEXT; 2180 } else if (NXT(0) == '\'') { 2181 limit = '\''; 2182 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2183 NEXT; 2184 } else { 2185 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2187 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2188 ctxt->wellFormed = 0; 2189 ctxt->disableSAX = 1; 2190 return(NULL); 2191 } 2192 2193 /* 2194 * allocate a translation buffer. 2195 */ 2196 buf_size = XML_PARSER_BUFFER_SIZE; 2197 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2198 if (buf == NULL) { 2199 perror("xmlParseAttValue: malloc failed"); 2200 return(NULL); 2201 } 2202 2203 /* 2204 * Ok loop until we reach one of the ending char or a size limit. 
2205 */ 2206 c = CUR_CHAR(l); 2207 while (((NXT(0) != limit) && /* checked */ 2208 (c != '<')) || (ctxt->token != 0)) { 2209 if (c == 0) break; 2210 if (ctxt->token == '&') { 2211 /* 2212 * The reparsing will be done in xmlStringGetNodeList() 2213 * called by the attribute() function in SAX.c 2214 */ 2215 static xmlChar buffer[6] = "&"; 2216 2217 if (len > buf_size - 10) { 2218 growBuffer(buf); 2219 } 2220 current = &buffer[0]; 2221 while (*current != 0) { /* non input consuming */ 2222 buf[len++] = *current++; 2223 } 2224 ctxt->token = 0; 2225 } else if (c == '&') { 2226 if (NXT(1) == '#') { 2227 int val = xmlParseCharRef(ctxt); 2228 if (val == '&') { 2229 /* 2230 * The reparsing will be done in xmlStringGetNodeList() 2231 * called by the attribute() function in SAX.c 2232 */ 2233 static xmlChar buffer[6] = "&"; 2234 2235 if (len > buf_size - 10) { 2236 growBuffer(buf); 2237 } 2238 current = &buffer[0]; 2239 while (*current != 0) { /* non input consuming */ 2240 buf[len++] = *current++; 2241 } 2242 } else { 2243 if (len > buf_size - 10) { 2244 growBuffer(buf); 2245 } 2246 len += xmlCopyChar(0, &buf[len], val); 2247 } 2248 } else { 2249 ent = xmlParseEntityRef(ctxt); 2250 if ((ent != NULL) && 2251 (ctxt->replaceEntities != 0)) { 2252 xmlChar *rep; 2253 2254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2255 rep = xmlStringDecodeEntities(ctxt, ent->content, 2256 XML_SUBSTITUTE_REF, 0, 0, 0); 2257 if (rep != NULL) { 2258 current = rep; 2259 while (*current != 0) { /* non input consuming */ 2260 buf[len++] = *current++; 2261 if (len > buf_size - 10) { 2262 growBuffer(buf); 2263 } 2264 } 2265 xmlFree(rep); 2266 } 2267 } else { 2268 if (len > buf_size - 10) { 2269 growBuffer(buf); 2270 } 2271 if (ent->content != NULL) 2272 buf[len++] = ent->content[0]; 2273 } 2274 } else if (ent != NULL) { 2275 int i = xmlStrlen(ent->name); 2276 const xmlChar *cur = ent->name; 2277 2278 /* 2279 * This may look absurd but is needed to detect 2280 * entities problems 2281 */ 2282 if 
((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2283 (ent->content != NULL)) { 2284 xmlChar *rep; 2285 rep = xmlStringDecodeEntities(ctxt, ent->content, 2286 XML_SUBSTITUTE_REF, 0, 0, 0); 2287 if (rep != NULL) 2288 xmlFree(rep); 2289 } 2290 2291 /* 2292 * Just output the reference 2293 */ 2294 buf[len++] = '&'; 2295 if (len > buf_size - i - 10) { 2296 growBuffer(buf); 2297 } 2298 for (;i > 0;i--) 2299 buf[len++] = *cur++; 2300 buf[len++] = ';'; 2301 } 2302 } 2303 } else { 2304 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2305 COPY_BUF(l,buf,len,0x20); 2306 if (len > buf_size - 10) { 2307 growBuffer(buf); 2308 } 2309 } else { 2310 COPY_BUF(l,buf,len,c); 2311 if (len > buf_size - 10) { 2312 growBuffer(buf); 2313 } 2314 } 2315 NEXTL(l); 2316 } 2317 GROW; 2318 c = CUR_CHAR(l); 2319 } 2320 buf[len++] = 0; 2321 if (RAW == '<') { 2322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2324 ctxt->sax->error(ctxt->userData, 2325 "Unescaped '<' not allowed in attributes values\n"); 2326 ctxt->wellFormed = 0; 2327 ctxt->disableSAX = 1; 2328 } else if (RAW != limit) { 2329 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2331 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2332 ctxt->wellFormed = 0; 2333 ctxt->disableSAX = 1; 2334 } else 2335 NEXT; 2336 return(buf); 2337} 2338 2339/** 2340 * xmlParseSystemLiteral: 2341 * @ctxt: an XML parser context 2342 * 2343 * parse an XML Literal 2344 * 2345 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2346 * 2347 * Returns the SystemLiteral parsed or NULL 2348 */ 2349 2350xmlChar * 2351xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2352 xmlChar *buf = NULL; 2353 int len = 0; 2354 int size = XML_PARSER_BUFFER_SIZE; 2355 int cur, l; 2356 xmlChar stop; 2357 int state = ctxt->instate; 2358 int count = 0; 2359 2360 SHRINK; 2361 if (RAW == '"') { 2362 NEXT; 2363 stop = '"'; 2364 } else if (RAW == 
'\'') { 2365 NEXT; 2366 stop = '\''; 2367 } else { 2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2370 ctxt->sax->error(ctxt->userData, 2371 "SystemLiteral \" or ' expected\n"); 2372 ctxt->wellFormed = 0; 2373 ctxt->disableSAX = 1; 2374 return(NULL); 2375 } 2376 2377 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2378 if (buf == NULL) { 2379 xmlGenericError(xmlGenericErrorContext, 2380 "malloc of %d byte failed\n", size); 2381 return(NULL); 2382 } 2383 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2384 cur = CUR_CHAR(l); 2385 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2386 if (len + 5 >= size) { 2387 size *= 2; 2388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2389 if (buf == NULL) { 2390 xmlGenericError(xmlGenericErrorContext, 2391 "realloc of %d byte failed\n", size); 2392 ctxt->instate = (xmlParserInputState) state; 2393 return(NULL); 2394 } 2395 } 2396 count++; 2397 if (count > 50) { 2398 GROW; 2399 count = 0; 2400 } 2401 COPY_BUF(l,buf,len,cur); 2402 NEXTL(l); 2403 cur = CUR_CHAR(l); 2404 if (cur == 0) { 2405 GROW; 2406 SHRINK; 2407 cur = CUR_CHAR(l); 2408 } 2409 } 2410 buf[len] = 0; 2411 ctxt->instate = (xmlParserInputState) state; 2412 if (!IS_CHAR(cur)) { 2413 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2415 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2416 ctxt->wellFormed = 0; 2417 ctxt->disableSAX = 1; 2418 } else { 2419 NEXT; 2420 } 2421 return(buf); 2422} 2423 2424/** 2425 * xmlParsePubidLiteral: 2426 * @ctxt: an XML parser context 2427 * 2428 * parse an XML public literal 2429 * 2430 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2431 * 2432 * Returns the PubidLiteral parsed or NULL. 
2433 */ 2434 2435xmlChar * 2436xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2437 xmlChar *buf = NULL; 2438 int len = 0; 2439 int size = XML_PARSER_BUFFER_SIZE; 2440 xmlChar cur; 2441 xmlChar stop; 2442 int count = 0; 2443 2444 SHRINK; 2445 if (RAW == '"') { 2446 NEXT; 2447 stop = '"'; 2448 } else if (RAW == '\'') { 2449 NEXT; 2450 stop = '\''; 2451 } else { 2452 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2454 ctxt->sax->error(ctxt->userData, 2455 "SystemLiteral \" or ' expected\n"); 2456 ctxt->wellFormed = 0; 2457 ctxt->disableSAX = 1; 2458 return(NULL); 2459 } 2460 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2461 if (buf == NULL) { 2462 xmlGenericError(xmlGenericErrorContext, 2463 "malloc of %d byte failed\n", size); 2464 return(NULL); 2465 } 2466 cur = CUR; 2467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2468 if (len + 1 >= size) { 2469 size *= 2; 2470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2471 if (buf == NULL) { 2472 xmlGenericError(xmlGenericErrorContext, 2473 "realloc of %d byte failed\n", size); 2474 return(NULL); 2475 } 2476 } 2477 buf[len++] = cur; 2478 count++; 2479 if (count > 50) { 2480 GROW; 2481 count = 0; 2482 } 2483 NEXT; 2484 cur = CUR; 2485 if (cur == 0) { 2486 GROW; 2487 SHRINK; 2488 cur = CUR; 2489 } 2490 } 2491 buf[len] = 0; 2492 if (cur != stop) { 2493 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2495 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2496 ctxt->wellFormed = 0; 2497 ctxt->disableSAX = 1; 2498 } else { 2499 NEXT; 2500 } 2501 return(buf); 2502} 2503 2504void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2505/** 2506 * xmlParseCharData: 2507 * @ctxt: an XML parser context 2508 * @cdata: int indicating whether we are within a CDATA section 2509 * 2510 * parse a CharData section. 
2511 * if we are within a CDATA section ']]>' marks an end of section. 2512 * 2513 * The right angle bracket (>) may be represented using the string ">", 2514 * and must, for compatibility, be escaped using ">" or a character 2515 * reference when it appears in the string "]]>" in content, when that 2516 * string is not marking the end of a CDATA section. 2517 * 2518 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2519 */ 2520 2521void 2522xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2523 const xmlChar *in; 2524 int nbchar = 0; 2525 int line = ctxt->input->line; 2526 int col = ctxt->input->col; 2527 2528 SHRINK; 2529 GROW; 2530 /* 2531 * Accelerated common case where input don't need to be 2532 * modified before passing it to the handler. 2533 */ 2534 if ((ctxt->token == 0) && (!cdata)) { 2535 in = ctxt->input->cur; 2536 do { 2537get_more: 2538 while (((*in >= 0x20) && (*in != '<') && 2539 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2540 in++; 2541 if (*in == 0xA) { 2542 ctxt->input->line++; 2543 in++; 2544 while (*in == 0xA) { 2545 ctxt->input->line++; 2546 in++; 2547 } 2548 goto get_more; 2549 } 2550 nbchar = in - ctxt->input->cur; 2551 if (nbchar > 0) { 2552 if (IS_BLANK(*ctxt->input->cur)) { 2553 const xmlChar *tmp = ctxt->input->cur; 2554 ctxt->input->cur = in; 2555 if (areBlanks(ctxt, tmp, nbchar)) { 2556 if (ctxt->sax->ignorableWhitespace != NULL) 2557 ctxt->sax->ignorableWhitespace(ctxt->userData, 2558 tmp, nbchar); 2559 } else { 2560 if (ctxt->sax->characters != NULL) 2561 ctxt->sax->characters(ctxt->userData, 2562 tmp, nbchar); 2563 } 2564 line = ctxt->input->line; 2565 col = ctxt->input->col; 2566 } else { 2567 if (ctxt->sax->characters != NULL) 2568 ctxt->sax->characters(ctxt->userData, 2569 ctxt->input->cur, nbchar); 2570 line = ctxt->input->line; 2571 col = ctxt->input->col; 2572 } 2573 } 2574 ctxt->input->cur = in; 2575 if (*in == 0xD) { 2576 in++; 2577 if (*in == 0xA) { 2578 ctxt->input->cur = in; 2579 in++; 2580 ctxt->input->line++; 
2581 continue; /* while */ 2582 } 2583 in--; 2584 } 2585 if (*in == '<') { 2586 return; 2587 } 2588 if (*in == '&') { 2589 return; 2590 } 2591 SHRINK; 2592 GROW; 2593 in = ctxt->input->cur; 2594 } while ((*in >= 0x20) && (*in <= 0x7F)); 2595 nbchar = 0; 2596 } 2597 ctxt->input->line = line; 2598 ctxt->input->col = col; 2599 xmlParseCharDataComplex(ctxt, cdata); 2600} 2601 2602void 2603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2605 int nbchar = 0; 2606 int cur, l; 2607 int count = 0; 2608 2609 SHRINK; 2610 GROW; 2611 cur = CUR_CHAR(l); 2612 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2613 ((cur != '&') || (ctxt->token == '&')) && 2614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2615 if ((cur == ']') && (NXT(1) == ']') && 2616 (NXT(2) == '>')) { 2617 if (cdata) break; 2618 else { 2619 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2621 ctxt->sax->error(ctxt->userData, 2622 "Sequence ']]>' not allowed in content\n"); 2623 /* Should this be relaxed ??? I see a "must here */ 2624 ctxt->wellFormed = 0; 2625 ctxt->disableSAX = 1; 2626 } 2627 } 2628 COPY_BUF(l,buf,nbchar,cur); 2629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2630 /* 2631 * Ok the segment is to be consumed as chars. 2632 */ 2633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2634 if (areBlanks(ctxt, buf, nbchar)) { 2635 if (ctxt->sax->ignorableWhitespace != NULL) 2636 ctxt->sax->ignorableWhitespace(ctxt->userData, 2637 buf, nbchar); 2638 } else { 2639 if (ctxt->sax->characters != NULL) 2640 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2641 } 2642 } 2643 nbchar = 0; 2644 } 2645 count++; 2646 if (count > 50) { 2647 GROW; 2648 count = 0; 2649 } 2650 NEXTL(l); 2651 cur = CUR_CHAR(l); 2652 } 2653 if (nbchar != 0) { 2654 /* 2655 * Ok the segment is to be consumed as chars. 
2656 */ 2657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2658 if (areBlanks(ctxt, buf, nbchar)) { 2659 if (ctxt->sax->ignorableWhitespace != NULL) 2660 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2661 } else { 2662 if (ctxt->sax->characters != NULL) 2663 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2664 } 2665 } 2666 } 2667} 2668 2669/** 2670 * xmlParseExternalID: 2671 * @ctxt: an XML parser context 2672 * @publicID: a xmlChar** receiving PubidLiteral 2673 * @strict: indicate whether we should restrict parsing to only 2674 * production [75], see NOTE below 2675 * 2676 * Parse an External ID or a Public ID 2677 * 2678 * NOTE: Productions [75] and [83] interract badly since [75] can generate 2679 * 'PUBLIC' S PubidLiteral S SystemLiteral 2680 * 2681 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2682 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2683 * 2684 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2685 * 2686 * Returns the function returns SystemLiteral and in the second 2687 * case publicID receives PubidLiteral, is strict is off 2688 * it is possible to return NULL and have publicID set. 
2689 */ 2690 2691xmlChar * 2692xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2693 xmlChar *URI = NULL; 2694 2695 SHRINK; 2696 2697 *publicID = NULL; 2698 if ((RAW == 'S') && (NXT(1) == 'Y') && 2699 (NXT(2) == 'S') && (NXT(3) == 'T') && 2700 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2701 SKIP(6); 2702 if (!IS_BLANK(CUR)) { 2703 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2705 ctxt->sax->error(ctxt->userData, 2706 "Space required after 'SYSTEM'\n"); 2707 ctxt->wellFormed = 0; 2708 ctxt->disableSAX = 1; 2709 } 2710 SKIP_BLANKS; 2711 URI = xmlParseSystemLiteral(ctxt); 2712 if (URI == NULL) { 2713 ctxt->errNo = XML_ERR_URI_REQUIRED; 2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2715 ctxt->sax->error(ctxt->userData, 2716 "xmlParseExternalID: SYSTEM, no URI\n"); 2717 ctxt->wellFormed = 0; 2718 ctxt->disableSAX = 1; 2719 } 2720 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2721 (NXT(2) == 'B') && (NXT(3) == 'L') && 2722 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2723 SKIP(6); 2724 if (!IS_BLANK(CUR)) { 2725 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2727 ctxt->sax->error(ctxt->userData, 2728 "Space required after 'PUBLIC'\n"); 2729 ctxt->wellFormed = 0; 2730 ctxt->disableSAX = 1; 2731 } 2732 SKIP_BLANKS; 2733 *publicID = xmlParsePubidLiteral(ctxt); 2734 if (*publicID == NULL) { 2735 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2737 ctxt->sax->error(ctxt->userData, 2738 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2739 ctxt->wellFormed = 0; 2740 ctxt->disableSAX = 1; 2741 } 2742 if (strict) { 2743 /* 2744 * We don't handle [83] so "S SystemLiteral" is required. 
2745 */ 2746 if (!IS_BLANK(CUR)) { 2747 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2749 ctxt->sax->error(ctxt->userData, 2750 "Space required after the Public Identifier\n"); 2751 ctxt->wellFormed = 0; 2752 ctxt->disableSAX = 1; 2753 } 2754 } else { 2755 /* 2756 * We handle [83] so we return immediately, if 2757 * "S SystemLiteral" is not detected. From a purely parsing 2758 * point of view that's a nice mess. 2759 */ 2760 const xmlChar *ptr; 2761 GROW; 2762 2763 ptr = CUR_PTR; 2764 if (!IS_BLANK(*ptr)) return(NULL); 2765 2766 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2768 } 2769 SKIP_BLANKS; 2770 URI = xmlParseSystemLiteral(ctxt); 2771 if (URI == NULL) { 2772 ctxt->errNo = XML_ERR_URI_REQUIRED; 2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2774 ctxt->sax->error(ctxt->userData, 2775 "xmlParseExternalID: PUBLIC, no URI\n"); 2776 ctxt->wellFormed = 0; 2777 ctxt->disableSAX = 1; 2778 } 2779 } 2780 return(URI); 2781} 2782 2783/** 2784 * xmlParseComment: 2785 * @ctxt: an XML parser context 2786 * 2787 * Skip an XML (SGML) comment <!-- .... --> 2788 * The spec says that "For compatibility, the string "--" (double-hyphen) 2789 * must not occur within comments. " 2790 * 2791 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2792 */ 2793void 2794xmlParseComment(xmlParserCtxtPtr ctxt) { 2795 xmlChar *buf = NULL; 2796 int len; 2797 int size = XML_PARSER_BUFFER_SIZE; 2798 int q, ql; 2799 int r, rl; 2800 int cur, l; 2801 xmlParserInputState state; 2802 xmlParserInputPtr input = ctxt->input; 2803 int count = 0; 2804 2805 /* 2806 * Check that there is a comment right here. 
2807 */ 2808 if ((RAW != '<') || (NXT(1) != '!') || 2809 (NXT(2) != '-') || (NXT(3) != '-')) return; 2810 2811 state = ctxt->instate; 2812 ctxt->instate = XML_PARSER_COMMENT; 2813 SHRINK; 2814 SKIP(4); 2815 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2816 if (buf == NULL) { 2817 xmlGenericError(xmlGenericErrorContext, 2818 "malloc of %d byte failed\n", size); 2819 ctxt->instate = state; 2820 return; 2821 } 2822 q = CUR_CHAR(ql); 2823 NEXTL(ql); 2824 r = CUR_CHAR(rl); 2825 NEXTL(rl); 2826 cur = CUR_CHAR(l); 2827 len = 0; 2828 while (IS_CHAR(cur) && /* checked */ 2829 ((cur != '>') || 2830 (r != '-') || (q != '-'))) { 2831 if ((r == '-') && (q == '-') && (len > 1)) { 2832 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2834 ctxt->sax->error(ctxt->userData, 2835 "Comment must not contain '--' (double-hyphen)`\n"); 2836 ctxt->wellFormed = 0; 2837 ctxt->disableSAX = 1; 2838 } 2839 if (len + 5 >= size) { 2840 size *= 2; 2841 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2842 if (buf == NULL) { 2843 xmlGenericError(xmlGenericErrorContext, 2844 "realloc of %d byte failed\n", size); 2845 ctxt->instate = state; 2846 return; 2847 } 2848 } 2849 COPY_BUF(ql,buf,len,q); 2850 q = r; 2851 ql = rl; 2852 r = cur; 2853 rl = l; 2854 2855 count++; 2856 if (count > 50) { 2857 GROW; 2858 count = 0; 2859 } 2860 NEXTL(l); 2861 cur = CUR_CHAR(l); 2862 if (cur == 0) { 2863 SHRINK; 2864 GROW; 2865 cur = CUR_CHAR(l); 2866 } 2867 } 2868 buf[len] = 0; 2869 if (!IS_CHAR(cur)) { 2870 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2872 ctxt->sax->error(ctxt->userData, 2873 "Comment not terminated \n<!--%.50s\n", buf); 2874 ctxt->wellFormed = 0; 2875 ctxt->disableSAX = 1; 2876 xmlFree(buf); 2877 } else { 2878 if (input != ctxt->input) { 2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2881 
ctxt->sax->error(ctxt->userData, 2882"Comment doesn't start and stop in the same entity\n"); 2883 ctxt->wellFormed = 0; 2884 ctxt->disableSAX = 1; 2885 } 2886 NEXT; 2887 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2888 (!ctxt->disableSAX)) 2889 ctxt->sax->comment(ctxt->userData, buf); 2890 xmlFree(buf); 2891 } 2892 ctxt->instate = state; 2893} 2894 2895/** 2896 * xmlParsePITarget: 2897 * @ctxt: an XML parser context 2898 * 2899 * parse the name of a PI 2900 * 2901 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2902 * 2903 * Returns the PITarget name or NULL 2904 */ 2905 2906xmlChar * 2907xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2908 xmlChar *name; 2909 2910 name = xmlParseName(ctxt); 2911 if ((name != NULL) && 2912 ((name[0] == 'x') || (name[0] == 'X')) && 2913 ((name[1] == 'm') || (name[1] == 'M')) && 2914 ((name[2] == 'l') || (name[2] == 'L'))) { 2915 int i; 2916 if ((name[0] == 'x') && (name[1] == 'm') && 2917 (name[2] == 'l') && (name[3] == 0)) { 2918 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2920 ctxt->sax->error(ctxt->userData, 2921 "XML declaration allowed only at the start of the document\n"); 2922 ctxt->wellFormed = 0; 2923 ctxt->disableSAX = 1; 2924 return(name); 2925 } else if (name[3] == 0) { 2926 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2928 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2929 ctxt->wellFormed = 0; 2930 ctxt->disableSAX = 1; 2931 return(name); 2932 } 2933 for (i = 0;;i++) { 2934 if (xmlW3CPIs[i] == NULL) break; 2935 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2936 return(name); 2937 } 2938 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2940 ctxt->sax->warning(ctxt->userData, 2941 "xmlParsePItarget: invalid name prefix 'xml'\n"); 2942 } 2943 } 2944 return(name); 2945} 2946 2947#ifdef LIBXML_CATALOG_ENABLED 
2948/** 2949 * xmlParseCatalogPI: 2950 * @ctxt: an XML parser context 2951 * @catalog: the PI value string 2952 * 2953 * parse an XML Catalog Processing Instruction. 2954 * 2955 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 2956 * 2957 * Occurs only if allowed by the user and if happening in the Misc 2958 * part of the document before any doctype informations 2959 * This will add the given catalog to the parsing context in order 2960 * to be used if there is a resolution need further down in the document 2961 */ 2962 2963static void 2964xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 2965 xmlChar *URL = NULL; 2966 const xmlChar *tmp, *base; 2967 xmlChar marker; 2968 2969 tmp = catalog; 2970 while (IS_BLANK(*tmp)) tmp++; 2971 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 2972 goto error; 2973 tmp += 7; 2974 while (IS_BLANK(*tmp)) tmp++; 2975 if (*tmp != '=') { 2976 return; 2977 } 2978 tmp++; 2979 while (IS_BLANK(*tmp)) tmp++; 2980 marker = *tmp; 2981 if ((marker != '\'') && (marker != '"')) 2982 goto error; 2983 tmp++; 2984 base = tmp; 2985 while ((*tmp != 0) && (*tmp != marker)) tmp++; 2986 if (*tmp == 0) 2987 goto error; 2988 URL = xmlStrndup(base, tmp - base); 2989 tmp++; 2990 while (IS_BLANK(*tmp)) tmp++; 2991 if (*tmp != 0) 2992 goto error; 2993 2994 if (URL != NULL) { 2995 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 2996 xmlFree(URL); 2997 } 2998 return; 2999 3000error: 3001 ctxt->errNo = XML_WAR_CATALOG_PI; 3002 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 3003 ctxt->sax->warning(ctxt->userData, 3004 "Catalog PI syntax error: %s\n", catalog); 3005 if (URL != NULL) 3006 xmlFree(URL); 3007} 3008#endif 3009 3010/** 3011 * xmlParsePI: 3012 * @ctxt: an XML parser context 3013 * 3014 * parse an XML Processing Instruction. 3015 * 3016 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3017 * 3018 * The processing is transfered to SAX once parsed. 
3019 */ 3020 3021void 3022xmlParsePI(xmlParserCtxtPtr ctxt) { 3023 xmlChar *buf = NULL; 3024 int len = 0; 3025 int size = XML_PARSER_BUFFER_SIZE; 3026 int cur, l; 3027 xmlChar *target; 3028 xmlParserInputState state; 3029 int count = 0; 3030 3031 if ((RAW == '<') && (NXT(1) == '?')) { 3032 xmlParserInputPtr input = ctxt->input; 3033 state = ctxt->instate; 3034 ctxt->instate = XML_PARSER_PI; 3035 /* 3036 * this is a Processing Instruction. 3037 */ 3038 SKIP(2); 3039 SHRINK; 3040 3041 /* 3042 * Parse the target name and check for special support like 3043 * namespace. 3044 */ 3045 target = xmlParsePITarget(ctxt); 3046 if (target != NULL) { 3047 if ((RAW == '?') && (NXT(1) == '>')) { 3048 if (input != ctxt->input) { 3049 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3051 ctxt->sax->error(ctxt->userData, 3052 "PI declaration doesn't start and stop in the same entity\n"); 3053 ctxt->wellFormed = 0; 3054 ctxt->disableSAX = 1; 3055 } 3056 SKIP(2); 3057 3058 /* 3059 * SAX: PI detected. 
3060 */ 3061 if ((ctxt->sax) && (!ctxt->disableSAX) && 3062 (ctxt->sax->processingInstruction != NULL)) 3063 ctxt->sax->processingInstruction(ctxt->userData, 3064 target, NULL); 3065 ctxt->instate = state; 3066 xmlFree(target); 3067 return; 3068 } 3069 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3070 if (buf == NULL) { 3071 xmlGenericError(xmlGenericErrorContext, 3072 "malloc of %d byte failed\n", size); 3073 ctxt->instate = state; 3074 return; 3075 } 3076 cur = CUR; 3077 if (!IS_BLANK(cur)) { 3078 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3080 ctxt->sax->error(ctxt->userData, 3081 "xmlParsePI: PI %s space expected\n", target); 3082 ctxt->wellFormed = 0; 3083 ctxt->disableSAX = 1; 3084 } 3085 SKIP_BLANKS; 3086 cur = CUR_CHAR(l); 3087 while (IS_CHAR(cur) && /* checked */ 3088 ((cur != '?') || (NXT(1) != '>'))) { 3089 if (len + 5 >= size) { 3090 size *= 2; 3091 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3092 if (buf == NULL) { 3093 xmlGenericError(xmlGenericErrorContext, 3094 "realloc of %d byte failed\n", size); 3095 ctxt->instate = state; 3096 return; 3097 } 3098 } 3099 count++; 3100 if (count > 50) { 3101 GROW; 3102 count = 0; 3103 } 3104 COPY_BUF(l,buf,len,cur); 3105 NEXTL(l); 3106 cur = CUR_CHAR(l); 3107 if (cur == 0) { 3108 SHRINK; 3109 GROW; 3110 cur = CUR_CHAR(l); 3111 } 3112 } 3113 buf[len] = 0; 3114 if (cur != '?') { 3115 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3117 ctxt->sax->error(ctxt->userData, 3118 "xmlParsePI: PI %s never end ...\n", target); 3119 ctxt->wellFormed = 0; 3120 ctxt->disableSAX = 1; 3121 } else { 3122 if (input != ctxt->input) { 3123 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3125 ctxt->sax->error(ctxt->userData, 3126 "PI declaration doesn't start and stop in the same entity\n"); 3127 ctxt->wellFormed = 0; 3128 ctxt->disableSAX = 1; 3129 } 3130 
SKIP(2); 3131 3132#ifdef LIBXML_CATALOG_ENABLED 3133 if (((state == XML_PARSER_MISC) || 3134 (state == XML_PARSER_START)) && 3135 (xmlStrEqual(target, XML_CATALOG_PI))) { 3136 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3137 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3138 (allow == XML_CATA_ALLOW_ALL)) 3139 xmlParseCatalogPI(ctxt, buf); 3140 } 3141#endif 3142 3143 3144 /* 3145 * SAX: PI detected. 3146 */ 3147 if ((ctxt->sax) && (!ctxt->disableSAX) && 3148 (ctxt->sax->processingInstruction != NULL)) 3149 ctxt->sax->processingInstruction(ctxt->userData, 3150 target, buf); 3151 } 3152 xmlFree(buf); 3153 xmlFree(target); 3154 } else { 3155 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3157 ctxt->sax->error(ctxt->userData, 3158 "xmlParsePI : no target name\n"); 3159 ctxt->wellFormed = 0; 3160 ctxt->disableSAX = 1; 3161 } 3162 ctxt->instate = state; 3163 } 3164} 3165 3166/** 3167 * xmlParseNotationDecl: 3168 * @ctxt: an XML parser context 3169 * 3170 * parse a notation declaration 3171 * 3172 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3173 * 3174 * Hence there is actually 3 choices: 3175 * 'PUBLIC' S PubidLiteral 3176 * 'PUBLIC' S PubidLiteral S SystemLiteral 3177 * and 'SYSTEM' S SystemLiteral 3178 * 3179 * See the NOTE on xmlParseExternalID(). 
3180 */ 3181 3182void 3183xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3184 xmlChar *name; 3185 xmlChar *Pubid; 3186 xmlChar *Systemid; 3187 3188 if ((RAW == '<') && (NXT(1) == '!') && 3189 (NXT(2) == 'N') && (NXT(3) == 'O') && 3190 (NXT(4) == 'T') && (NXT(5) == 'A') && 3191 (NXT(6) == 'T') && (NXT(7) == 'I') && 3192 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3193 xmlParserInputPtr input = ctxt->input; 3194 SHRINK; 3195 SKIP(10); 3196 if (!IS_BLANK(CUR)) { 3197 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3199 ctxt->sax->error(ctxt->userData, 3200 "Space required after '<!NOTATION'\n"); 3201 ctxt->wellFormed = 0; 3202 ctxt->disableSAX = 1; 3203 return; 3204 } 3205 SKIP_BLANKS; 3206 3207 name = xmlParseName(ctxt); 3208 if (name == NULL) { 3209 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3211 ctxt->sax->error(ctxt->userData, 3212 "NOTATION: Name expected here\n"); 3213 ctxt->wellFormed = 0; 3214 ctxt->disableSAX = 1; 3215 return; 3216 } 3217 if (!IS_BLANK(CUR)) { 3218 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3220 ctxt->sax->error(ctxt->userData, 3221 "Space required after the NOTATION name'\n"); 3222 ctxt->wellFormed = 0; 3223 ctxt->disableSAX = 1; 3224 return; 3225 } 3226 SKIP_BLANKS; 3227 3228 /* 3229 * Parse the IDs. 
3230 */ 3231 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3232 SKIP_BLANKS; 3233 3234 if (RAW == '>') { 3235 if (input != ctxt->input) { 3236 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3238 ctxt->sax->error(ctxt->userData, 3239"Notation declaration doesn't start and stop in the same entity\n"); 3240 ctxt->wellFormed = 0; 3241 ctxt->disableSAX = 1; 3242 } 3243 NEXT; 3244 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3245 (ctxt->sax->notationDecl != NULL)) 3246 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3247 } else { 3248 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3250 ctxt->sax->error(ctxt->userData, 3251 "'>' required to close NOTATION declaration\n"); 3252 ctxt->wellFormed = 0; 3253 ctxt->disableSAX = 1; 3254 } 3255 xmlFree(name); 3256 if (Systemid != NULL) xmlFree(Systemid); 3257 if (Pubid != NULL) xmlFree(Pubid); 3258 } 3259} 3260 3261/** 3262 * xmlParseEntityDecl: 3263 * @ctxt: an XML parser context 3264 * 3265 * parse <!ENTITY declarations 3266 * 3267 * [70] EntityDecl ::= GEDecl | PEDecl 3268 * 3269 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3270 * 3271 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3272 * 3273 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3274 * 3275 * [74] PEDef ::= EntityValue | ExternalID 3276 * 3277 * [76] NDataDecl ::= S 'NDATA' S Name 3278 * 3279 * [ VC: Notation Declared ] 3280 * The Name must match the declared name of a notation. 
3281 */ 3282 3283void 3284xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3285 xmlChar *name = NULL; 3286 xmlChar *value = NULL; 3287 xmlChar *URI = NULL, *literal = NULL; 3288 xmlChar *ndata = NULL; 3289 int isParameter = 0; 3290 xmlChar *orig = NULL; 3291 3292 GROW; 3293 if ((RAW == '<') && (NXT(1) == '!') && 3294 (NXT(2) == 'E') && (NXT(3) == 'N') && 3295 (NXT(4) == 'T') && (NXT(5) == 'I') && 3296 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3297 xmlParserInputPtr input = ctxt->input; 3298 ctxt->instate = XML_PARSER_ENTITY_DECL; 3299 SHRINK; 3300 SKIP(8); 3301 if (!IS_BLANK(CUR)) { 3302 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3304 ctxt->sax->error(ctxt->userData, 3305 "Space required after '<!ENTITY'\n"); 3306 ctxt->wellFormed = 0; 3307 ctxt->disableSAX = 1; 3308 } 3309 SKIP_BLANKS; 3310 3311 if (RAW == '%') { 3312 NEXT; 3313 if (!IS_BLANK(CUR)) { 3314 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3316 ctxt->sax->error(ctxt->userData, 3317 "Space required after '%'\n"); 3318 ctxt->wellFormed = 0; 3319 ctxt->disableSAX = 1; 3320 } 3321 SKIP_BLANKS; 3322 isParameter = 1; 3323 } 3324 3325 name = xmlParseName(ctxt); 3326 if (name == NULL) { 3327 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3329 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3330 ctxt->wellFormed = 0; 3331 ctxt->disableSAX = 1; 3332 return; 3333 } 3334 if (!IS_BLANK(CUR)) { 3335 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3337 ctxt->sax->error(ctxt->userData, 3338 "Space required after the entity name\n"); 3339 ctxt->wellFormed = 0; 3340 ctxt->disableSAX = 1; 3341 } 3342 SKIP_BLANKS; 3343 3344 /* 3345 * handle the various case of definitions... 
3346 */ 3347 if (isParameter) { 3348 if ((RAW == '"') || (RAW == '\'')) { 3349 value = xmlParseEntityValue(ctxt, &orig); 3350 if (value) { 3351 if ((ctxt->sax != NULL) && 3352 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3353 ctxt->sax->entityDecl(ctxt->userData, name, 3354 XML_INTERNAL_PARAMETER_ENTITY, 3355 NULL, NULL, value); 3356 } 3357 } else { 3358 URI = xmlParseExternalID(ctxt, &literal, 1); 3359 if ((URI == NULL) && (literal == NULL)) { 3360 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3362 ctxt->sax->error(ctxt->userData, 3363 "Entity value required\n"); 3364 ctxt->wellFormed = 0; 3365 ctxt->disableSAX = 1; 3366 } 3367 if (URI) { 3368 xmlURIPtr uri; 3369 3370 uri = xmlParseURI((const char *) URI); 3371 if (uri == NULL) { 3372 ctxt->errNo = XML_ERR_INVALID_URI; 3373 if ((ctxt->sax != NULL) && 3374 (!ctxt->disableSAX) && 3375 (ctxt->sax->error != NULL)) 3376 ctxt->sax->error(ctxt->userData, 3377 "Invalid URI: %s\n", URI); 3378 ctxt->wellFormed = 0; 3379 } else { 3380 if (uri->fragment != NULL) { 3381 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3382 if ((ctxt->sax != NULL) && 3383 (!ctxt->disableSAX) && 3384 (ctxt->sax->error != NULL)) 3385 ctxt->sax->error(ctxt->userData, 3386 "Fragment not allowed: %s\n", URI); 3387 ctxt->wellFormed = 0; 3388 } else { 3389 if ((ctxt->sax != NULL) && 3390 (!ctxt->disableSAX) && 3391 (ctxt->sax->entityDecl != NULL)) 3392 ctxt->sax->entityDecl(ctxt->userData, name, 3393 XML_EXTERNAL_PARAMETER_ENTITY, 3394 literal, URI, NULL); 3395 } 3396 xmlFreeURI(uri); 3397 } 3398 } 3399 } 3400 } else { 3401 if ((RAW == '"') || (RAW == '\'')) { 3402 value = xmlParseEntityValue(ctxt, &orig); 3403 if ((ctxt->sax != NULL) && 3404 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3405 ctxt->sax->entityDecl(ctxt->userData, name, 3406 XML_INTERNAL_GENERAL_ENTITY, 3407 NULL, NULL, value); 3408 } else { 3409 URI = xmlParseExternalID(ctxt, &literal, 1); 3410 if ((URI == NULL) && (literal 
== NULL)) { 3411 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3413 ctxt->sax->error(ctxt->userData, 3414 "Entity value required\n"); 3415 ctxt->wellFormed = 0; 3416 ctxt->disableSAX = 1; 3417 } 3418 if (URI) { 3419 xmlURIPtr uri; 3420 3421 uri = xmlParseURI((const char *)URI); 3422 if (uri == NULL) { 3423 ctxt->errNo = XML_ERR_INVALID_URI; 3424 if ((ctxt->sax != NULL) && 3425 (!ctxt->disableSAX) && 3426 (ctxt->sax->error != NULL)) 3427 ctxt->sax->error(ctxt->userData, 3428 "Invalid URI: %s\n", URI); 3429 ctxt->wellFormed = 0; 3430 } else { 3431 if (uri->fragment != NULL) { 3432 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3433 if ((ctxt->sax != NULL) && 3434 (!ctxt->disableSAX) && 3435 (ctxt->sax->error != NULL)) 3436 ctxt->sax->error(ctxt->userData, 3437 "Fragment not allowed: %s\n", URI); 3438 ctxt->wellFormed = 0; 3439 } 3440 xmlFreeURI(uri); 3441 } 3442 } 3443 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3444 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3446 ctxt->sax->error(ctxt->userData, 3447 "Space required before 'NDATA'\n"); 3448 ctxt->wellFormed = 0; 3449 ctxt->disableSAX = 1; 3450 } 3451 SKIP_BLANKS; 3452 if ((RAW == 'N') && (NXT(1) == 'D') && 3453 (NXT(2) == 'A') && (NXT(3) == 'T') && 3454 (NXT(4) == 'A')) { 3455 SKIP(5); 3456 if (!IS_BLANK(CUR)) { 3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3459 ctxt->sax->error(ctxt->userData, 3460 "Space required after 'NDATA'\n"); 3461 ctxt->wellFormed = 0; 3462 ctxt->disableSAX = 1; 3463 } 3464 SKIP_BLANKS; 3465 ndata = xmlParseName(ctxt); 3466 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3467 (ctxt->sax->unparsedEntityDecl != NULL)) 3468 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3469 literal, URI, ndata); 3470 } else { 3471 if ((ctxt->sax != NULL) && 3472 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3473 
ctxt->sax->entityDecl(ctxt->userData, name, 3474 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3475 literal, URI, NULL); 3476 } 3477 } 3478 } 3479 SKIP_BLANKS; 3480 if (RAW != '>') { 3481 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3483 ctxt->sax->error(ctxt->userData, 3484 "xmlParseEntityDecl: entity %s not terminated\n", name); 3485 ctxt->wellFormed = 0; 3486 ctxt->disableSAX = 1; 3487 } else { 3488 if (input != ctxt->input) { 3489 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3491 ctxt->sax->error(ctxt->userData, 3492"Entity declaration doesn't start and stop in the same entity\n"); 3493 ctxt->wellFormed = 0; 3494 ctxt->disableSAX = 1; 3495 } 3496 NEXT; 3497 } 3498 if (orig != NULL) { 3499 /* 3500 * Ugly mechanism to save the raw entity value. 3501 */ 3502 xmlEntityPtr cur = NULL; 3503 3504 if (isParameter) { 3505 if ((ctxt->sax != NULL) && 3506 (ctxt->sax->getParameterEntity != NULL)) 3507 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3508 } else { 3509 if ((ctxt->sax != NULL) && 3510 (ctxt->sax->getEntity != NULL)) 3511 cur = ctxt->sax->getEntity(ctxt->userData, name); 3512 } 3513 if (cur != NULL) { 3514 if (cur->orig != NULL) 3515 xmlFree(orig); 3516 else 3517 cur->orig = orig; 3518 } else 3519 xmlFree(orig); 3520 } 3521 if (name != NULL) xmlFree(name); 3522 if (value != NULL) xmlFree(value); 3523 if (URI != NULL) xmlFree(URI); 3524 if (literal != NULL) xmlFree(literal); 3525 if (ndata != NULL) xmlFree(ndata); 3526 } 3527} 3528 3529/** 3530 * xmlParseDefaultDecl: 3531 * @ctxt: an XML parser context 3532 * @value: Receive a possible fixed default value for the attribute 3533 * 3534 * Parse an attribute default declaration 3535 * 3536 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 3537 * 3538 * [ VC: Required Attribute ] 3539 * if the default declaration is the keyword #REQUIRED, then the 3540 * attribute must be specified for all elements of the type in the 3541 * attribute-list declaration. 3542 * 3543 * [ VC: Attribute Default Legal ] 3544 * The declared default value must meet the lexical constraints of 3545 * the declared attribute type c.f. xmlValidateAttributeDecl() 3546 * 3547 * [ VC: Fixed Attribute Default ] 3548 * if an attribute has a default value declared with the #FIXED 3549 * keyword, instances of that attribute must match the default value. 3550 * 3551 * [ WFC: No < in Attribute Values ] 3552 * handled in xmlParseAttValue() 3553 * 3554 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3555 * or XML_ATTRIBUTE_FIXED. 3556 */ 3557 3558int 3559xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3560 int val; 3561 xmlChar *ret; 3562 3563 *value = NULL; 3564 if ((RAW == '#') && (NXT(1) == 'R') && 3565 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3566 (NXT(4) == 'U') && (NXT(5) == 'I') && 3567 (NXT(6) == 'R') && (NXT(7) == 'E') && 3568 (NXT(8) == 'D')) { 3569 SKIP(9); 3570 return(XML_ATTRIBUTE_REQUIRED); 3571 } 3572 if ((RAW == '#') && (NXT(1) == 'I') && 3573 (NXT(2) == 'M') && (NXT(3) == 'P') && 3574 (NXT(4) == 'L') && (NXT(5) == 'I') && 3575 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3576 SKIP(8); 3577 return(XML_ATTRIBUTE_IMPLIED); 3578 } 3579 val = XML_ATTRIBUTE_NONE; 3580 if ((RAW == '#') && (NXT(1) == 'F') && 3581 (NXT(2) == 'I') && (NXT(3) == 'X') && 3582 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3583 SKIP(6); 3584 val = XML_ATTRIBUTE_FIXED; 3585 if (!IS_BLANK(CUR)) { 3586 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3588 ctxt->sax->error(ctxt->userData, 3589 "Space required after '#FIXED'\n"); 3590 ctxt->wellFormed = 0; 3591 ctxt->disableSAX = 1; 3592 } 3593 SKIP_BLANKS; 3594 } 3595 ret = xmlParseAttValue(ctxt); 3596 ctxt->instate = 
XML_PARSER_DTD; 3597 if (ret == NULL) { 3598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3599 ctxt->sax->error(ctxt->userData, 3600 "Attribute default value declaration error\n"); 3601 ctxt->wellFormed = 0; 3602 ctxt->disableSAX = 1; 3603 } else 3604 *value = ret; 3605 return(val); 3606} 3607 3608/** 3609 * xmlParseNotationType: 3610 * @ctxt: an XML parser context 3611 * 3612 * parse an Notation attribute type. 3613 * 3614 * Note: the leading 'NOTATION' S part has already being parsed... 3615 * 3616 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3617 * 3618 * [ VC: Notation Attributes ] 3619 * Values of this type must match one of the notation names included 3620 * in the declaration; all notation names in the declaration must be declared. 3621 * 3622 * Returns: the notation attribute tree built while parsing 3623 */ 3624 3625xmlEnumerationPtr 3626xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3627 xmlChar *name; 3628 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3629 3630 if (RAW != '(') { 3631 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3633 ctxt->sax->error(ctxt->userData, 3634 "'(' required to start 'NOTATION'\n"); 3635 ctxt->wellFormed = 0; 3636 ctxt->disableSAX = 1; 3637 return(NULL); 3638 } 3639 SHRINK; 3640 do { 3641 NEXT; 3642 SKIP_BLANKS; 3643 name = xmlParseName(ctxt); 3644 if (name == NULL) { 3645 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3647 ctxt->sax->error(ctxt->userData, 3648 "Name expected in NOTATION declaration\n"); 3649 ctxt->wellFormed = 0; 3650 ctxt->disableSAX = 1; 3651 return(ret); 3652 } 3653 cur = xmlCreateEnumeration(name); 3654 xmlFree(name); 3655 if (cur == NULL) return(ret); 3656 if (last == NULL) ret = last = cur; 3657 else { 3658 last->next = cur; 3659 last = cur; 3660 } 3661 SKIP_BLANKS; 3662 } while (RAW == '|'); 3663 if (RAW != ')') { 3664 ctxt->errNo = 
XML_ERR_NOTATION_NOT_FINISHED; 3665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3666 ctxt->sax->error(ctxt->userData, 3667 "')' required to finish NOTATION declaration\n"); 3668 ctxt->wellFormed = 0; 3669 ctxt->disableSAX = 1; 3670 if ((last != NULL) && (last != ret)) 3671 xmlFreeEnumeration(last); 3672 return(ret); 3673 } 3674 NEXT; 3675 return(ret); 3676} 3677 3678/** 3679 * xmlParseEnumerationType: 3680 * @ctxt: an XML parser context 3681 * 3682 * parse an Enumeration attribute type. 3683 * 3684 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3685 * 3686 * [ VC: Enumeration ] 3687 * Values of this type must match one of the Nmtoken tokens in 3688 * the declaration 3689 * 3690 * Returns: the enumeration attribute tree built while parsing 3691 */ 3692 3693xmlEnumerationPtr 3694xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3695 xmlChar *name; 3696 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3697 3698 if (RAW != '(') { 3699 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3701 ctxt->sax->error(ctxt->userData, 3702 "'(' required to start ATTLIST enumeration\n"); 3703 ctxt->wellFormed = 0; 3704 ctxt->disableSAX = 1; 3705 return(NULL); 3706 } 3707 SHRINK; 3708 do { 3709 NEXT; 3710 SKIP_BLANKS; 3711 name = xmlParseNmtoken(ctxt); 3712 if (name == NULL) { 3713 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3715 ctxt->sax->error(ctxt->userData, 3716 "NmToken expected in ATTLIST enumeration\n"); 3717 ctxt->wellFormed = 0; 3718 ctxt->disableSAX = 1; 3719 return(ret); 3720 } 3721 cur = xmlCreateEnumeration(name); 3722 xmlFree(name); 3723 if (cur == NULL) return(ret); 3724 if (last == NULL) ret = last = cur; 3725 else { 3726 last->next = cur; 3727 last = cur; 3728 } 3729 SKIP_BLANKS; 3730 } while (RAW == '|'); 3731 if (RAW != ')') { 3732 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 3734 ctxt->sax->error(ctxt->userData, 3735 "')' required to finish ATTLIST enumeration\n"); 3736 ctxt->wellFormed = 0; 3737 ctxt->disableSAX = 1; 3738 return(ret); 3739 } 3740 NEXT; 3741 return(ret); 3742} 3743 3744/** 3745 * xmlParseEnumeratedType: 3746 * @ctxt: an XML parser context 3747 * @tree: the enumeration tree built while parsing 3748 * 3749 * parse an Enumerated attribute type. 3750 * 3751 * [57] EnumeratedType ::= NotationType | Enumeration 3752 * 3753 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3754 * 3755 * 3756 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3757 */ 3758 3759int 3760xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3761 if ((RAW == 'N') && (NXT(1) == 'O') && 3762 (NXT(2) == 'T') && (NXT(3) == 'A') && 3763 (NXT(4) == 'T') && (NXT(5) == 'I') && 3764 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3765 SKIP(8); 3766 if (!IS_BLANK(CUR)) { 3767 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3769 ctxt->sax->error(ctxt->userData, 3770 "Space required after 'NOTATION'\n"); 3771 ctxt->wellFormed = 0; 3772 ctxt->disableSAX = 1; 3773 return(0); 3774 } 3775 SKIP_BLANKS; 3776 *tree = xmlParseNotationType(ctxt); 3777 if (*tree == NULL) return(0); 3778 return(XML_ATTRIBUTE_NOTATION); 3779 } 3780 *tree = xmlParseEnumerationType(ctxt); 3781 if (*tree == NULL) return(0); 3782 return(XML_ATTRIBUTE_ENUMERATION); 3783} 3784 3785/** 3786 * xmlParseAttributeType: 3787 * @ctxt: an XML parser context 3788 * @tree: the enumeration tree built while parsing 3789 * 3790 * parse the Attribute list def for an element 3791 * 3792 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3793 * 3794 * [55] StringType ::= 'CDATA' 3795 * 3796 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3797 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3798 * 3799 * Validity constraints for attribute values syntax are checked in 3800 * 
xmlValidateAttributeValue() 3801 * 3802 * [ VC: ID ] 3803 * Values of type ID must match the Name production. A name must not 3804 * appear more than once in an XML document as a value of this type; 3805 * i.e., ID values must uniquely identify the elements which bear them. 3806 * 3807 * [ VC: One ID per Element Type ] 3808 * No element type may have more than one ID attribute specified. 3809 * 3810 * [ VC: ID Attribute Default ] 3811 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 3812 * 3813 * [ VC: IDREF ] 3814 * Values of type IDREF must match the Name production, and values 3815 * of type IDREFS must match Names; each IDREF Name must match the value 3816 * of an ID attribute on some element in the XML document; i.e. IDREF 3817 * values must match the value of some ID attribute. 3818 * 3819 * [ VC: Entity Name ] 3820 * Values of type ENTITY must match the Name production, values 3821 * of type ENTITIES must match Names; each Entity Name must match the 3822 * name of an unparsed entity declared in the DTD. 3823 * 3824 * [ VC: Name Token ] 3825 * Values of type NMTOKEN must match the Nmtoken production; values 3826 * of type NMTOKENS must match Nmtokens. 
3827 * 3828 * Returns the attribute type 3829 */ 3830int 3831xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3832 SHRINK; 3833 if ((RAW == 'C') && (NXT(1) == 'D') && 3834 (NXT(2) == 'A') && (NXT(3) == 'T') && 3835 (NXT(4) == 'A')) { 3836 SKIP(5); 3837 return(XML_ATTRIBUTE_CDATA); 3838 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3839 (NXT(2) == 'R') && (NXT(3) == 'E') && 3840 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3841 SKIP(6); 3842 return(XML_ATTRIBUTE_IDREFS); 3843 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3844 (NXT(2) == 'R') && (NXT(3) == 'E') && 3845 (NXT(4) == 'F')) { 3846 SKIP(5); 3847 return(XML_ATTRIBUTE_IDREF); 3848 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3849 SKIP(2); 3850 return(XML_ATTRIBUTE_ID); 3851 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3852 (NXT(2) == 'T') && (NXT(3) == 'I') && 3853 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3854 SKIP(6); 3855 return(XML_ATTRIBUTE_ENTITY); 3856 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3857 (NXT(2) == 'T') && (NXT(3) == 'I') && 3858 (NXT(4) == 'T') && (NXT(5) == 'I') && 3859 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3860 SKIP(8); 3861 return(XML_ATTRIBUTE_ENTITIES); 3862 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3863 (NXT(2) == 'T') && (NXT(3) == 'O') && 3864 (NXT(4) == 'K') && (NXT(5) == 'E') && 3865 (NXT(6) == 'N') && (NXT(7) == 'S')) { 3866 SKIP(8); 3867 return(XML_ATTRIBUTE_NMTOKENS); 3868 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3869 (NXT(2) == 'T') && (NXT(3) == 'O') && 3870 (NXT(4) == 'K') && (NXT(5) == 'E') && 3871 (NXT(6) == 'N')) { 3872 SKIP(7); 3873 return(XML_ATTRIBUTE_NMTOKEN); 3874 } 3875 return(xmlParseEnumeratedType(ctxt, tree)); 3876} 3877 3878/** 3879 * xmlParseAttributeListDecl: 3880 * @ctxt: an XML parser context 3881 * 3882 * : parse the Attribute list def for an element 3883 * 3884 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 3885 * 3886 * [53] AttDef ::= S Name S AttType S DefaultDecl 3887 * 3888 */ 3889void 3890xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3891 xmlChar *elemName; 3892 xmlChar *attrName; 3893 xmlEnumerationPtr tree; 3894 3895 if ((RAW == '<') && (NXT(1) == '!') && 3896 (NXT(2) == 'A') && (NXT(3) == 'T') && 3897 (NXT(4) == 'T') && (NXT(5) == 'L') && 3898 (NXT(6) == 'I') && (NXT(7) == 'S') && 3899 (NXT(8) == 'T')) { 3900 xmlParserInputPtr input = ctxt->input; 3901 3902 SKIP(9); 3903 if (!IS_BLANK(CUR)) { 3904 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3906 ctxt->sax->error(ctxt->userData, 3907 "Space required after '<!ATTLIST'\n"); 3908 ctxt->wellFormed = 0; 3909 ctxt->disableSAX = 1; 3910 } 3911 SKIP_BLANKS; 3912 elemName = xmlParseName(ctxt); 3913 if (elemName == NULL) { 3914 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3916 ctxt->sax->error(ctxt->userData, 3917 "ATTLIST: no name for Element\n"); 3918 ctxt->wellFormed = 0; 3919 ctxt->disableSAX = 1; 3920 return; 3921 } 3922 SKIP_BLANKS; 3923 GROW; 3924 while (RAW != '>') { 3925 const xmlChar *check = CUR_PTR; 3926 int type; 3927 int def; 3928 xmlChar *defaultValue = NULL; 3929 3930 GROW; 3931 tree = NULL; 3932 attrName = xmlParseName(ctxt); 3933 if (attrName == NULL) { 3934 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3936 ctxt->sax->error(ctxt->userData, 3937 "ATTLIST: no name for Attribute\n"); 3938 ctxt->wellFormed = 0; 3939 ctxt->disableSAX = 1; 3940 break; 3941 } 3942 GROW; 3943 if (!IS_BLANK(CUR)) { 3944 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3946 ctxt->sax->error(ctxt->userData, 3947 "Space required after the attribute name\n"); 3948 ctxt->wellFormed = 0; 3949 ctxt->disableSAX = 1; 3950 if (attrName != NULL) 3951 xmlFree(attrName); 3952 if (defaultValue != NULL) 3953 xmlFree(defaultValue); 
3954 break; 3955 } 3956 SKIP_BLANKS; 3957 3958 type = xmlParseAttributeType(ctxt, &tree); 3959 if (type <= 0) { 3960 if (attrName != NULL) 3961 xmlFree(attrName); 3962 if (defaultValue != NULL) 3963 xmlFree(defaultValue); 3964 break; 3965 } 3966 3967 GROW; 3968 if (!IS_BLANK(CUR)) { 3969 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3971 ctxt->sax->error(ctxt->userData, 3972 "Space required after the attribute type\n"); 3973 ctxt->wellFormed = 0; 3974 ctxt->disableSAX = 1; 3975 if (attrName != NULL) 3976 xmlFree(attrName); 3977 if (defaultValue != NULL) 3978 xmlFree(defaultValue); 3979 if (tree != NULL) 3980 xmlFreeEnumeration(tree); 3981 break; 3982 } 3983 SKIP_BLANKS; 3984 3985 def = xmlParseDefaultDecl(ctxt, &defaultValue); 3986 if (def <= 0) { 3987 if (attrName != NULL) 3988 xmlFree(attrName); 3989 if (defaultValue != NULL) 3990 xmlFree(defaultValue); 3991 if (tree != NULL) 3992 xmlFreeEnumeration(tree); 3993 break; 3994 } 3995 3996 GROW; 3997 if (RAW != '>') { 3998 if (!IS_BLANK(CUR)) { 3999 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4001 ctxt->sax->error(ctxt->userData, 4002 "Space required after the attribute default value\n"); 4003 ctxt->wellFormed = 0; 4004 ctxt->disableSAX = 1; 4005 if (attrName != NULL) 4006 xmlFree(attrName); 4007 if (defaultValue != NULL) 4008 xmlFree(defaultValue); 4009 if (tree != NULL) 4010 xmlFreeEnumeration(tree); 4011 break; 4012 } 4013 SKIP_BLANKS; 4014 } 4015 if (check == CUR_PTR) { 4016 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4018 ctxt->sax->error(ctxt->userData, 4019 "xmlParseAttributeListDecl: detected internal error\n"); 4020 if (attrName != NULL) 4021 xmlFree(attrName); 4022 if (defaultValue != NULL) 4023 xmlFree(defaultValue); 4024 if (tree != NULL) 4025 xmlFreeEnumeration(tree); 4026 break; 4027 } 4028 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4029 
(ctxt->sax->attributeDecl != NULL)) 4030 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4031 type, def, defaultValue, tree); 4032 if (attrName != NULL) 4033 xmlFree(attrName); 4034 if (defaultValue != NULL) 4035 xmlFree(defaultValue); 4036 GROW; 4037 } 4038 if (RAW == '>') { 4039 if (input != ctxt->input) { 4040 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4042 ctxt->sax->error(ctxt->userData, 4043"Attribute list declaration doesn't start and stop in the same entity\n"); 4044 ctxt->wellFormed = 0; 4045 ctxt->disableSAX = 1; 4046 } 4047 NEXT; 4048 } 4049 4050 xmlFree(elemName); 4051 } 4052} 4053 4054/** 4055 * xmlParseElementMixedContentDecl: 4056 * @ctxt: an XML parser context 4057 * 4058 * parse the declaration for a Mixed Element content 4059 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4060 * 4061 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4062 * '(' S? '#PCDATA' S? ')' 4063 * 4064 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4065 * 4066 * [ VC: No Duplicate Types ] 4067 * The same name must not appear more than once in a single 4068 * mixed-content declaration. 
4069 * 4070 * returns: the list of the xmlElementContentPtr describing the element choices 4071 */ 4072xmlElementContentPtr 4073xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 4074 xmlElementContentPtr ret = NULL, cur = NULL, n; 4075 xmlChar *elem = NULL; 4076 4077 GROW; 4078 if ((RAW == '#') && (NXT(1) == 'P') && 4079 (NXT(2) == 'C') && (NXT(3) == 'D') && 4080 (NXT(4) == 'A') && (NXT(5) == 'T') && 4081 (NXT(6) == 'A')) { 4082 SKIP(7); 4083 SKIP_BLANKS; 4084 SHRINK; 4085 if (RAW == ')') { 4086 ctxt->entity = ctxt->input; 4087 NEXT; 4088 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4089 if (RAW == '*') { 4090 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4091 NEXT; 4092 } 4093 return(ret); 4094 } 4095 if ((RAW == '(') || (RAW == '|')) { 4096 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4097 if (ret == NULL) return(NULL); 4098 } 4099 while (RAW == '|') { 4100 NEXT; 4101 if (elem == NULL) { 4102 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4103 if (ret == NULL) return(NULL); 4104 ret->c1 = cur; 4105 if (cur != NULL) 4106 cur->parent = ret; 4107 cur = ret; 4108 } else { 4109 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4110 if (n == NULL) return(NULL); 4111 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4112 if (n->c1 != NULL) 4113 n->c1->parent = n; 4114 cur->c2 = n; 4115 if (n != NULL) 4116 n->parent = cur; 4117 cur = n; 4118 xmlFree(elem); 4119 } 4120 SKIP_BLANKS; 4121 elem = xmlParseName(ctxt); 4122 if (elem == NULL) { 4123 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4125 ctxt->sax->error(ctxt->userData, 4126 "xmlParseElementMixedContentDecl : Name expected\n"); 4127 ctxt->wellFormed = 0; 4128 ctxt->disableSAX = 1; 4129 xmlFreeElementContent(cur); 4130 return(NULL); 4131 } 4132 SKIP_BLANKS; 4133 GROW; 4134 } 4135 if ((RAW == ')') && (NXT(1) == '*')) { 4136 if (elem != NULL) { 4137 cur->c2 = xmlNewElementContent(elem, 4138 
XML_ELEMENT_CONTENT_ELEMENT); 4139 if (cur->c2 != NULL) 4140 cur->c2->parent = cur; 4141 xmlFree(elem); 4142 } 4143 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4144 ctxt->entity = ctxt->input; 4145 SKIP(2); 4146 } else { 4147 if (elem != NULL) xmlFree(elem); 4148 xmlFreeElementContent(ret); 4149 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4151 ctxt->sax->error(ctxt->userData, 4152 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4153 ctxt->wellFormed = 0; 4154 ctxt->disableSAX = 1; 4155 return(NULL); 4156 } 4157 4158 } else { 4159 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4161 ctxt->sax->error(ctxt->userData, 4162 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4163 ctxt->wellFormed = 0; 4164 ctxt->disableSAX = 1; 4165 } 4166 return(ret); 4167} 4168 4169/** 4170 * xmlParseElementChildrenContentD: 4171 * @ctxt: an XML parser context 4172 * 4173 * VMS version of xmlParseElementChildrenContentDecl() 4174 * 4175 * Returns the tree of xmlElementContentPtr describing the element 4176 * hierarchy. 4177 */ 4178/** 4179 * xmlParseElementChildrenContentDecl: 4180 * @ctxt: an XML parser context 4181 * 4182 * parse the declaration for a Mixed Element content 4183 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4184 * 4185 * 4186 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4187 * 4188 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4189 * 4190 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4191 * 4192 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4193 * 4194 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4195 * TODO Parameter-entity replacement text must be properly nested 4196 * with parenthetized groups. 
That is to say, if either of the 4197 * opening or closing parentheses in a choice, seq, or Mixed 4198 * construct is contained in the replacement text for a parameter 4199 * entity, both must be contained in the same replacement text. For 4200 * interoperability, if a parameter-entity reference appears in a 4201 * choice, seq, or Mixed construct, its replacement text should not 4202 * be empty, and neither the first nor last non-blank character of 4203 * the replacement text should be a connector (| or ,). 4204 * 4205 * Returns the tree of xmlElementContentPtr describing the element 4206 * hierarchy. 4207 */ 4208xmlElementContentPtr 4209#ifdef VMS 4210xmlParseElementChildrenContentD 4211#else 4212xmlParseElementChildrenContentDecl 4213#endif 4214(xmlParserCtxtPtr ctxt) { 4215 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4216 xmlChar *elem; 4217 xmlChar type = 0; 4218 4219 SKIP_BLANKS; 4220 GROW; 4221 if (RAW == '(') { 4222 /* Recurse on first child */ 4223 NEXT; 4224 SKIP_BLANKS; 4225 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4226 SKIP_BLANKS; 4227 GROW; 4228 } else { 4229 elem = xmlParseName(ctxt); 4230 if (elem == NULL) { 4231 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4233 ctxt->sax->error(ctxt->userData, 4234 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4235 ctxt->wellFormed = 0; 4236 ctxt->disableSAX = 1; 4237 return(NULL); 4238 } 4239 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4240 GROW; 4241 if (RAW == '?') { 4242 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4243 NEXT; 4244 } else if (RAW == '*') { 4245 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4246 NEXT; 4247 } else if (RAW == '+') { 4248 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4249 NEXT; 4250 } else { 4251 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4252 } 4253 xmlFree(elem); 4254 GROW; 4255 } 4256 SKIP_BLANKS; 4257 SHRINK; 4258 while (RAW != ')') { 4259 /* 4260 * Each loop we parse 
one separator and one element. 4261 */ 4262 if (RAW == ',') { 4263 if (type == 0) type = CUR; 4264 4265 /* 4266 * Detect "Name | Name , Name" error 4267 */ 4268 else if (type != CUR) { 4269 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4271 ctxt->sax->error(ctxt->userData, 4272 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4273 type); 4274 ctxt->wellFormed = 0; 4275 ctxt->disableSAX = 1; 4276 if ((op != NULL) && (op != ret)) 4277 xmlFreeElementContent(op); 4278 if ((last != NULL) && (last != ret) && 4279 (last != ret->c1) && (last != ret->c2)) 4280 xmlFreeElementContent(last); 4281 if (ret != NULL) 4282 xmlFreeElementContent(ret); 4283 return(NULL); 4284 } 4285 NEXT; 4286 4287 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4288 if (op == NULL) { 4289 xmlFreeElementContent(ret); 4290 return(NULL); 4291 } 4292 if (last == NULL) { 4293 op->c1 = ret; 4294 if (ret != NULL) 4295 ret->parent = op; 4296 ret = cur = op; 4297 } else { 4298 cur->c2 = op; 4299 if (op != NULL) 4300 op->parent = cur; 4301 op->c1 = last; 4302 if (last != NULL) 4303 last->parent = op; 4304 cur =op; 4305 last = NULL; 4306 } 4307 } else if (RAW == '|') { 4308 if (type == 0) type = CUR; 4309 4310 /* 4311 * Detect "Name , Name | Name" error 4312 */ 4313 else if (type != CUR) { 4314 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4316 ctxt->sax->error(ctxt->userData, 4317 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4318 type); 4319 ctxt->wellFormed = 0; 4320 ctxt->disableSAX = 1; 4321 if ((op != NULL) && (op != ret) && (op != last)) 4322 xmlFreeElementContent(op); 4323 if ((last != NULL) && (last != ret) && 4324 (last != ret->c1) && (last != ret->c2)) 4325 xmlFreeElementContent(last); 4326 if (ret != NULL) 4327 xmlFreeElementContent(ret); 4328 return(NULL); 4329 } 4330 NEXT; 4331 4332 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4333 if (op == NULL) 
{ 4334 if ((op != NULL) && (op != ret)) 4335 xmlFreeElementContent(op); 4336 if ((last != NULL) && (last != ret) && 4337 (last != ret->c1) && (last != ret->c2)) 4338 xmlFreeElementContent(last); 4339 if (ret != NULL) 4340 xmlFreeElementContent(ret); 4341 return(NULL); 4342 } 4343 if (last == NULL) { 4344 op->c1 = ret; 4345 if (ret != NULL) 4346 ret->parent = op; 4347 ret = cur = op; 4348 } else { 4349 cur->c2 = op; 4350 if (op != NULL) 4351 op->parent = cur; 4352 op->c1 = last; 4353 if (last != NULL) 4354 last->parent = op; 4355 cur =op; 4356 last = NULL; 4357 } 4358 } else { 4359 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4361 ctxt->sax->error(ctxt->userData, 4362 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4363 ctxt->wellFormed = 0; 4364 ctxt->disableSAX = 1; 4365 if ((op != NULL) && (op != ret)) 4366 xmlFreeElementContent(op); 4367 if ((last != NULL) && (last != ret) && 4368 (last != ret->c1) && (last != ret->c2)) 4369 xmlFreeElementContent(last); 4370 if (ret != NULL) 4371 xmlFreeElementContent(ret); 4372 return(NULL); 4373 } 4374 GROW; 4375 SKIP_BLANKS; 4376 GROW; 4377 if (RAW == '(') { 4378 /* Recurse on second child */ 4379 NEXT; 4380 SKIP_BLANKS; 4381 last = xmlParseElementChildrenContentDecl(ctxt); 4382 SKIP_BLANKS; 4383 } else { 4384 elem = xmlParseName(ctxt); 4385 if (elem == NULL) { 4386 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4388 ctxt->sax->error(ctxt->userData, 4389 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4390 ctxt->wellFormed = 0; 4391 ctxt->disableSAX = 1; 4392 if ((op != NULL) && (op != ret)) 4393 xmlFreeElementContent(op); 4394 if ((last != NULL) && (last != ret) && 4395 (last != ret->c1) && (last != ret->c2)) 4396 xmlFreeElementContent(last); 4397 if (ret != NULL) 4398 xmlFreeElementContent(ret); 4399 return(NULL); 4400 } 4401 last = xmlNewElementContent(elem, 
XML_ELEMENT_CONTENT_ELEMENT); 4402 xmlFree(elem); 4403 if (RAW == '?') { 4404 last->ocur = XML_ELEMENT_CONTENT_OPT; 4405 NEXT; 4406 } else if (RAW == '*') { 4407 last->ocur = XML_ELEMENT_CONTENT_MULT; 4408 NEXT; 4409 } else if (RAW == '+') { 4410 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4411 NEXT; 4412 } else { 4413 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4414 } 4415 } 4416 SKIP_BLANKS; 4417 GROW; 4418 } 4419 if ((cur != NULL) && (last != NULL)) { 4420 cur->c2 = last; 4421 if (last != NULL) 4422 last->parent = cur; 4423 } 4424 ctxt->entity = ctxt->input; 4425 NEXT; 4426 if (RAW == '?') { 4427 if (ret != NULL) 4428 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4429 NEXT; 4430 } else if (RAW == '*') { 4431 if (ret != NULL) 4432 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4433 NEXT; 4434 } else if (RAW == '+') { 4435 if (ret != NULL) 4436 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4437 NEXT; 4438 } 4439 return(ret); 4440} 4441 4442/** 4443 * xmlParseElementContentDecl: 4444 * @ctxt: an XML parser context 4445 * @name: the name of the element being defined. 
4446 * @result: the Element Content pointer will be stored here if any 4447 * 4448 * parse the declaration for an Element content either Mixed or Children, 4449 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4450 * 4451 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4452 * 4453 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4454 */ 4455 4456int 4457xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4458 xmlElementContentPtr *result) { 4459 4460 xmlElementContentPtr tree = NULL; 4461 xmlParserInputPtr input = ctxt->input; 4462 int res; 4463 4464 *result = NULL; 4465 4466 if (RAW != '(') { 4467 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4469 ctxt->sax->error(ctxt->userData, 4470 "xmlParseElementContentDecl : %s '(' expected\n", name); 4471 ctxt->wellFormed = 0; 4472 ctxt->disableSAX = 1; 4473 return(-1); 4474 } 4475 NEXT; 4476 GROW; 4477 SKIP_BLANKS; 4478 if ((RAW == '#') && (NXT(1) == 'P') && 4479 (NXT(2) == 'C') && (NXT(3) == 'D') && 4480 (NXT(4) == 'A') && (NXT(5) == 'T') && 4481 (NXT(6) == 'A')) { 4482 tree = xmlParseElementMixedContentDecl(ctxt); 4483 res = XML_ELEMENT_TYPE_MIXED; 4484 } else { 4485 tree = xmlParseElementChildrenContentDecl(ctxt); 4486 res = XML_ELEMENT_TYPE_ELEMENT; 4487 } 4488 if ((ctxt->entity != NULL) && (input != ctxt->entity)) { 4489 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4491 ctxt->sax->error(ctxt->userData, 4492"Element content declaration doesn't start and stop in the same entity\n"); 4493 ctxt->wellFormed = 0; 4494 ctxt->disableSAX = 1; 4495 } 4496 SKIP_BLANKS; 4497 *result = tree; 4498 return(res); 4499} 4500 4501/** 4502 * xmlParseElementDecl: 4503 * @ctxt: an XML parser context 4504 * 4505 * parse an Element declaration. 4506 * 4507 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4508 * 4509 * [ VC: Unique Element Type Declaration ] 4510 * No element type may be declared more than once 4511 * 4512 * Returns the type of the element, or -1 in case of error 4513 */ 4514int 4515xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4516 xmlChar *name; 4517 int ret = -1; 4518 xmlElementContentPtr content = NULL; 4519 4520 GROW; 4521 if ((RAW == '<') && (NXT(1) == '!') && 4522 (NXT(2) == 'E') && (NXT(3) == 'L') && 4523 (NXT(4) == 'E') && (NXT(5) == 'M') && 4524 (NXT(6) == 'E') && (NXT(7) == 'N') && 4525 (NXT(8) == 'T')) { 4526 xmlParserInputPtr input = ctxt->input; 4527 4528 SKIP(9); 4529 if (!IS_BLANK(CUR)) { 4530 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4532 ctxt->sax->error(ctxt->userData, 4533 "Space required after 'ELEMENT'\n"); 4534 ctxt->wellFormed = 0; 4535 ctxt->disableSAX = 1; 4536 } 4537 SKIP_BLANKS; 4538 name = xmlParseName(ctxt); 4539 if (name == NULL) { 4540 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4542 ctxt->sax->error(ctxt->userData, 4543 "xmlParseElementDecl: no name for Element\n"); 4544 ctxt->wellFormed = 0; 4545 ctxt->disableSAX = 1; 4546 return(-1); 4547 } 4548 while ((RAW == 0) && (ctxt->inputNr > 1)) 4549 xmlPopInput(ctxt); 4550 if (!IS_BLANK(CUR)) { 4551 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4553 ctxt->sax->error(ctxt->userData, 4554 "Space required after the element name\n"); 4555 ctxt->wellFormed = 0; 4556 ctxt->disableSAX = 1; 4557 } 4558 SKIP_BLANKS; 4559 if ((RAW == 'E') && (NXT(1) == 'M') && 4560 (NXT(2) == 'P') && (NXT(3) == 'T') && 4561 (NXT(4) == 'Y')) { 4562 SKIP(5); 4563 /* 4564 * Element must always be empty. 4565 */ 4566 ret = XML_ELEMENT_TYPE_EMPTY; 4567 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4568 (NXT(2) == 'Y')) { 4569 SKIP(3); 4570 /* 4571 * Element is a generic container. 
4572 */ 4573 ret = XML_ELEMENT_TYPE_ANY; 4574 } else if (RAW == '(') { 4575 ret = xmlParseElementContentDecl(ctxt, name, &content); 4576 } else { 4577 /* 4578 * [ WFC: PEs in Internal Subset ] error handling. 4579 */ 4580 if ((RAW == '%') && (ctxt->external == 0) && 4581 (ctxt->inputNr == 1)) { 4582 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4584 ctxt->sax->error(ctxt->userData, 4585 "PEReference: forbidden within markup decl in internal subset\n"); 4586 } else { 4587 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4589 ctxt->sax->error(ctxt->userData, 4590 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4591 } 4592 ctxt->wellFormed = 0; 4593 ctxt->disableSAX = 1; 4594 if (name != NULL) xmlFree(name); 4595 return(-1); 4596 } 4597 4598 SKIP_BLANKS; 4599 /* 4600 * Pop-up of finished entities. 4601 */ 4602 while ((RAW == 0) && (ctxt->inputNr > 1)) 4603 xmlPopInput(ctxt); 4604 SKIP_BLANKS; 4605 4606 if (RAW != '>') { 4607 ctxt->errNo = XML_ERR_GT_REQUIRED; 4608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4609 ctxt->sax->error(ctxt->userData, 4610 "xmlParseElementDecl: expected '>' at the end\n"); 4611 ctxt->wellFormed = 0; 4612 ctxt->disableSAX = 1; 4613 } else { 4614 if (input != ctxt->input) { 4615 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4617 ctxt->sax->error(ctxt->userData, 4618"Element declaration doesn't start and stop in the same entity\n"); 4619 ctxt->wellFormed = 0; 4620 ctxt->disableSAX = 1; 4621 } 4622 4623 NEXT; 4624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4625 (ctxt->sax->elementDecl != NULL)) 4626 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4627 content); 4628 } 4629 if (content != NULL) { 4630 xmlFreeElementContent(content); 4631 } 4632 if (name != NULL) { 4633 xmlFree(name); 4634 } 4635 } 4636 return(ret); 4637} 4638 4639/** 4640 * 
xmlParseConditionalSections 4641 * @ctxt: an XML parser context 4642 * 4643 * [61] conditionalSect ::= includeSect | ignoreSect 4644 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4645 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4646 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4647 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4648 */ 4649 4650static void 4651xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4652 SKIP(3); 4653 SKIP_BLANKS; 4654 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4655 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4656 (NXT(6) == 'E')) { 4657 SKIP(7); 4658 SKIP_BLANKS; 4659 if (RAW != '[') { 4660 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4662 ctxt->sax->error(ctxt->userData, 4663 "XML conditional section '[' expected\n"); 4664 ctxt->wellFormed = 0; 4665 ctxt->disableSAX = 1; 4666 } else { 4667 NEXT; 4668 } 4669 if (xmlParserDebugEntities) { 4670 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4671 xmlGenericError(xmlGenericErrorContext, 4672 "%s(%d): ", ctxt->input->filename, 4673 ctxt->input->line); 4674 xmlGenericError(xmlGenericErrorContext, 4675 "Entering INCLUDE Conditional Section\n"); 4676 } 4677 4678 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4679 (NXT(2) != '>'))) { 4680 const xmlChar *check = CUR_PTR; 4681 int cons = ctxt->input->consumed; 4682 int tok = ctxt->token; 4683 4684 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4685 xmlParseConditionalSections(ctxt); 4686 } else if (IS_BLANK(CUR)) { 4687 NEXT; 4688 } else if (RAW == '%') { 4689 xmlParsePEReference(ctxt); 4690 } else 4691 xmlParseMarkupDecl(ctxt); 4692 4693 /* 4694 * Pop-up of finished entities. 
4695 */ 4696 while ((RAW == 0) && (ctxt->inputNr > 1)) 4697 xmlPopInput(ctxt); 4698 4699 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4700 (tok == ctxt->token)) { 4701 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4703 ctxt->sax->error(ctxt->userData, 4704 "Content error in the external subset\n"); 4705 ctxt->wellFormed = 0; 4706 ctxt->disableSAX = 1; 4707 break; 4708 } 4709 } 4710 if (xmlParserDebugEntities) { 4711 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4712 xmlGenericError(xmlGenericErrorContext, 4713 "%s(%d): ", ctxt->input->filename, 4714 ctxt->input->line); 4715 xmlGenericError(xmlGenericErrorContext, 4716 "Leaving INCLUDE Conditional Section\n"); 4717 } 4718 4719 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4720 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4721 int state; 4722 int instate; 4723 int depth = 0; 4724 4725 SKIP(6); 4726 SKIP_BLANKS; 4727 if (RAW != '[') { 4728 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4730 ctxt->sax->error(ctxt->userData, 4731 "XML conditional section '[' expected\n"); 4732 ctxt->wellFormed = 0; 4733 ctxt->disableSAX = 1; 4734 } else { 4735 NEXT; 4736 } 4737 if (xmlParserDebugEntities) { 4738 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4739 xmlGenericError(xmlGenericErrorContext, 4740 "%s(%d): ", ctxt->input->filename, 4741 ctxt->input->line); 4742 xmlGenericError(xmlGenericErrorContext, 4743 "Entering IGNORE Conditional Section\n"); 4744 } 4745 4746 /* 4747 * Parse up to the end of the conditionnal section 4748 * But disable SAX event generating DTD building in the meantime 4749 */ 4750 state = ctxt->disableSAX; 4751 instate = ctxt->instate; 4752 ctxt->disableSAX = 1; 4753 ctxt->instate = XML_PARSER_IGNORE; 4754 4755 while (depth >= 0) { 4756 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4757 depth++; 4758 SKIP(3); 4759 continue; 
4760 } 4761 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4762 if (--depth >= 0) SKIP(3); 4763 continue; 4764 } 4765 NEXT; 4766 continue; 4767 } 4768 4769 ctxt->disableSAX = state; 4770 ctxt->instate = instate; 4771 4772 if (xmlParserDebugEntities) { 4773 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4774 xmlGenericError(xmlGenericErrorContext, 4775 "%s(%d): ", ctxt->input->filename, 4776 ctxt->input->line); 4777 xmlGenericError(xmlGenericErrorContext, 4778 "Leaving IGNORE Conditional Section\n"); 4779 } 4780 4781 } else { 4782 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4784 ctxt->sax->error(ctxt->userData, 4785 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4786 ctxt->wellFormed = 0; 4787 ctxt->disableSAX = 1; 4788 } 4789 4790 if (RAW == 0) 4791 SHRINK; 4792 4793 if (RAW == 0) { 4794 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4796 ctxt->sax->error(ctxt->userData, 4797 "XML conditional section not closed\n"); 4798 ctxt->wellFormed = 0; 4799 ctxt->disableSAX = 1; 4800 } else { 4801 SKIP(3); 4802 } 4803} 4804 4805/** 4806 * xmlParseMarkupDecl: 4807 * @ctxt: an XML parser context 4808 * 4809 * parse Markup declarations 4810 * 4811 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4812 * NotationDecl | PI | Comment 4813 * 4814 * [ VC: Proper Declaration/PE Nesting ] 4815 * Parameter-entity replacement text must be properly nested with 4816 * markup declarations. That is to say, if either the first character 4817 * or the last character of a markup declaration (markupdecl above) is 4818 * contained in the replacement text for a parameter-entity reference, 4819 * both must be contained in the same replacement text. 
4820 * 4821 * [ WFC: PEs in Internal Subset ] 4822 * In the internal DTD subset, parameter-entity references can occur 4823 * only where markup declarations can occur, not within markup declarations. 4824 * (This does not apply to references that occur in external parameter 4825 * entities or to the external subset.) 4826 */ 4827void 4828xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4829 GROW; 4830 xmlParseElementDecl(ctxt); 4831 xmlParseAttributeListDecl(ctxt); 4832 xmlParseEntityDecl(ctxt); 4833 xmlParseNotationDecl(ctxt); 4834 xmlParsePI(ctxt); 4835 xmlParseComment(ctxt); 4836 /* 4837 * This is only for internal subset. On external entities, 4838 * the replacement is done before parsing stage 4839 */ 4840 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4841 xmlParsePEReference(ctxt); 4842 4843 /* 4844 * Conditional sections are allowed from entities included 4845 * by PE References in the internal subset. 4846 */ 4847 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 4848 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4849 xmlParseConditionalSections(ctxt); 4850 } 4851 } 4852 4853 ctxt->instate = XML_PARSER_DTD; 4854} 4855 4856/** 4857 * xmlParseTextDecl: 4858 * @ctxt: an XML parser context 4859 * 4860 * parse an XML declaration header for external entities 4861 * 4862 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4863 * 4864 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4865 */ 4866 4867void 4868xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4869 xmlChar *version; 4870 4871 /* 4872 * We know that '<?xml' is here. 
4873 */ 4874 if ((RAW == '<') && (NXT(1) == '?') && 4875 (NXT(2) == 'x') && (NXT(3) == 'm') && 4876 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4877 SKIP(5); 4878 } else { 4879 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4881 ctxt->sax->error(ctxt->userData, 4882 "Text declaration '<?xml' required\n"); 4883 ctxt->wellFormed = 0; 4884 ctxt->disableSAX = 1; 4885 4886 return; 4887 } 4888 4889 if (!IS_BLANK(CUR)) { 4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4892 ctxt->sax->error(ctxt->userData, 4893 "Space needed after '<?xml'\n"); 4894 ctxt->wellFormed = 0; 4895 ctxt->disableSAX = 1; 4896 } 4897 SKIP_BLANKS; 4898 4899 /* 4900 * We may have the VersionInfo here. 4901 */ 4902 version = xmlParseVersionInfo(ctxt); 4903 if (version == NULL) 4904 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4905 ctxt->input->version = version; 4906 4907 /* 4908 * We must have the encoding declaration 4909 */ 4910 if (!IS_BLANK(CUR)) { 4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4913 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4914 ctxt->wellFormed = 0; 4915 ctxt->disableSAX = 1; 4916 } 4917 xmlParseEncodingDecl(ctxt); 4918 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4919 /* 4920 * The XML REC instructs us to stop parsing right here 4921 */ 4922 return; 4923 } 4924 4925 SKIP_BLANKS; 4926 if ((RAW == '?') && (NXT(1) == '>')) { 4927 SKIP(2); 4928 } else if (RAW == '>') { 4929 /* Deprecated old WD ... 
*/ 4930 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4932 ctxt->sax->error(ctxt->userData, 4933 "XML declaration must end-up with '?>'\n"); 4934 ctxt->wellFormed = 0; 4935 ctxt->disableSAX = 1; 4936 NEXT; 4937 } else { 4938 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4940 ctxt->sax->error(ctxt->userData, 4941 "parsing XML declaration: '?>' expected\n"); 4942 ctxt->wellFormed = 0; 4943 ctxt->disableSAX = 1; 4944 MOVETO_ENDTAG(CUR_PTR); 4945 NEXT; 4946 } 4947} 4948 4949/** 4950 * xmlParseExternalSubset: 4951 * @ctxt: an XML parser context 4952 * @ExternalID: the external identifier 4953 * @SystemID: the system identifier (or URL) 4954 * 4955 * parse Markup declarations from an external subset 4956 * 4957 * [30] extSubset ::= textDecl? extSubsetDecl 4958 * 4959 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4960 */ 4961void 4962xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4963 const xmlChar *SystemID) { 4964 GROW; 4965 if ((RAW == '<') && (NXT(1) == '?') && 4966 (NXT(2) == 'x') && (NXT(3) == 'm') && 4967 (NXT(4) == 'l')) { 4968 xmlParseTextDecl(ctxt); 4969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4970 /* 4971 * The XML REC instructs us to stop parsing right here 4972 */ 4973 ctxt->instate = XML_PARSER_EOF; 4974 return; 4975 } 4976 } 4977 if (ctxt->myDoc == NULL) { 4978 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4979 } 4980 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4981 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 4982 4983 ctxt->instate = XML_PARSER_DTD; 4984 ctxt->external = 1; 4985 while (((RAW == '<') && (NXT(1) == '?')) || 4986 ((RAW == '<') && (NXT(1) == '!')) || 4987 (RAW == '%') || IS_BLANK(CUR)) { 4988 const xmlChar *check = CUR_PTR; 4989 int cons = ctxt->input->consumed; 4990 int tok = ctxt->token; 4991 4992 GROW; 4993 if ((RAW == '<') && 
(NXT(1) == '!') && (NXT(2) == '[')) { 4994 xmlParseConditionalSections(ctxt); 4995 } else if (IS_BLANK(CUR)) { 4996 NEXT; 4997 } else if (RAW == '%') { 4998 xmlParsePEReference(ctxt); 4999 } else 5000 xmlParseMarkupDecl(ctxt); 5001 5002 /* 5003 * Pop-up of finished entities. 5004 */ 5005 while ((RAW == 0) && (ctxt->inputNr > 1)) 5006 xmlPopInput(ctxt); 5007 5008 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 5009 (tok == ctxt->token)) { 5010 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5012 ctxt->sax->error(ctxt->userData, 5013 "Content error in the external subset\n"); 5014 ctxt->wellFormed = 0; 5015 ctxt->disableSAX = 1; 5016 break; 5017 } 5018 } 5019 5020 if (RAW != 0) { 5021 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 5022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5023 ctxt->sax->error(ctxt->userData, 5024 "Extra content at the end of the document\n"); 5025 ctxt->wellFormed = 0; 5026 ctxt->disableSAX = 1; 5027 } 5028 5029} 5030 5031/** 5032 * xmlParseReference: 5033 * @ctxt: an XML parser context 5034 * 5035 * parse and handle entity references in content, depending on the SAX 5036 * interface, this may end-up in a call to character() if this is a 5037 * CharRef, a predefined entity, if there is no reference() callback. 5038 * or if the parser was asked to switch to that mode. 5039 * 5040 * [67] Reference ::= EntityRef | CharRef 5041 */ 5042void 5043xmlParseReference(xmlParserCtxtPtr ctxt) { 5044 xmlEntityPtr ent; 5045 xmlChar *val; 5046 if (RAW != '&') return; 5047 5048 if (NXT(1) == '#') { 5049 int i = 0; 5050 xmlChar out[10]; 5051 int hex = NXT(2); 5052 int value = xmlParseCharRef(ctxt); 5053 5054 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5055 /* 5056 * So we are using non-UTF-8 buffers 5057 * Check that the char fit on 8bits, if not 5058 * generate a CharRef. 
5059 */ 5060 if (value <= 0xFF) { 5061 out[0] = value; 5062 out[1] = 0; 5063 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5064 (!ctxt->disableSAX)) 5065 ctxt->sax->characters(ctxt->userData, out, 1); 5066 } else { 5067 if ((hex == 'x') || (hex == 'X')) 5068 sprintf((char *)out, "#x%X", value); 5069 else 5070 sprintf((char *)out, "#%d", value); 5071 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5072 (!ctxt->disableSAX)) 5073 ctxt->sax->reference(ctxt->userData, out); 5074 } 5075 } else { 5076 /* 5077 * Just encode the value in UTF-8 5078 */ 5079 COPY_BUF(0 ,out, i, value); 5080 out[i] = 0; 5081 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5082 (!ctxt->disableSAX)) 5083 ctxt->sax->characters(ctxt->userData, out, i); 5084 } 5085 } else { 5086 ent = xmlParseEntityRef(ctxt); 5087 if (ent == NULL) return; 5088 if ((ent->name != NULL) && 5089 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5090 xmlNodePtr list = NULL; 5091 int ret; 5092 5093 5094 /* 5095 * The first reference to the entity trigger a parsing phase 5096 * where the ent->children is filled with the result from 5097 * the parsing. 5098 */ 5099 if (ent->children == NULL) { 5100 xmlChar *value; 5101 value = ent->content; 5102 5103 /* 5104 * Check that this entity is well formed 5105 */ 5106 if ((value != NULL) && 5107 (value[1] == 0) && (value[0] == '<') && 5108 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5109 /* 5110 * DONE: get definite answer on this !!! 5111 * Lots of entity decls are used to declare a single 5112 * char 5113 * <!ENTITY lt "<"> 5114 * Which seems to be valid since 5115 * 2.4: The ampersand character (&) and the left angle 5116 * bracket (<) may appear in their literal form only 5117 * when used ... They are also legal within the literal 5118 * entity value of an internal entity declaration;i 5119 * see "4.3.2 Well-Formed Parsed Entities". 5120 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
5121 * Looking at the OASIS test suite and James Clark 5122 * tests, this is broken. However the XML REC uses 5123 * it. Is the XML REC not well-formed ???? 5124 * This is a hack to avoid this problem 5125 * 5126 * ANSWER: since lt gt amp .. are already defined, 5127 * this is a redefinition and hence the fact that the 5128 * contentis not well balanced is not a Wf error, this 5129 * is lousy but acceptable. 5130 */ 5131 list = xmlNewDocText(ctxt->myDoc, value); 5132 if (list != NULL) { 5133 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5134 (ent->children == NULL)) { 5135 ent->children = list; 5136 ent->last = list; 5137 list->parent = (xmlNodePtr) ent; 5138 } else { 5139 xmlFreeNodeList(list); 5140 } 5141 } else if (list != NULL) { 5142 xmlFreeNodeList(list); 5143 } 5144 } else { 5145 /* 5146 * 4.3.2: An internal general parsed entity is well-formed 5147 * if its replacement text matches the production labeled 5148 * content. 5149 */ 5150 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5151 ctxt->depth++; 5152 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 5153 ctxt->sax, NULL, ctxt->depth, 5154 value, &list); 5155 ctxt->depth--; 5156 } else if (ent->etype == 5157 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5158 ctxt->depth++; 5159 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5160 ctxt->sax, NULL, ctxt->depth, 5161 ent->URI, ent->ExternalID, &list); 5162 ctxt->depth--; 5163 } else { 5164 ret = -1; 5165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5166 ctxt->sax->error(ctxt->userData, 5167 "Internal: invalid entity type\n"); 5168 } 5169 if (ret == XML_ERR_ENTITY_LOOP) { 5170 ctxt->errNo = XML_ERR_ENTITY_LOOP; 5171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5172 ctxt->sax->error(ctxt->userData, 5173 "Detected entity reference loop\n"); 5174 ctxt->wellFormed = 0; 5175 ctxt->disableSAX = 1; 5176 } else if ((ret == 0) && (list != NULL)) { 5177 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5178 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5179 (ent->children == NULL)) { 5180 ent->children = list; 5181 if (ctxt->replaceEntities) { 5182 /* 5183 * Prune it directly in the generated document 5184 * except for single text nodes. 5185 */ 5186 if ((list->type == XML_TEXT_NODE) && 5187 (list->next == NULL)) { 5188 list->parent = (xmlNodePtr) ent; 5189 list = NULL; 5190 } else { 5191 while (list != NULL) { 5192 list->parent = (xmlNodePtr) ctxt->node; 5193 if (list->next == NULL) 5194 ent->last = list; 5195 list = list->next; 5196 } 5197 list = ent->children; 5198 } 5199 } else { 5200 while (list != NULL) { 5201 list->parent = (xmlNodePtr) ent; 5202 if (list->next == NULL) 5203 ent->last = list; 5204 list = list->next; 5205 } 5206 } 5207 } else { 5208 xmlFreeNodeList(list); 5209 list = NULL; 5210 } 5211 } else if (ret > 0) { 5212 ctxt->errNo = ret; 5213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5214 ctxt->sax->error(ctxt->userData, 5215 "Entity value required\n"); 5216 ctxt->wellFormed = 0; 5217 ctxt->disableSAX = 1; 5218 } else if (list != NULL) { 5219 xmlFreeNodeList(list); 5220 list = NULL; 5221 } 5222 } 5223 } 5224 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5225 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5226 /* 5227 * Create a node. 
5228 */ 5229 ctxt->sax->reference(ctxt->userData, ent->name); 5230 return; 5231 } else if (ctxt->replaceEntities) { 5232 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5233 /* 5234 * Seems we are generating the DOM content, do 5235 * a simple tree copy for all references except the first 5236 * In the first occurence list contains the replacement 5237 */ 5238 if (list == NULL) { 5239 xmlNodePtr new, cur; 5240 cur = ent->children; 5241 while (cur != NULL) { 5242 new = xmlCopyNode(cur, 1); 5243 xmlAddChild(ctxt->node, new); 5244 if (cur == ent->last) 5245 break; 5246 cur = cur->next; 5247 } 5248 } else { 5249 /* 5250 * the name change is to avoid coalescing of the 5251 * node with a prossible previous text one which 5252 * would make ent->children a dandling pointer 5253 */ 5254 if (ent->children->type == XML_TEXT_NODE) 5255 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5256 if ((ent->last != ent->children) && 5257 (ent->last->type == XML_TEXT_NODE)) 5258 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5259 xmlAddChildList(ctxt->node, ent->children); 5260 } 5261 5262 /* 5263 * This is to avoid a nasty side effect, see 5264 * characters() in SAX.c 5265 */ 5266 ctxt->nodemem = 0; 5267 ctxt->nodelen = 0; 5268 return; 5269 } else { 5270 /* 5271 * Probably running in SAX mode 5272 */ 5273 xmlParserInputPtr input; 5274 5275 input = xmlNewEntityInputStream(ctxt, ent); 5276 xmlPushInput(ctxt, input); 5277 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5278 (RAW == '<') && (NXT(1) == '?') && 5279 (NXT(2) == 'x') && (NXT(3) == 'm') && 5280 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5281 xmlParseTextDecl(ctxt); 5282 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5283 /* 5284 * The XML REC instructs us to stop parsing right here 5285 */ 5286 ctxt->instate = XML_PARSER_EOF; 5287 return; 5288 } 5289 if (input->standalone == 1) { 5290 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5292 
ctxt->sax->error(ctxt->userData, 5293 "external parsed entities cannot be standalone\n"); 5294 ctxt->wellFormed = 0; 5295 ctxt->disableSAX = 1; 5296 } 5297 } 5298 return; 5299 } 5300 } 5301 } else { 5302 val = ent->content; 5303 if (val == NULL) return; 5304 /* 5305 * inline the entity. 5306 */ 5307 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5308 (!ctxt->disableSAX)) 5309 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5310 } 5311 } 5312} 5313 5314/** 5315 * xmlParseEntityRef: 5316 * @ctxt: an XML parser context 5317 * 5318 * parse ENTITY references declarations 5319 * 5320 * [68] EntityRef ::= '&' Name ';' 5321 * 5322 * [ WFC: Entity Declared ] 5323 * In a document without any DTD, a document with only an internal DTD 5324 * subset which contains no parameter entity references, or a document 5325 * with "standalone='yes'", the Name given in the entity reference 5326 * must match that in an entity declaration, except that well-formed 5327 * documents need not declare any of the following entities: amp, lt, 5328 * gt, apos, quot. The declaration of a parameter entity must precede 5329 * any reference to it. Similarly, the declaration of a general entity 5330 * must precede any reference to it which appears in a default value in an 5331 * attribute-list declaration. Note that if entities are declared in the 5332 * external subset or in external parameter entities, a non-validating 5333 * processor is not obligated to read and process their declarations; 5334 * for such documents, the rule that an entity must be declared is a 5335 * well-formedness constraint only if standalone='yes'. 5336 * 5337 * [ WFC: Parsed Entity ] 5338 * An entity reference must not contain the name of an unparsed entity 5339 * 5340 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5341 */ 5342xmlEntityPtr 5343xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5344 xmlChar *name; 5345 xmlEntityPtr ent = NULL; 5346 5347 GROW; 5348 5349 if (RAW == '&') { 5350 NEXT; 5351 name = xmlParseName(ctxt); 5352 if (name == NULL) { 5353 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5355 ctxt->sax->error(ctxt->userData, 5356 "xmlParseEntityRef: no name\n"); 5357 ctxt->wellFormed = 0; 5358 ctxt->disableSAX = 1; 5359 } else { 5360 if (RAW == ';') { 5361 NEXT; 5362 /* 5363 * Ask first SAX for entity resolution, otherwise try the 5364 * predefined set. 5365 */ 5366 if (ctxt->sax != NULL) { 5367 if (ctxt->sax->getEntity != NULL) 5368 ent = ctxt->sax->getEntity(ctxt->userData, name); 5369 if (ent == NULL) 5370 ent = xmlGetPredefinedEntity(name); 5371 } 5372 /* 5373 * [ WFC: Entity Declared ] 5374 * In a document without any DTD, a document with only an 5375 * internal DTD subset which contains no parameter entity 5376 * references, or a document with "standalone='yes'", the 5377 * Name given in the entity reference must match that in an 5378 * entity declaration, except that well-formed documents 5379 * need not declare any of the following entities: amp, lt, 5380 * gt, apos, quot. 5381 * The declaration of a parameter entity must precede any 5382 * reference to it. 5383 * Similarly, the declaration of a general entity must 5384 * precede any reference to it which appears in a default 5385 * value in an attribute-list declaration. Note that if 5386 * entities are declared in the external subset or in 5387 * external parameter entities, a non-validating processor 5388 * is not obligated to read and process their declarations; 5389 * for such documents, the rule that an entity must be 5390 * declared is a well-formedness constraint only if 5391 * standalone='yes'. 
5392 */ 5393 if (ent == NULL) { 5394 if ((ctxt->standalone == 1) || 5395 ((ctxt->hasExternalSubset == 0) && 5396 (ctxt->hasPErefs == 0))) { 5397 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5399 ctxt->sax->error(ctxt->userData, 5400 "Entity '%s' not defined\n", name); 5401 ctxt->wellFormed = 0; 5402 ctxt->disableSAX = 1; 5403 } else { 5404 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5406 ctxt->sax->error(ctxt->userData, 5407 "Entity '%s' not defined\n", name); 5408 } 5409 } 5410 5411 /* 5412 * [ WFC: Parsed Entity ] 5413 * An entity reference must not contain the name of an 5414 * unparsed entity 5415 */ 5416 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5417 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5419 ctxt->sax->error(ctxt->userData, 5420 "Entity reference to unparsed entity %s\n", name); 5421 ctxt->wellFormed = 0; 5422 ctxt->disableSAX = 1; 5423 } 5424 5425 /* 5426 * [ WFC: No External Entity References ] 5427 * Attribute values cannot contain direct or indirect 5428 * entity references to external entities. 5429 */ 5430 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5431 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5432 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5434 ctxt->sax->error(ctxt->userData, 5435 "Attribute references external entity '%s'\n", name); 5436 ctxt->wellFormed = 0; 5437 ctxt->disableSAX = 1; 5438 } 5439 /* 5440 * [ WFC: No < in Attribute Values ] 5441 * The replacement text of any entity referred to directly or 5442 * indirectly in an attribute value (other than "<") must 5443 * not contain a <. 
5444 */ 5445 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5446 (ent != NULL) && 5447 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5448 (ent->content != NULL) && 5449 (xmlStrchr(ent->content, '<'))) { 5450 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5452 ctxt->sax->error(ctxt->userData, 5453 "'<' in entity '%s' is not allowed in attributes values\n", name); 5454 ctxt->wellFormed = 0; 5455 ctxt->disableSAX = 1; 5456 } 5457 5458 /* 5459 * Internal check, no parameter entities here ... 5460 */ 5461 else { 5462 switch (ent->etype) { 5463 case XML_INTERNAL_PARAMETER_ENTITY: 5464 case XML_EXTERNAL_PARAMETER_ENTITY: 5465 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5467 ctxt->sax->error(ctxt->userData, 5468 "Attempt to reference the parameter entity '%s'\n", name); 5469 ctxt->wellFormed = 0; 5470 ctxt->disableSAX = 1; 5471 break; 5472 default: 5473 break; 5474 } 5475 } 5476 5477 /* 5478 * [ WFC: No Recursion ] 5479 * A parsed entity must not contain a recursive reference 5480 * to itself, either directly or indirectly. 5481 * Done somewhere else 5482 */ 5483 5484 } else { 5485 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5487 ctxt->sax->error(ctxt->userData, 5488 "xmlParseEntityRef: expecting ';'\n"); 5489 ctxt->wellFormed = 0; 5490 ctxt->disableSAX = 1; 5491 } 5492 xmlFree(name); 5493 } 5494 } 5495 return(ent); 5496} 5497 5498/** 5499 * xmlParseStringEntityRef: 5500 * @ctxt: an XML parser context 5501 * @str: a pointer to an index in the string 5502 * 5503 * parse ENTITY references declarations, but this version parses it from 5504 * a string value. 
5505 * 5506 * [68] EntityRef ::= '&' Name ';' 5507 * 5508 * [ WFC: Entity Declared ] 5509 * In a document without any DTD, a document with only an internal DTD 5510 * subset which contains no parameter entity references, or a document 5511 * with "standalone='yes'", the Name given in the entity reference 5512 * must match that in an entity declaration, except that well-formed 5513 * documents need not declare any of the following entities: amp, lt, 5514 * gt, apos, quot. The declaration of a parameter entity must precede 5515 * any reference to it. Similarly, the declaration of a general entity 5516 * must precede any reference to it which appears in a default value in an 5517 * attribute-list declaration. Note that if entities are declared in the 5518 * external subset or in external parameter entities, a non-validating 5519 * processor is not obligated to read and process their declarations; 5520 * for such documents, the rule that an entity must be declared is a 5521 * well-formedness constraint only if standalone='yes'. 5522 * 5523 * [ WFC: Parsed Entity ] 5524 * An entity reference must not contain the name of an unparsed entity 5525 * 5526 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5527 * is updated to the current location in the string. 
5528 */ 5529xmlEntityPtr 5530xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5531 xmlChar *name; 5532 const xmlChar *ptr; 5533 xmlChar cur; 5534 xmlEntityPtr ent = NULL; 5535 5536 if ((str == NULL) || (*str == NULL)) 5537 return(NULL); 5538 ptr = *str; 5539 cur = *ptr; 5540 if (cur == '&') { 5541 ptr++; 5542 cur = *ptr; 5543 name = xmlParseStringName(ctxt, &ptr); 5544 if (name == NULL) { 5545 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5547 ctxt->sax->error(ctxt->userData, 5548 "xmlParseStringEntityRef: no name\n"); 5549 ctxt->wellFormed = 0; 5550 ctxt->disableSAX = 1; 5551 } else { 5552 if (*ptr == ';') { 5553 ptr++; 5554 /* 5555 * Ask first SAX for entity resolution, otherwise try the 5556 * predefined set. 5557 */ 5558 if (ctxt->sax != NULL) { 5559 if (ctxt->sax->getEntity != NULL) 5560 ent = ctxt->sax->getEntity(ctxt->userData, name); 5561 if (ent == NULL) 5562 ent = xmlGetPredefinedEntity(name); 5563 } 5564 /* 5565 * [ WFC: Entity Declared ] 5566 * In a document without any DTD, a document with only an 5567 * internal DTD subset which contains no parameter entity 5568 * references, or a document with "standalone='yes'", the 5569 * Name given in the entity reference must match that in an 5570 * entity declaration, except that well-formed documents 5571 * need not declare any of the following entities: amp, lt, 5572 * gt, apos, quot. 5573 * The declaration of a parameter entity must precede any 5574 * reference to it. 5575 * Similarly, the declaration of a general entity must 5576 * precede any reference to it which appears in a default 5577 * value in an attribute-list declaration. 
Note that if 5578 * entities are declared in the external subset or in 5579 * external parameter entities, a non-validating processor 5580 * is not obligated to read and process their declarations; 5581 * for such documents, the rule that an entity must be 5582 * declared is a well-formedness constraint only if 5583 * standalone='yes'. 5584 */ 5585 if (ent == NULL) { 5586 if ((ctxt->standalone == 1) || 5587 ((ctxt->hasExternalSubset == 0) && 5588 (ctxt->hasPErefs == 0))) { 5589 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5591 ctxt->sax->error(ctxt->userData, 5592 "Entity '%s' not defined\n", name); 5593 ctxt->wellFormed = 0; 5594 ctxt->disableSAX = 1; 5595 } else { 5596 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5597 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5598 ctxt->sax->warning(ctxt->userData, 5599 "Entity '%s' not defined\n", name); 5600 } 5601 } 5602 5603 /* 5604 * [ WFC: Parsed Entity ] 5605 * An entity reference must not contain the name of an 5606 * unparsed entity 5607 */ 5608 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5609 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5611 ctxt->sax->error(ctxt->userData, 5612 "Entity reference to unparsed entity %s\n", name); 5613 ctxt->wellFormed = 0; 5614 ctxt->disableSAX = 1; 5615 } 5616 5617 /* 5618 * [ WFC: No External Entity References ] 5619 * Attribute values cannot contain direct or indirect 5620 * entity references to external entities. 
5621 */ 5622 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5623 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5624 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5626 ctxt->sax->error(ctxt->userData, 5627 "Attribute references external entity '%s'\n", name); 5628 ctxt->wellFormed = 0; 5629 ctxt->disableSAX = 1; 5630 } 5631 /* 5632 * [ WFC: No < in Attribute Values ] 5633 * The replacement text of any entity referred to directly or 5634 * indirectly in an attribute value (other than "<") must 5635 * not contain a <. 5636 */ 5637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5638 (ent != NULL) && 5639 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5640 (ent->content != NULL) && 5641 (xmlStrchr(ent->content, '<'))) { 5642 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5644 ctxt->sax->error(ctxt->userData, 5645 "'<' in entity '%s' is not allowed in attributes values\n", name); 5646 ctxt->wellFormed = 0; 5647 ctxt->disableSAX = 1; 5648 } 5649 5650 /* 5651 * Internal check, no parameter entities here ... 5652 */ 5653 else { 5654 switch (ent->etype) { 5655 case XML_INTERNAL_PARAMETER_ENTITY: 5656 case XML_EXTERNAL_PARAMETER_ENTITY: 5657 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5659 ctxt->sax->error(ctxt->userData, 5660 "Attempt to reference the parameter entity '%s'\n", name); 5661 ctxt->wellFormed = 0; 5662 ctxt->disableSAX = 1; 5663 break; 5664 default: 5665 break; 5666 } 5667 } 5668 5669 /* 5670 * [ WFC: No Recursion ] 5671 * A parsed entity must not contain a recursive reference 5672 * to itself, either directly or indirectly. 
5673 * Done somewhwere else 5674 */ 5675 5676 } else { 5677 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5679 ctxt->sax->error(ctxt->userData, 5680 "xmlParseStringEntityRef: expecting ';'\n"); 5681 ctxt->wellFormed = 0; 5682 ctxt->disableSAX = 1; 5683 } 5684 xmlFree(name); 5685 } 5686 } 5687 *str = ptr; 5688 return(ent); 5689} 5690 5691/** 5692 * xmlParsePEReference: 5693 * @ctxt: an XML parser context 5694 * 5695 * parse PEReference declarations 5696 * The entity content is handled directly by pushing it's content as 5697 * a new input stream. 5698 * 5699 * [69] PEReference ::= '%' Name ';' 5700 * 5701 * [ WFC: No Recursion ] 5702 * A parsed entity must not contain a recursive 5703 * reference to itself, either directly or indirectly. 5704 * 5705 * [ WFC: Entity Declared ] 5706 * In a document without any DTD, a document with only an internal DTD 5707 * subset which contains no parameter entity references, or a document 5708 * with "standalone='yes'", ... ... The declaration of a parameter 5709 * entity must precede any reference to it... 5710 * 5711 * [ VC: Entity Declared ] 5712 * In a document with an external subset or external parameter entities 5713 * with "standalone='no'", ... ... The declaration of a parameter entity 5714 * must precede any reference to it... 5715 * 5716 * [ WFC: In DTD ] 5717 * Parameter-entity references may only appear in the DTD. 5718 * NOTE: misleading but this is handled. 
5719 */ 5720void 5721xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5722 xmlChar *name; 5723 xmlEntityPtr entity = NULL; 5724 xmlParserInputPtr input; 5725 5726 if (RAW == '%') { 5727 NEXT; 5728 name = xmlParseName(ctxt); 5729 if (name == NULL) { 5730 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5732 ctxt->sax->error(ctxt->userData, 5733 "xmlParsePEReference: no name\n"); 5734 ctxt->wellFormed = 0; 5735 ctxt->disableSAX = 1; 5736 } else { 5737 if (RAW == ';') { 5738 NEXT; 5739 if ((ctxt->sax != NULL) && 5740 (ctxt->sax->getParameterEntity != NULL)) 5741 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5742 name); 5743 if (entity == NULL) { 5744 /* 5745 * [ WFC: Entity Declared ] 5746 * In a document without any DTD, a document with only an 5747 * internal DTD subset which contains no parameter entity 5748 * references, or a document with "standalone='yes'", ... 5749 * ... The declaration of a parameter entity must precede 5750 * any reference to it... 5751 */ 5752 if ((ctxt->standalone == 1) || 5753 ((ctxt->hasExternalSubset == 0) && 5754 (ctxt->hasPErefs == 0))) { 5755 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5756 if ((!ctxt->disableSAX) && 5757 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5758 ctxt->sax->error(ctxt->userData, 5759 "PEReference: %%%s; not found\n", name); 5760 ctxt->wellFormed = 0; 5761 ctxt->disableSAX = 1; 5762 } else { 5763 /* 5764 * [ VC: Entity Declared ] 5765 * In a document with an external subset or external 5766 * parameter entities with "standalone='no'", ... 5767 * ... The declaration of a parameter entity must precede 5768 * any reference to it... 
5769 */ 5770 if ((!ctxt->disableSAX) && 5771 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5772 ctxt->sax->warning(ctxt->userData, 5773 "PEReference: %%%s; not found\n", name); 5774 ctxt->valid = 0; 5775 } 5776 } else { 5777 /* 5778 * Internal checking in case the entity quest barfed 5779 */ 5780 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5781 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5782 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5783 ctxt->sax->warning(ctxt->userData, 5784 "Internal: %%%s; is not a parameter entity\n", name); 5785 } else { 5786 /* 5787 * TODO !!! 5788 * handle the extra spaces added before and after 5789 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5790 */ 5791 input = xmlNewEntityInputStream(ctxt, entity); 5792 xmlPushInput(ctxt, input); 5793 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5794 (RAW == '<') && (NXT(1) == '?') && 5795 (NXT(2) == 'x') && (NXT(3) == 'm') && 5796 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5797 xmlParseTextDecl(ctxt); 5798 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5799 /* 5800 * The XML REC instructs us to stop parsing 5801 * right here 5802 */ 5803 ctxt->instate = XML_PARSER_EOF; 5804 xmlFree(name); 5805 return; 5806 } 5807 } 5808 if (ctxt->token == 0) 5809 ctxt->token = ' '; 5810 } 5811 } 5812 ctxt->hasPErefs = 1; 5813 } else { 5814 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5816 ctxt->sax->error(ctxt->userData, 5817 "xmlParsePEReference: expecting ';'\n"); 5818 ctxt->wellFormed = 0; 5819 ctxt->disableSAX = 1; 5820 } 5821 xmlFree(name); 5822 } 5823 } 5824} 5825 5826/** 5827 * xmlParseStringPEReference: 5828 * @ctxt: an XML parser context 5829 * @str: a pointer to an index in the string 5830 * 5831 * parse PEReference declarations 5832 * 5833 * [69] PEReference ::= '%' Name ';' 5834 * 5835 * [ WFC: No Recursion ] 5836 * A parsed entity must not contain a recursive 5837 * reference to itself, 
either directly or indirectly. 5838 * 5839 * [ WFC: Entity Declared ] 5840 * In a document without any DTD, a document with only an internal DTD 5841 * subset which contains no parameter entity references, or a document 5842 * with "standalone='yes'", ... ... The declaration of a parameter 5843 * entity must precede any reference to it... 5844 * 5845 * [ VC: Entity Declared ] 5846 * In a document with an external subset or external parameter entities 5847 * with "standalone='no'", ... ... The declaration of a parameter entity 5848 * must precede any reference to it... 5849 * 5850 * [ WFC: In DTD ] 5851 * Parameter-entity references may only appear in the DTD. 5852 * NOTE: misleading but this is handled. 5853 * 5854 * Returns the string of the entity content. 5855 * str is updated to the current value of the index 5856 */ 5857xmlEntityPtr 5858xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5859 const xmlChar *ptr; 5860 xmlChar cur; 5861 xmlChar *name; 5862 xmlEntityPtr entity = NULL; 5863 5864 if ((str == NULL) || (*str == NULL)) return(NULL); 5865 ptr = *str; 5866 cur = *ptr; 5867 if (cur == '%') { 5868 ptr++; 5869 cur = *ptr; 5870 name = xmlParseStringName(ctxt, &ptr); 5871 if (name == NULL) { 5872 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5874 ctxt->sax->error(ctxt->userData, 5875 "xmlParseStringPEReference: no name\n"); 5876 ctxt->wellFormed = 0; 5877 ctxt->disableSAX = 1; 5878 } else { 5879 cur = *ptr; 5880 if (cur == ';') { 5881 ptr++; 5882 cur = *ptr; 5883 if ((ctxt->sax != NULL) && 5884 (ctxt->sax->getParameterEntity != NULL)) 5885 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5886 name); 5887 if (entity == NULL) { 5888 /* 5889 * [ WFC: Entity Declared ] 5890 * In a document without any DTD, a document with only an 5891 * internal DTD subset which contains no parameter entity 5892 * references, or a document with "standalone='yes'", ... 5893 * ... 
The declaration of a parameter entity must precede 5894 * any reference to it... 5895 */ 5896 if ((ctxt->standalone == 1) || 5897 ((ctxt->hasExternalSubset == 0) && 5898 (ctxt->hasPErefs == 0))) { 5899 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5901 ctxt->sax->error(ctxt->userData, 5902 "PEReference: %%%s; not found\n", name); 5903 ctxt->wellFormed = 0; 5904 ctxt->disableSAX = 1; 5905 } else { 5906 /* 5907 * [ VC: Entity Declared ] 5908 * In a document with an external subset or external 5909 * parameter entities with "standalone='no'", ... 5910 * ... The declaration of a parameter entity must 5911 * precede any reference to it... 5912 */ 5913 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5914 ctxt->sax->warning(ctxt->userData, 5915 "PEReference: %%%s; not found\n", name); 5916 ctxt->valid = 0; 5917 } 5918 } else { 5919 /* 5920 * Internal checking in case the entity quest barfed 5921 */ 5922 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5923 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5924 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5925 ctxt->sax->warning(ctxt->userData, 5926 "Internal: %%%s; is not a parameter entity\n", name); 5927 } 5928 } 5929 ctxt->hasPErefs = 1; 5930 } else { 5931 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5933 ctxt->sax->error(ctxt->userData, 5934 "xmlParseStringPEReference: expecting ';'\n"); 5935 ctxt->wellFormed = 0; 5936 ctxt->disableSAX = 1; 5937 } 5938 xmlFree(name); 5939 } 5940 } 5941 *str = ptr; 5942 return(entity); 5943} 5944 5945/** 5946 * xmlParseDocTypeDecl: 5947 * @ctxt: an XML parser context 5948 * 5949 * parse a DOCTYPE declaration 5950 * 5951 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5952 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 5953 * 5954 * [ VC: Root Element Type ] 5955 * The Name in the document type declaration must match the element 5956 * type of the root element. 5957 */ 5958 5959void 5960xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5961 xmlChar *name = NULL; 5962 xmlChar *ExternalID = NULL; 5963 xmlChar *URI = NULL; 5964 5965 /* 5966 * We know that '<!DOCTYPE' has been detected. 5967 */ 5968 SKIP(9); 5969 5970 SKIP_BLANKS; 5971 5972 /* 5973 * Parse the DOCTYPE name. 5974 */ 5975 name = xmlParseName(ctxt); 5976 if (name == NULL) { 5977 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5979 ctxt->sax->error(ctxt->userData, 5980 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5981 ctxt->wellFormed = 0; 5982 ctxt->disableSAX = 1; 5983 } 5984 ctxt->intSubName = name; 5985 5986 SKIP_BLANKS; 5987 5988 /* 5989 * Check for SystemID and ExternalID 5990 */ 5991 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 5992 5993 if ((URI != NULL) || (ExternalID != NULL)) { 5994 ctxt->hasExternalSubset = 1; 5995 } 5996 ctxt->extSubURI = URI; 5997 ctxt->extSubSystem = ExternalID; 5998 5999 SKIP_BLANKS; 6000 6001 /* 6002 * Create and update the internal subset. 6003 */ 6004 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6005 (!ctxt->disableSAX)) 6006 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6007 6008 /* 6009 * Is there any internal subset declarations ? 6010 * they are handled separately in xmlParseInternalSubset() 6011 */ 6012 if (RAW == '[') 6013 return; 6014 6015 /* 6016 * We should be at the end of the DOCTYPE declaration. 
6017 */ 6018 if (RAW != '>') { 6019 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6021 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 6022 ctxt->wellFormed = 0; 6023 ctxt->disableSAX = 1; 6024 } 6025 NEXT; 6026} 6027 6028/** 6029 * xmlParseInternalsubset: 6030 * @ctxt: an XML parser context 6031 * 6032 * parse the internal subset declaration 6033 * 6034 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6035 */ 6036 6037static void 6038xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6039 /* 6040 * Is there any DTD definition ? 6041 */ 6042 if (RAW == '[') { 6043 ctxt->instate = XML_PARSER_DTD; 6044 NEXT; 6045 /* 6046 * Parse the succession of Markup declarations and 6047 * PEReferences. 6048 * Subsequence (markupdecl | PEReference | S)* 6049 */ 6050 while (RAW != ']') { 6051 const xmlChar *check = CUR_PTR; 6052 int cons = ctxt->input->consumed; 6053 6054 SKIP_BLANKS; 6055 xmlParseMarkupDecl(ctxt); 6056 xmlParsePEReference(ctxt); 6057 6058 /* 6059 * Pop-up of finished entities. 6060 */ 6061 while ((RAW == 0) && (ctxt->inputNr > 1)) 6062 xmlPopInput(ctxt); 6063 6064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6065 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6067 ctxt->sax->error(ctxt->userData, 6068 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6069 ctxt->wellFormed = 0; 6070 ctxt->disableSAX = 1; 6071 break; 6072 } 6073 } 6074 if (RAW == ']') { 6075 NEXT; 6076 SKIP_BLANKS; 6077 } 6078 } 6079 6080 /* 6081 * We should be at the end of the DOCTYPE declaration. 
6082 */ 6083 if (RAW != '>') { 6084 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 6085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6086 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 6087 ctxt->wellFormed = 0; 6088 ctxt->disableSAX = 1; 6089 } 6090 NEXT; 6091} 6092 6093/** 6094 * xmlParseAttribute: 6095 * @ctxt: an XML parser context 6096 * @value: a xmlChar ** used to store the value of the attribute 6097 * 6098 * parse an attribute 6099 * 6100 * [41] Attribute ::= Name Eq AttValue 6101 * 6102 * [ WFC: No External Entity References ] 6103 * Attribute values cannot contain direct or indirect entity references 6104 * to external entities. 6105 * 6106 * [ WFC: No < in Attribute Values ] 6107 * The replacement text of any entity referred to directly or indirectly in 6108 * an attribute value (other than "<") must not contain a <. 6109 * 6110 * [ VC: Attribute Value Type ] 6111 * The attribute must have been declared; the value must be of the type 6112 * declared for it. 6113 * 6114 * [25] Eq ::= S? '=' S? 6115 * 6116 * With namespace: 6117 * 6118 * [NS 11] Attribute ::= QName Eq AttValue 6119 * 6120 * Also the case QName == xmlns:??? is handled independently as a namespace 6121 * definition. 6122 * 6123 * Returns the attribute name, and the value in *value. 
6124 */ 6125 6126xmlChar * 6127xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6128 xmlChar *name, *val; 6129 6130 *value = NULL; 6131 name = xmlParseName(ctxt); 6132 if (name == NULL) { 6133 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6135 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 6136 ctxt->wellFormed = 0; 6137 ctxt->disableSAX = 1; 6138 return(NULL); 6139 } 6140 6141 /* 6142 * read the value 6143 */ 6144 SKIP_BLANKS; 6145 if (RAW == '=') { 6146 NEXT; 6147 SKIP_BLANKS; 6148 val = xmlParseAttValue(ctxt); 6149 ctxt->instate = XML_PARSER_CONTENT; 6150 } else { 6151 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6153 ctxt->sax->error(ctxt->userData, 6154 "Specification mandate value for attribute %s\n", name); 6155 ctxt->wellFormed = 0; 6156 ctxt->disableSAX = 1; 6157 xmlFree(name); 6158 return(NULL); 6159 } 6160 6161 /* 6162 * Check that xml:lang conforms to the specification 6163 * No more registered as an error, just generate a warning now 6164 * since this was deprecated in XML second edition 6165 */ 6166 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6167 if (!xmlCheckLanguageID(val)) { 6168 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 6169 ctxt->sax->warning(ctxt->userData, 6170 "Malformed value for xml:lang : %s\n", val); 6171 } 6172 } 6173 6174 /* 6175 * Check that xml:space conforms to the specification 6176 */ 6177 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6178 if (xmlStrEqual(val, BAD_CAST "default")) 6179 *(ctxt->space) = 0; 6180 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6181 *(ctxt->space) = 1; 6182 else { 6183 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6185 ctxt->sax->error(ctxt->userData, 6186"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 6187 val); 6188 
ctxt->wellFormed = 0; 6189 ctxt->disableSAX = 1; 6190 } 6191 } 6192 6193 *value = val; 6194 return(name); 6195} 6196 6197/** 6198 * xmlParseStartTag: 6199 * @ctxt: an XML parser context 6200 * 6201 * parse a start of tag either for rule element or 6202 * EmptyElement. In both case we don't parse the tag closing chars. 6203 * 6204 * [40] STag ::= '<' Name (S Attribute)* S? '>' 6205 * 6206 * [ WFC: Unique Att Spec ] 6207 * No attribute name may appear more than once in the same start-tag or 6208 * empty-element tag. 6209 * 6210 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6211 * 6212 * [ WFC: Unique Att Spec ] 6213 * No attribute name may appear more than once in the same start-tag or 6214 * empty-element tag. 6215 * 6216 * With namespace: 6217 * 6218 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6219 * 6220 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6221 * 6222 * Returns the element name parsed 6223 */ 6224 6225xmlChar * 6226xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6227 xmlChar *name; 6228 xmlChar *attname; 6229 xmlChar *attvalue; 6230 const xmlChar **atts = NULL; 6231 int nbatts = 0; 6232 int maxatts = 0; 6233 int i; 6234 6235 if (RAW != '<') return(NULL); 6236 NEXT1; 6237 6238 name = xmlParseName(ctxt); 6239 if (name == NULL) { 6240 ctxt->errNo = XML_ERR_NAME_REQUIRED; 6241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6242 ctxt->sax->error(ctxt->userData, 6243 "xmlParseStartTag: invalid element name\n"); 6244 ctxt->wellFormed = 0; 6245 ctxt->disableSAX = 1; 6246 return(NULL); 6247 } 6248 6249 /* 6250 * Now parse the attributes, it ends up with the ending 6251 * 6252 * (S Attribute)* S? 
6253 */ 6254 SKIP_BLANKS; 6255 GROW; 6256 6257 while ((RAW != '>') && 6258 ((RAW != '/') || (NXT(1) != '>')) && 6259 (IS_CHAR(RAW))) { 6260 const xmlChar *q = CUR_PTR; 6261 int cons = ctxt->input->consumed; 6262 6263 attname = xmlParseAttribute(ctxt, &attvalue); 6264 if ((attname != NULL) && (attvalue != NULL)) { 6265 /* 6266 * [ WFC: Unique Att Spec ] 6267 * No attribute name may appear more than once in the same 6268 * start-tag or empty-element tag. 6269 */ 6270 for (i = 0; i < nbatts;i += 2) { 6271 if (xmlStrEqual(atts[i], attname)) { 6272 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 6273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6274 ctxt->sax->error(ctxt->userData, 6275 "Attribute %s redefined\n", 6276 attname); 6277 ctxt->wellFormed = 0; 6278 ctxt->disableSAX = 1; 6279 xmlFree(attname); 6280 xmlFree(attvalue); 6281 goto failed; 6282 } 6283 } 6284 6285 /* 6286 * Add the pair to atts 6287 */ 6288 if (atts == NULL) { 6289 maxatts = 10; 6290 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6291 if (atts == NULL) { 6292 xmlGenericError(xmlGenericErrorContext, 6293 "malloc of %ld byte failed\n", 6294 maxatts * (long)sizeof(xmlChar *)); 6295 return(NULL); 6296 } 6297 } else if (nbatts + 4 > maxatts) { 6298 maxatts *= 2; 6299 atts = (const xmlChar **) xmlRealloc((void *) atts, 6300 maxatts * sizeof(xmlChar *)); 6301 if (atts == NULL) { 6302 xmlGenericError(xmlGenericErrorContext, 6303 "realloc of %ld byte failed\n", 6304 maxatts * (long)sizeof(xmlChar *)); 6305 return(NULL); 6306 } 6307 } 6308 atts[nbatts++] = attname; 6309 atts[nbatts++] = attvalue; 6310 atts[nbatts] = NULL; 6311 atts[nbatts + 1] = NULL; 6312 } else { 6313 if (attname != NULL) 6314 xmlFree(attname); 6315 if (attvalue != NULL) 6316 xmlFree(attvalue); 6317 } 6318 6319failed: 6320 6321 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6322 break; 6323 if (!IS_BLANK(RAW)) { 6324 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6325 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6326 ctxt->sax->error(ctxt->userData, 6327 "attributes construct error\n"); 6328 ctxt->wellFormed = 0; 6329 ctxt->disableSAX = 1; 6330 } 6331 SKIP_BLANKS; 6332 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6333 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6335 ctxt->sax->error(ctxt->userData, 6336 "xmlParseStartTag: problem parsing attributes\n"); 6337 ctxt->wellFormed = 0; 6338 ctxt->disableSAX = 1; 6339 break; 6340 } 6341 GROW; 6342 } 6343 6344 /* 6345 * SAX: Start of Element ! 6346 */ 6347 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6348 (!ctxt->disableSAX)) 6349 ctxt->sax->startElement(ctxt->userData, name, atts); 6350 6351 if (atts != NULL) { 6352 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6353 xmlFree((void *) atts); 6354 } 6355 return(name); 6356} 6357 6358/** 6359 * xmlParseEndTag: 6360 * @ctxt: an XML parser context 6361 * 6362 * parse an end of tag 6363 * 6364 * [42] ETag ::= '</' Name S? '>' 6365 * 6366 * With namespace 6367 * 6368 * [NS 9] ETag ::= '</' QName S? '>' 6369 */ 6370 6371void 6372xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6373 xmlChar *name; 6374 xmlChar *oldname; 6375 6376 GROW; 6377 if ((RAW != '<') || (NXT(1) != '/')) { 6378 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6380 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6381 ctxt->wellFormed = 0; 6382 ctxt->disableSAX = 1; 6383 return; 6384 } 6385 SKIP(2); 6386 6387 name = xmlParseName(ctxt); 6388 6389 /* 6390 * We should definitely be at the ending "S? 
'>'" part 6391 */ 6392 GROW; 6393 SKIP_BLANKS; 6394 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6395 ctxt->errNo = XML_ERR_GT_REQUIRED; 6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6397 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6398 ctxt->wellFormed = 0; 6399 ctxt->disableSAX = 1; 6400 } else 6401 NEXT1; 6402 6403 /* 6404 * [ WFC: Element Type Match ] 6405 * The Name in an element's end-tag must match the element type in the 6406 * start-tag. 6407 * 6408 */ 6409 if ((name == NULL) || (ctxt->name == NULL) || 6410 (!xmlStrEqual(name, ctxt->name))) { 6411 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6413 if ((name != NULL) && (ctxt->name != NULL)) { 6414 ctxt->sax->error(ctxt->userData, 6415 "Opening and ending tag mismatch: %s and %s\n", 6416 ctxt->name, name); 6417 } else if (ctxt->name != NULL) { 6418 ctxt->sax->error(ctxt->userData, 6419 "Ending tag eror for: %s\n", ctxt->name); 6420 } else { 6421 ctxt->sax->error(ctxt->userData, 6422 "Ending tag error: internal error ???\n"); 6423 } 6424 6425 } 6426 ctxt->wellFormed = 0; 6427 ctxt->disableSAX = 1; 6428 } 6429 6430 /* 6431 * SAX: End of Tag 6432 */ 6433 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6434 (!ctxt->disableSAX)) 6435 ctxt->sax->endElement(ctxt->userData, name); 6436 6437 if (name != NULL) 6438 xmlFree(name); 6439 oldname = namePop(ctxt); 6440 spacePop(ctxt); 6441 if (oldname != NULL) { 6442#ifdef DEBUG_STACK 6443 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6444#endif 6445 xmlFree(oldname); 6446 } 6447 return; 6448} 6449 6450/** 6451 * xmlParseCDSect: 6452 * @ctxt: an XML parser context 6453 * 6454 * Parse escaped pure raw content. 
6455 * 6456 * [18] CDSect ::= CDStart CData CDEnd 6457 * 6458 * [19] CDStart ::= '<![CDATA[' 6459 * 6460 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6461 * 6462 * [21] CDEnd ::= ']]>' 6463 */ 6464void 6465xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6466 xmlChar *buf = NULL; 6467 int len = 0; 6468 int size = XML_PARSER_BUFFER_SIZE; 6469 int r, rl; 6470 int s, sl; 6471 int cur, l; 6472 int count = 0; 6473 6474 if ((NXT(0) == '<') && (NXT(1) == '!') && 6475 (NXT(2) == '[') && (NXT(3) == 'C') && 6476 (NXT(4) == 'D') && (NXT(5) == 'A') && 6477 (NXT(6) == 'T') && (NXT(7) == 'A') && 6478 (NXT(8) == '[')) { 6479 SKIP(9); 6480 } else 6481 return; 6482 6483 ctxt->instate = XML_PARSER_CDATA_SECTION; 6484 r = CUR_CHAR(rl); 6485 if (!IS_CHAR(r)) { 6486 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6488 ctxt->sax->error(ctxt->userData, 6489 "CData section not finished\n"); 6490 ctxt->wellFormed = 0; 6491 ctxt->disableSAX = 1; 6492 ctxt->instate = XML_PARSER_CONTENT; 6493 return; 6494 } 6495 NEXTL(rl); 6496 s = CUR_CHAR(sl); 6497 if (!IS_CHAR(s)) { 6498 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6500 ctxt->sax->error(ctxt->userData, 6501 "CData section not finished\n"); 6502 ctxt->wellFormed = 0; 6503 ctxt->disableSAX = 1; 6504 ctxt->instate = XML_PARSER_CONTENT; 6505 return; 6506 } 6507 NEXTL(sl); 6508 cur = CUR_CHAR(l); 6509 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6510 if (buf == NULL) { 6511 xmlGenericError(xmlGenericErrorContext, 6512 "malloc of %d byte failed\n", size); 6513 return; 6514 } 6515 while (IS_CHAR(cur) && 6516 ((r != ']') || (s != ']') || (cur != '>'))) { 6517 if (len + 5 >= size) { 6518 size *= 2; 6519 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6520 if (buf == NULL) { 6521 xmlGenericError(xmlGenericErrorContext, 6522 "realloc of %d byte failed\n", size); 6523 return; 6524 } 6525 } 6526 COPY_BUF(rl,buf,len,r); 6527 r = s; 
6528 rl = sl; 6529 s = cur; 6530 sl = l; 6531 count++; 6532 if (count > 50) { 6533 GROW; 6534 count = 0; 6535 } 6536 NEXTL(l); 6537 cur = CUR_CHAR(l); 6538 } 6539 buf[len] = 0; 6540 ctxt->instate = XML_PARSER_CONTENT; 6541 if (cur != '>') { 6542 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6544 ctxt->sax->error(ctxt->userData, 6545 "CData section not finished\n%.50s\n", buf); 6546 ctxt->wellFormed = 0; 6547 ctxt->disableSAX = 1; 6548 xmlFree(buf); 6549 return; 6550 } 6551 NEXTL(l); 6552 6553 /* 6554 * Ok the buffer is to be consumed as cdata. 6555 */ 6556 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6557 if (ctxt->sax->cdataBlock != NULL) 6558 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6559 else if (ctxt->sax->characters != NULL) 6560 ctxt->sax->characters(ctxt->userData, buf, len); 6561 } 6562 xmlFree(buf); 6563} 6564 6565/** 6566 * xmlParseContent: 6567 * @ctxt: an XML parser context 6568 * 6569 * Parse a content: 6570 * 6571 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6572 */ 6573 6574void 6575xmlParseContent(xmlParserCtxtPtr ctxt) { 6576 GROW; 6577 while (((RAW != 0) || (ctxt->token != 0)) && 6578 ((RAW != '<') || (NXT(1) != '/'))) { 6579 const xmlChar *test = CUR_PTR; 6580 int cons = ctxt->input->consumed; 6581 int tok = ctxt->token; 6582 const xmlChar *cur = ctxt->input->cur; 6583 6584 /* 6585 * Handle possible processed charrefs. 6586 */ 6587 if (ctxt->token != 0) { 6588 xmlParseCharData(ctxt, 0); 6589 } 6590 /* 6591 * First case : a Processing Instruction. 
6592 */ 6593 else if ((*cur == '<') && (cur[1] == '?')) { 6594 xmlParsePI(ctxt); 6595 } 6596 6597 /* 6598 * Second case : a CDSection 6599 */ 6600 else if ((*cur == '<') && (NXT(1) == '!') && 6601 (NXT(2) == '[') && (NXT(3) == 'C') && 6602 (NXT(4) == 'D') && (NXT(5) == 'A') && 6603 (NXT(6) == 'T') && (NXT(7) == 'A') && 6604 (NXT(8) == '[')) { 6605 xmlParseCDSect(ctxt); 6606 } 6607 6608 /* 6609 * Third case : a comment 6610 */ 6611 else if ((*cur == '<') && (NXT(1) == '!') && 6612 (NXT(2) == '-') && (NXT(3) == '-')) { 6613 xmlParseComment(ctxt); 6614 ctxt->instate = XML_PARSER_CONTENT; 6615 } 6616 6617 /* 6618 * Fourth case : a sub-element. 6619 */ 6620 else if (*cur == '<') { 6621 xmlParseElement(ctxt); 6622 } 6623 6624 /* 6625 * Fifth case : a reference. If if has not been resolved, 6626 * parsing returns it's Name, create the node 6627 */ 6628 6629 else if (*cur == '&') { 6630 xmlParseReference(ctxt); 6631 } 6632 6633 /* 6634 * Last case, text. Note that References are handled directly. 6635 */ 6636 else { 6637 xmlParseCharData(ctxt, 0); 6638 } 6639 6640 GROW; 6641 /* 6642 * Pop-up of finished entities. 6643 */ 6644 while ((RAW == 0) && (ctxt->inputNr > 1)) 6645 xmlPopInput(ctxt); 6646 SHRINK; 6647 6648 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6649 (tok == ctxt->token)) { 6650 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6652 ctxt->sax->error(ctxt->userData, 6653 "detected an error in element content\n"); 6654 ctxt->wellFormed = 0; 6655 ctxt->disableSAX = 1; 6656 ctxt->instate = XML_PARSER_EOF; 6657 break; 6658 } 6659 } 6660} 6661 6662/** 6663 * xmlParseElement: 6664 * @ctxt: an XML parser context 6665 * 6666 * parse an XML element, this is highly recursive 6667 * 6668 * [39] element ::= EmptyElemTag | STag content ETag 6669 * 6670 * [ WFC: Element Type Match ] 6671 * The Name in an element's end-tag must match the element type in the 6672 * start-tag. 
6673 * 6674 * [ VC: Element Valid ] 6675 * An element is valid if there is a declaration matching elementdecl 6676 * where the Name matches the element type and one of the following holds: 6677 * - The declaration matches EMPTY and the element has no content. 6678 * - The declaration matches children and the sequence of child elements 6679 * belongs to the language generated by the regular expression in the 6680 * content model, with optional white space (characters matching the 6681 * nonterminal S) between each pair of child elements. 6682 * - The declaration matches Mixed and the content consists of character 6683 * data and child elements whose types match names in the content model. 6684 * - The declaration matches ANY, and the types of any child elements have 6685 * been declared. 6686 */ 6687 6688void 6689xmlParseElement(xmlParserCtxtPtr ctxt) { 6690 const xmlChar *openTag = CUR_PTR; 6691 xmlChar *name; 6692 xmlChar *oldname; 6693 xmlParserNodeInfo node_info; 6694 xmlNodePtr ret; 6695 6696 /* Capture start position */ 6697 if (ctxt->record_info) { 6698 node_info.begin_pos = ctxt->input->consumed + 6699 (CUR_PTR - ctxt->input->base); 6700 node_info.begin_line = ctxt->input->line; 6701 } 6702 6703 if (ctxt->spaceNr == 0) 6704 spacePush(ctxt, -1); 6705 else 6706 spacePush(ctxt, *ctxt->space); 6707 6708 name = xmlParseStartTag(ctxt); 6709 if (name == NULL) { 6710 spacePop(ctxt); 6711 return; 6712 } 6713 namePush(ctxt, name); 6714 ret = ctxt->node; 6715 6716 /* 6717 * [ VC: Root Element Type ] 6718 * The Name in the document type declaration must match the element 6719 * type of the root element. 6720 */ 6721 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6722 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6723 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6724 6725 /* 6726 * Check for an Empty Element. 
6727 */ 6728 if ((RAW == '/') && (NXT(1) == '>')) { 6729 SKIP(2); 6730 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6731 (!ctxt->disableSAX)) 6732 ctxt->sax->endElement(ctxt->userData, name); 6733 oldname = namePop(ctxt); 6734 spacePop(ctxt); 6735 if (oldname != NULL) { 6736#ifdef DEBUG_STACK 6737 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6738#endif 6739 xmlFree(oldname); 6740 } 6741 if ( ret != NULL && ctxt->record_info ) { 6742 node_info.end_pos = ctxt->input->consumed + 6743 (CUR_PTR - ctxt->input->base); 6744 node_info.end_line = ctxt->input->line; 6745 node_info.node = ret; 6746 xmlParserAddNodeInfo(ctxt, &node_info); 6747 } 6748 return; 6749 } 6750 if (RAW == '>') { 6751 NEXT1; 6752 } else { 6753 ctxt->errNo = XML_ERR_GT_REQUIRED; 6754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6755 ctxt->sax->error(ctxt->userData, 6756 "Couldn't find end of Start Tag\n%.30s\n", 6757 openTag); 6758 ctxt->wellFormed = 0; 6759 ctxt->disableSAX = 1; 6760 6761 /* 6762 * end of parsing of this node. 
6763 */ 6764 nodePop(ctxt); 6765 oldname = namePop(ctxt); 6766 spacePop(ctxt); 6767 if (oldname != NULL) { 6768#ifdef DEBUG_STACK 6769 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6770#endif 6771 xmlFree(oldname); 6772 } 6773 6774 /* 6775 * Capture end position and add node 6776 */ 6777 if ( ret != NULL && ctxt->record_info ) { 6778 node_info.end_pos = ctxt->input->consumed + 6779 (CUR_PTR - ctxt->input->base); 6780 node_info.end_line = ctxt->input->line; 6781 node_info.node = ret; 6782 xmlParserAddNodeInfo(ctxt, &node_info); 6783 } 6784 return; 6785 } 6786 6787 /* 6788 * Parse the content of the element: 6789 */ 6790 xmlParseContent(ctxt); 6791 if (!IS_CHAR(RAW)) { 6792 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6794 ctxt->sax->error(ctxt->userData, 6795 "Premature end of data in tag %.30s\n", openTag); 6796 ctxt->wellFormed = 0; 6797 ctxt->disableSAX = 1; 6798 6799 /* 6800 * end of parsing of this node. 6801 */ 6802 nodePop(ctxt); 6803 oldname = namePop(ctxt); 6804 spacePop(ctxt); 6805 if (oldname != NULL) { 6806#ifdef DEBUG_STACK 6807 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6808#endif 6809 xmlFree(oldname); 6810 } 6811 return; 6812 } 6813 6814 /* 6815 * parse the end of tag: '</' should be here. 6816 */ 6817 xmlParseEndTag(ctxt); 6818 6819 /* 6820 * Capture end position and add node 6821 */ 6822 if ( ret != NULL && ctxt->record_info ) { 6823 node_info.end_pos = ctxt->input->consumed + 6824 (CUR_PTR - ctxt->input->base); 6825 node_info.end_line = ctxt->input->line; 6826 node_info.node = ret; 6827 xmlParserAddNodeInfo(ctxt, &node_info); 6828 } 6829} 6830 6831/** 6832 * xmlParseVersionNum: 6833 * @ctxt: an XML parser context 6834 * 6835 * parse the XML version value. 
6836 * 6837 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6838 * 6839 * Returns the string giving the XML version number, or NULL 6840 */ 6841xmlChar * 6842xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6843 xmlChar *buf = NULL; 6844 int len = 0; 6845 int size = 10; 6846 xmlChar cur; 6847 6848 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6849 if (buf == NULL) { 6850 xmlGenericError(xmlGenericErrorContext, 6851 "malloc of %d byte failed\n", size); 6852 return(NULL); 6853 } 6854 cur = CUR; 6855 while (((cur >= 'a') && (cur <= 'z')) || 6856 ((cur >= 'A') && (cur <= 'Z')) || 6857 ((cur >= '0') && (cur <= '9')) || 6858 (cur == '_') || (cur == '.') || 6859 (cur == ':') || (cur == '-')) { 6860 if (len + 1 >= size) { 6861 size *= 2; 6862 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6863 if (buf == NULL) { 6864 xmlGenericError(xmlGenericErrorContext, 6865 "realloc of %d byte failed\n", size); 6866 return(NULL); 6867 } 6868 } 6869 buf[len++] = cur; 6870 NEXT; 6871 cur=CUR; 6872 } 6873 buf[len] = 0; 6874 return(buf); 6875} 6876 6877/** 6878 * xmlParseVersionInfo: 6879 * @ctxt: an XML parser context 6880 * 6881 * parse the XML version. 6882 * 6883 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6884 * 6885 * [25] Eq ::= S? '=' S? 6886 * 6887 * Returns the version string, e.g. 
"1.0" 6888 */ 6889 6890xmlChar * 6891xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6892 xmlChar *version = NULL; 6893 const xmlChar *q; 6894 6895 if ((RAW == 'v') && (NXT(1) == 'e') && 6896 (NXT(2) == 'r') && (NXT(3) == 's') && 6897 (NXT(4) == 'i') && (NXT(5) == 'o') && 6898 (NXT(6) == 'n')) { 6899 SKIP(7); 6900 SKIP_BLANKS; 6901 if (RAW != '=') { 6902 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6904 ctxt->sax->error(ctxt->userData, 6905 "xmlParseVersionInfo : expected '='\n"); 6906 ctxt->wellFormed = 0; 6907 ctxt->disableSAX = 1; 6908 return(NULL); 6909 } 6910 NEXT; 6911 SKIP_BLANKS; 6912 if (RAW == '"') { 6913 NEXT; 6914 q = CUR_PTR; 6915 version = xmlParseVersionNum(ctxt); 6916 if (RAW != '"') { 6917 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6919 ctxt->sax->error(ctxt->userData, 6920 "String not closed\n%.50s\n", q); 6921 ctxt->wellFormed = 0; 6922 ctxt->disableSAX = 1; 6923 } else 6924 NEXT; 6925 } else if (RAW == '\''){ 6926 NEXT; 6927 q = CUR_PTR; 6928 version = xmlParseVersionNum(ctxt); 6929 if (RAW != '\'') { 6930 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6932 ctxt->sax->error(ctxt->userData, 6933 "String not closed\n%.50s\n", q); 6934 ctxt->wellFormed = 0; 6935 ctxt->disableSAX = 1; 6936 } else 6937 NEXT; 6938 } else { 6939 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6941 ctxt->sax->error(ctxt->userData, 6942 "xmlParseVersionInfo : expected ' or \"\n"); 6943 ctxt->wellFormed = 0; 6944 ctxt->disableSAX = 1; 6945 } 6946 } 6947 return(version); 6948} 6949 6950/** 6951 * xmlParseEncName: 6952 * @ctxt: an XML parser context 6953 * 6954 * parse the XML encoding name 6955 * 6956 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6957 * 6958 * Returns the encoding name value or NULL 6959 */ 6960xmlChar * 
6961xmlParseEncName(xmlParserCtxtPtr ctxt) { 6962 xmlChar *buf = NULL; 6963 int len = 0; 6964 int size = 10; 6965 xmlChar cur; 6966 6967 cur = CUR; 6968 if (((cur >= 'a') && (cur <= 'z')) || 6969 ((cur >= 'A') && (cur <= 'Z'))) { 6970 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6971 if (buf == NULL) { 6972 xmlGenericError(xmlGenericErrorContext, 6973 "malloc of %d byte failed\n", size); 6974 return(NULL); 6975 } 6976 6977 buf[len++] = cur; 6978 NEXT; 6979 cur = CUR; 6980 while (((cur >= 'a') && (cur <= 'z')) || 6981 ((cur >= 'A') && (cur <= 'Z')) || 6982 ((cur >= '0') && (cur <= '9')) || 6983 (cur == '.') || (cur == '_') || 6984 (cur == '-')) { 6985 if (len + 1 >= size) { 6986 size *= 2; 6987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6988 if (buf == NULL) { 6989 xmlGenericError(xmlGenericErrorContext, 6990 "realloc of %d byte failed\n", size); 6991 return(NULL); 6992 } 6993 } 6994 buf[len++] = cur; 6995 NEXT; 6996 cur = CUR; 6997 if (cur == 0) { 6998 SHRINK; 6999 GROW; 7000 cur = CUR; 7001 } 7002 } 7003 buf[len] = 0; 7004 } else { 7005 ctxt->errNo = XML_ERR_ENCODING_NAME; 7006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7007 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 7008 ctxt->wellFormed = 0; 7009 ctxt->disableSAX = 1; 7010 } 7011 return(buf); 7012} 7013 7014/** 7015 * xmlParseEncodingDecl: 7016 * @ctxt: an XML parser context 7017 * 7018 * parse the XML encoding declaration 7019 * 7020 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 7021 * 7022 * this setups the conversion filters. 
7023 * 7024 * Returns the encoding value or NULL 7025 */ 7026 7027xmlChar * 7028xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 7029 xmlChar *encoding = NULL; 7030 const xmlChar *q; 7031 7032 SKIP_BLANKS; 7033 if ((RAW == 'e') && (NXT(1) == 'n') && 7034 (NXT(2) == 'c') && (NXT(3) == 'o') && 7035 (NXT(4) == 'd') && (NXT(5) == 'i') && 7036 (NXT(6) == 'n') && (NXT(7) == 'g')) { 7037 SKIP(8); 7038 SKIP_BLANKS; 7039 if (RAW != '=') { 7040 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7042 ctxt->sax->error(ctxt->userData, 7043 "xmlParseEncodingDecl : expected '='\n"); 7044 ctxt->wellFormed = 0; 7045 ctxt->disableSAX = 1; 7046 return(NULL); 7047 } 7048 NEXT; 7049 SKIP_BLANKS; 7050 if (RAW == '"') { 7051 NEXT; 7052 q = CUR_PTR; 7053 encoding = xmlParseEncName(ctxt); 7054 if (RAW != '"') { 7055 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7057 ctxt->sax->error(ctxt->userData, 7058 "String not closed\n%.50s\n", q); 7059 ctxt->wellFormed = 0; 7060 ctxt->disableSAX = 1; 7061 } else 7062 NEXT; 7063 } else if (RAW == '\''){ 7064 NEXT; 7065 q = CUR_PTR; 7066 encoding = xmlParseEncName(ctxt); 7067 if (RAW != '\'') { 7068 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7070 ctxt->sax->error(ctxt->userData, 7071 "String not closed\n%.50s\n", q); 7072 ctxt->wellFormed = 0; 7073 ctxt->disableSAX = 1; 7074 } else 7075 NEXT; 7076 } else if (RAW == '"'){ 7077 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7079 ctxt->sax->error(ctxt->userData, 7080 "xmlParseEncodingDecl : expected ' or \"\n"); 7081 ctxt->wellFormed = 0; 7082 ctxt->disableSAX = 1; 7083 } 7084 if (encoding != NULL) { 7085 xmlCharEncoding enc; 7086 xmlCharEncodingHandlerPtr handler; 7087 7088 if (ctxt->input->encoding != NULL) 7089 xmlFree((xmlChar *) ctxt->input->encoding); 7090 ctxt->input->encoding = encoding; 
7091 7092 enc = xmlParseCharEncoding((const char *) encoding); 7093 /* 7094 * registered set of known encodings 7095 */ 7096 if (enc != XML_CHAR_ENCODING_ERROR) { 7097 xmlSwitchEncoding(ctxt, enc); 7098 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7099 xmlFree(encoding); 7100 return(NULL); 7101 } 7102 } else { 7103 /* 7104 * fallback for unknown encodings 7105 */ 7106 handler = xmlFindCharEncodingHandler((const char *) encoding); 7107 if (handler != NULL) { 7108 xmlSwitchToEncoding(ctxt, handler); 7109 } else { 7110 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 7111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7112 ctxt->sax->error(ctxt->userData, 7113 "Unsupported encoding %s\n", encoding); 7114 return(NULL); 7115 } 7116 } 7117 } 7118 } 7119 return(encoding); 7120} 7121 7122/** 7123 * xmlParseSDDecl: 7124 * @ctxt: an XML parser context 7125 * 7126 * parse the XML standalone declaration 7127 * 7128 * [32] SDDecl ::= S 'standalone' Eq 7129 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 7130 * 7131 * [ VC: Standalone Document Declaration ] 7132 * TODO The standalone document declaration must have the value "no" 7133 * if any external markup declarations contain declarations of: 7134 * - attributes with default values, if elements to which these 7135 * attributes apply appear in the document without specifications 7136 * of values for these attributes, or 7137 * - entities (other than amp, lt, gt, apos, quot), if references 7138 * to those entities appear in the document, or 7139 * - attributes with values subject to normalization, where the 7140 * attribute appears in the document with a value which will change 7141 * as a result of normalization, or 7142 * - element types with element content, if white space occurs directly 7143 * within any instance of those types. 
7144 * 7145 * Returns 1 if standalone, 0 otherwise 7146 */ 7147 7148int 7149xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 7150 int standalone = -1; 7151 7152 SKIP_BLANKS; 7153 if ((RAW == 's') && (NXT(1) == 't') && 7154 (NXT(2) == 'a') && (NXT(3) == 'n') && 7155 (NXT(4) == 'd') && (NXT(5) == 'a') && 7156 (NXT(6) == 'l') && (NXT(7) == 'o') && 7157 (NXT(8) == 'n') && (NXT(9) == 'e')) { 7158 SKIP(10); 7159 SKIP_BLANKS; 7160 if (RAW != '=') { 7161 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 7162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7163 ctxt->sax->error(ctxt->userData, 7164 "XML standalone declaration : expected '='\n"); 7165 ctxt->wellFormed = 0; 7166 ctxt->disableSAX = 1; 7167 return(standalone); 7168 } 7169 NEXT; 7170 SKIP_BLANKS; 7171 if (RAW == '\''){ 7172 NEXT; 7173 if ((RAW == 'n') && (NXT(1) == 'o')) { 7174 standalone = 0; 7175 SKIP(2); 7176 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7177 (NXT(2) == 's')) { 7178 standalone = 1; 7179 SKIP(3); 7180 } else { 7181 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7183 ctxt->sax->error(ctxt->userData, 7184 "standalone accepts only 'yes' or 'no'\n"); 7185 ctxt->wellFormed = 0; 7186 ctxt->disableSAX = 1; 7187 } 7188 if (RAW != '\'') { 7189 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7191 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7192 ctxt->wellFormed = 0; 7193 ctxt->disableSAX = 1; 7194 } else 7195 NEXT; 7196 } else if (RAW == '"'){ 7197 NEXT; 7198 if ((RAW == 'n') && (NXT(1) == 'o')) { 7199 standalone = 0; 7200 SKIP(2); 7201 } else if ((RAW == 'y') && (NXT(1) == 'e') && 7202 (NXT(2) == 's')) { 7203 standalone = 1; 7204 SKIP(3); 7205 } else { 7206 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7208 ctxt->sax->error(ctxt->userData, 7209 "standalone accepts only 'yes' or 'no'\n"); 7210 ctxt->wellFormed = 0; 7211 ctxt->disableSAX = 1; 7212 
} 7213 if (RAW != '"') { 7214 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 7215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7216 ctxt->sax->error(ctxt->userData, "String not closed\n"); 7217 ctxt->wellFormed = 0; 7218 ctxt->disableSAX = 1; 7219 } else 7220 NEXT; 7221 } else { 7222 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 7223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7224 ctxt->sax->error(ctxt->userData, 7225 "Standalone value not found\n"); 7226 ctxt->wellFormed = 0; 7227 ctxt->disableSAX = 1; 7228 } 7229 } 7230 return(standalone); 7231} 7232 7233/** 7234 * xmlParseXMLDecl: 7235 * @ctxt: an XML parser context 7236 * 7237 * parse an XML declaration header 7238 * 7239 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 7240 */ 7241 7242void 7243xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 7244 xmlChar *version; 7245 7246 /* 7247 * We know that '<?xml' is here. 7248 */ 7249 SKIP(5); 7250 7251 if (!IS_BLANK(RAW)) { 7252 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7254 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 7255 ctxt->wellFormed = 0; 7256 ctxt->disableSAX = 1; 7257 } 7258 SKIP_BLANKS; 7259 7260 /* 7261 * We should have the VersionInfo here. 
7262 */ 7263 version = xmlParseVersionInfo(ctxt); 7264 if (version == NULL) 7265 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7266 ctxt->version = xmlStrdup(version); 7267 xmlFree(version); 7268 7269 /* 7270 * We may have the encoding declaration 7271 */ 7272 if (!IS_BLANK(RAW)) { 7273 if ((RAW == '?') && (NXT(1) == '>')) { 7274 SKIP(2); 7275 return; 7276 } 7277 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7279 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7280 ctxt->wellFormed = 0; 7281 ctxt->disableSAX = 1; 7282 } 7283 xmlParseEncodingDecl(ctxt); 7284 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7285 /* 7286 * The XML REC instructs us to stop parsing right here 7287 */ 7288 return; 7289 } 7290 7291 /* 7292 * We may have the standalone status. 7293 */ 7294 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7295 if ((RAW == '?') && (NXT(1) == '>')) { 7296 SKIP(2); 7297 return; 7298 } 7299 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7301 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7302 ctxt->wellFormed = 0; 7303 ctxt->disableSAX = 1; 7304 } 7305 SKIP_BLANKS; 7306 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7307 7308 SKIP_BLANKS; 7309 if ((RAW == '?') && (NXT(1) == '>')) { 7310 SKIP(2); 7311 } else if (RAW == '>') { 7312 /* Deprecated old WD ... 
*/ 7313 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7315 ctxt->sax->error(ctxt->userData, 7316 "XML declaration must end-up with '?>'\n"); 7317 ctxt->wellFormed = 0; 7318 ctxt->disableSAX = 1; 7319 NEXT; 7320 } else { 7321 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7323 ctxt->sax->error(ctxt->userData, 7324 "parsing XML declaration: '?>' expected\n"); 7325 ctxt->wellFormed = 0; 7326 ctxt->disableSAX = 1; 7327 MOVETO_ENDTAG(CUR_PTR); 7328 NEXT; 7329 } 7330} 7331 7332/** 7333 * xmlParseMisc: 7334 * @ctxt: an XML parser context 7335 * 7336 * parse an XML Misc* optionnal field. 7337 * 7338 * [27] Misc ::= Comment | PI | S 7339 */ 7340 7341void 7342xmlParseMisc(xmlParserCtxtPtr ctxt) { 7343 while (((RAW == '<') && (NXT(1) == '?')) || 7344 ((RAW == '<') && (NXT(1) == '!') && 7345 (NXT(2) == '-') && (NXT(3) == '-')) || 7346 IS_BLANK(CUR)) { 7347 if ((RAW == '<') && (NXT(1) == '?')) { 7348 xmlParsePI(ctxt); 7349 } else if (IS_BLANK(CUR)) { 7350 NEXT; 7351 } else 7352 xmlParseComment(ctxt); 7353 } 7354} 7355 7356/** 7357 * xmlParseDocument: 7358 * @ctxt: an XML parser context 7359 * 7360 * parse an XML document (and build a tree if using the standard SAX 7361 * interface). 7362 * 7363 * [1] document ::= prolog element Misc* 7364 * 7365 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7366 * 7367 * Returns 0, -1 in case of error. the parser context is augmented 7368 * as a result of the parsing. 7369 */ 7370 7371int 7372xmlParseDocument(xmlParserCtxtPtr ctxt) { 7373 xmlChar start[4]; 7374 xmlCharEncoding enc; 7375 7376 xmlInitParser(); 7377 7378 GROW; 7379 7380 /* 7381 * SAX: beginning of the document processing. 
7382 */ 7383 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7384 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7385 7386 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { 7387 /* 7388 * Get the 4 first bytes and decode the charset 7389 * if enc != XML_CHAR_ENCODING_NONE 7390 * plug some encoding conversion routines. 7391 */ 7392 start[0] = RAW; 7393 start[1] = NXT(1); 7394 start[2] = NXT(2); 7395 start[3] = NXT(3); 7396 enc = xmlDetectCharEncoding(start, 4); 7397 if (enc != XML_CHAR_ENCODING_NONE) { 7398 xmlSwitchEncoding(ctxt, enc); 7399 } 7400 } 7401 7402 7403 if (CUR == 0) { 7404 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7406 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7407 ctxt->wellFormed = 0; 7408 ctxt->disableSAX = 1; 7409 } 7410 7411 /* 7412 * Check for the XMLDecl in the Prolog. 7413 */ 7414 GROW; 7415 if ((RAW == '<') && (NXT(1) == '?') && 7416 (NXT(2) == 'x') && (NXT(3) == 'm') && 7417 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7418 7419 /* 7420 * Note that we will switch encoding on the fly. 7421 */ 7422 xmlParseXMLDecl(ctxt); 7423 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7424 /* 7425 * The XML REC instructs us to stop parsing right here 7426 */ 7427 return(-1); 7428 } 7429 ctxt->standalone = ctxt->input->standalone; 7430 SKIP_BLANKS; 7431 } else { 7432 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7433 } 7434 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7435 ctxt->sax->startDocument(ctxt->userData); 7436 7437 /* 7438 * The Misc part of the Prolog 7439 */ 7440 GROW; 7441 xmlParseMisc(ctxt); 7442 7443 /* 7444 * Then possibly doc type declaration(s) and more Misc 7445 * (doctypedecl Misc*)? 
7446 */ 7447 GROW; 7448 if ((RAW == '<') && (NXT(1) == '!') && 7449 (NXT(2) == 'D') && (NXT(3) == 'O') && 7450 (NXT(4) == 'C') && (NXT(5) == 'T') && 7451 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7452 (NXT(8) == 'E')) { 7453 7454 ctxt->inSubset = 1; 7455 xmlParseDocTypeDecl(ctxt); 7456 if (RAW == '[') { 7457 ctxt->instate = XML_PARSER_DTD; 7458 xmlParseInternalSubset(ctxt); 7459 } 7460 7461 /* 7462 * Create and update the external subset. 7463 */ 7464 ctxt->inSubset = 2; 7465 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7466 (!ctxt->disableSAX)) 7467 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7468 ctxt->extSubSystem, ctxt->extSubURI); 7469 ctxt->inSubset = 0; 7470 7471 7472 ctxt->instate = XML_PARSER_PROLOG; 7473 xmlParseMisc(ctxt); 7474 } 7475 7476 /* 7477 * Time to start parsing the tree itself 7478 */ 7479 GROW; 7480 if (RAW != '<') { 7481 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7483 ctxt->sax->error(ctxt->userData, 7484 "Start tag expected, '<' not found\n"); 7485 ctxt->wellFormed = 0; 7486 ctxt->disableSAX = 1; 7487 ctxt->instate = XML_PARSER_EOF; 7488 } else { 7489 ctxt->instate = XML_PARSER_CONTENT; 7490 xmlParseElement(ctxt); 7491 ctxt->instate = XML_PARSER_EPILOG; 7492 7493 7494 /* 7495 * The Misc part at the end 7496 */ 7497 xmlParseMisc(ctxt); 7498 7499 if (RAW != 0) { 7500 ctxt->errNo = XML_ERR_DOCUMENT_END; 7501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7502 ctxt->sax->error(ctxt->userData, 7503 "Extra content at the end of the document\n"); 7504 ctxt->wellFormed = 0; 7505 ctxt->disableSAX = 1; 7506 } 7507 ctxt->instate = XML_PARSER_EOF; 7508 } 7509 7510 /* 7511 * SAX: end of the document processing. 7512 */ 7513 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7514 (!ctxt->disableSAX)) 7515 ctxt->sax->endDocument(ctxt->userData); 7516 7517 if (! 
ctxt->wellFormed) return(-1); 7518 return(0); 7519} 7520 7521/** 7522 * xmlParseExtParsedEnt: 7523 * @ctxt: an XML parser context 7524 * 7525 * parse a genreral parsed entity 7526 * An external general parsed entity is well-formed if it matches the 7527 * production labeled extParsedEnt. 7528 * 7529 * [78] extParsedEnt ::= TextDecl? content 7530 * 7531 * Returns 0, -1 in case of error. the parser context is augmented 7532 * as a result of the parsing. 7533 */ 7534 7535int 7536xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7537 xmlChar start[4]; 7538 xmlCharEncoding enc; 7539 7540 xmlDefaultSAXHandlerInit(); 7541 7542 GROW; 7543 7544 /* 7545 * SAX: beginning of the document processing. 7546 */ 7547 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7548 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7549 7550 /* 7551 * Get the 4 first bytes and decode the charset 7552 * if enc != XML_CHAR_ENCODING_NONE 7553 * plug some encoding conversion routines. 7554 */ 7555 start[0] = RAW; 7556 start[1] = NXT(1); 7557 start[2] = NXT(2); 7558 start[3] = NXT(3); 7559 enc = xmlDetectCharEncoding(start, 4); 7560 if (enc != XML_CHAR_ENCODING_NONE) { 7561 xmlSwitchEncoding(ctxt, enc); 7562 } 7563 7564 7565 if (CUR == 0) { 7566 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7568 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7569 ctxt->wellFormed = 0; 7570 ctxt->disableSAX = 1; 7571 } 7572 7573 /* 7574 * Check for the XMLDecl in the Prolog. 7575 */ 7576 GROW; 7577 if ((RAW == '<') && (NXT(1) == '?') && 7578 (NXT(2) == 'x') && (NXT(3) == 'm') && 7579 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7580 7581 /* 7582 * Note that we will switch encoding on the fly. 
7583 */ 7584 xmlParseXMLDecl(ctxt); 7585 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7586 /* 7587 * The XML REC instructs us to stop parsing right here 7588 */ 7589 return(-1); 7590 } 7591 SKIP_BLANKS; 7592 } else { 7593 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7594 } 7595 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7596 ctxt->sax->startDocument(ctxt->userData); 7597 7598 /* 7599 * Doing validity checking on chunk doesn't make sense 7600 */ 7601 ctxt->instate = XML_PARSER_CONTENT; 7602 ctxt->validate = 0; 7603 ctxt->loadsubset = 0; 7604 ctxt->depth = 0; 7605 7606 xmlParseContent(ctxt); 7607 7608 if ((RAW == '<') && (NXT(1) == '/')) { 7609 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7611 ctxt->sax->error(ctxt->userData, 7612 "chunk is not well balanced\n"); 7613 ctxt->wellFormed = 0; 7614 ctxt->disableSAX = 1; 7615 } else if (RAW != 0) { 7616 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7618 ctxt->sax->error(ctxt->userData, 7619 "extra content at the end of well balanced chunk\n"); 7620 ctxt->wellFormed = 0; 7621 ctxt->disableSAX = 1; 7622 } 7623 7624 /* 7625 * SAX: end of the document processing. 7626 */ 7627 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7628 (!ctxt->disableSAX)) 7629 ctxt->sax->endDocument(ctxt->userData); 7630 7631 if (! 
ctxt->wellFormed) return(-1); 7632 return(0); 7633} 7634 7635/************************************************************************ 7636 * * 7637 * Progressive parsing interfaces * 7638 * * 7639 ************************************************************************/ 7640 7641/** 7642 * xmlParseLookupSequence: 7643 * @ctxt: an XML parser context 7644 * @first: the first char to lookup 7645 * @next: the next char to lookup or zero 7646 * @third: the next char to lookup or zero 7647 * 7648 * Try to find if a sequence (first, next, third) or just (first next) or 7649 * (first) is available in the input stream. 7650 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7651 * to avoid rescanning sequences of bytes, it DOES change the state of the 7652 * parser, do not use liberally. 7653 * 7654 * Returns the index to the current parsing point if the full sequence 7655 * is available, -1 otherwise. 7656 */ 7657static int 7658xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7659 xmlChar next, xmlChar third) { 7660 int base, len; 7661 xmlParserInputPtr in; 7662 const xmlChar *buf; 7663 7664 in = ctxt->input; 7665 if (in == NULL) return(-1); 7666 base = in->cur - in->base; 7667 if (base < 0) return(-1); 7668 if (ctxt->checkIndex > base) 7669 base = ctxt->checkIndex; 7670 if (in->buf == NULL) { 7671 buf = in->base; 7672 len = in->length; 7673 } else { 7674 buf = in->buf->buffer->content; 7675 len = in->buf->buffer->use; 7676 } 7677 /* take into account the sequence length */ 7678 if (third) len -= 2; 7679 else if (next) len --; 7680 for (;base < len;base++) { 7681 if (buf[base] == first) { 7682 if (third != 0) { 7683 if ((buf[base + 1] != next) || 7684 (buf[base + 2] != third)) continue; 7685 } else if (next != 0) { 7686 if (buf[base + 1] != next) continue; 7687 } 7688 ctxt->checkIndex = 0; 7689#ifdef DEBUG_PUSH 7690 if (next == 0) 7691 xmlGenericError(xmlGenericErrorContext, 7692 "PP: lookup '%c' found at %d\n", 7693 first, base); 
7694 else if (third == 0) 7695 xmlGenericError(xmlGenericErrorContext, 7696 "PP: lookup '%c%c' found at %d\n", 7697 first, next, base); 7698 else 7699 xmlGenericError(xmlGenericErrorContext, 7700 "PP: lookup '%c%c%c' found at %d\n", 7701 first, next, third, base); 7702#endif 7703 return(base - (in->cur - in->base)); 7704 } 7705 } 7706 ctxt->checkIndex = base; 7707#ifdef DEBUG_PUSH 7708 if (next == 0) 7709 xmlGenericError(xmlGenericErrorContext, 7710 "PP: lookup '%c' failed\n", first); 7711 else if (third == 0) 7712 xmlGenericError(xmlGenericErrorContext, 7713 "PP: lookup '%c%c' failed\n", first, next); 7714 else 7715 xmlGenericError(xmlGenericErrorContext, 7716 "PP: lookup '%c%c%c' failed\n", first, next, third); 7717#endif 7718 return(-1); 7719} 7720 7721/** 7722 * xmlParseTryOrFinish: 7723 * @ctxt: an XML parser context 7724 * @terminate: last chunk indicator 7725 * 7726 * Try to progress on parsing 7727 * 7728 * Returns zero if no parsing was possible 7729 */ 7730static int 7731xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7732 int ret = 0; 7733 int avail; 7734 xmlChar cur, next; 7735 7736#ifdef DEBUG_PUSH 7737 switch (ctxt->instate) { 7738 case XML_PARSER_EOF: 7739 xmlGenericError(xmlGenericErrorContext, 7740 "PP: try EOF\n"); break; 7741 case XML_PARSER_START: 7742 xmlGenericError(xmlGenericErrorContext, 7743 "PP: try START\n"); break; 7744 case XML_PARSER_MISC: 7745 xmlGenericError(xmlGenericErrorContext, 7746 "PP: try MISC\n");break; 7747 case XML_PARSER_COMMENT: 7748 xmlGenericError(xmlGenericErrorContext, 7749 "PP: try COMMENT\n");break; 7750 case XML_PARSER_PROLOG: 7751 xmlGenericError(xmlGenericErrorContext, 7752 "PP: try PROLOG\n");break; 7753 case XML_PARSER_START_TAG: 7754 xmlGenericError(xmlGenericErrorContext, 7755 "PP: try START_TAG\n");break; 7756 case XML_PARSER_CONTENT: 7757 xmlGenericError(xmlGenericErrorContext, 7758 "PP: try CONTENT\n");break; 7759 case XML_PARSER_CDATA_SECTION: 7760 xmlGenericError(xmlGenericErrorContext, 7761 
"PP: try CDATA_SECTION\n");break; 7762 case XML_PARSER_END_TAG: 7763 xmlGenericError(xmlGenericErrorContext, 7764 "PP: try END_TAG\n");break; 7765 case XML_PARSER_ENTITY_DECL: 7766 xmlGenericError(xmlGenericErrorContext, 7767 "PP: try ENTITY_DECL\n");break; 7768 case XML_PARSER_ENTITY_VALUE: 7769 xmlGenericError(xmlGenericErrorContext, 7770 "PP: try ENTITY_VALUE\n");break; 7771 case XML_PARSER_ATTRIBUTE_VALUE: 7772 xmlGenericError(xmlGenericErrorContext, 7773 "PP: try ATTRIBUTE_VALUE\n");break; 7774 case XML_PARSER_DTD: 7775 xmlGenericError(xmlGenericErrorContext, 7776 "PP: try DTD\n");break; 7777 case XML_PARSER_EPILOG: 7778 xmlGenericError(xmlGenericErrorContext, 7779 "PP: try EPILOG\n");break; 7780 case XML_PARSER_PI: 7781 xmlGenericError(xmlGenericErrorContext, 7782 "PP: try PI\n");break; 7783 case XML_PARSER_IGNORE: 7784 xmlGenericError(xmlGenericErrorContext, 7785 "PP: try IGNORE\n");break; 7786 } 7787#endif 7788 7789 while (1) { 7790 /* 7791 * Pop-up of finished entities. 7792 */ 7793 while ((RAW == 0) && (ctxt->inputNr > 1)) 7794 xmlPopInput(ctxt); 7795 7796 if (ctxt->input ==NULL) break; 7797 if (ctxt->input->buf == NULL) 7798 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7799 else 7800 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7801 if (avail < 1) 7802 goto done; 7803 switch (ctxt->instate) { 7804 case XML_PARSER_EOF: 7805 /* 7806 * Document parsing is done ! 7807 */ 7808 goto done; 7809 case XML_PARSER_START: 7810 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 7811 xmlChar start[4]; 7812 xmlCharEncoding enc; 7813 7814 /* 7815 * Very first chars read from the document flow. 7816 */ 7817 if (avail < 4) 7818 goto done; 7819 7820 /* 7821 * Get the 4 first bytes and decode the charset 7822 * if enc != XML_CHAR_ENCODING_NONE 7823 * plug some encoding conversion routines. 
7824 */ 7825 start[0] = RAW; 7826 start[1] = NXT(1); 7827 start[2] = NXT(2); 7828 start[3] = NXT(3); 7829 enc = xmlDetectCharEncoding(start, 4); 7830 if (enc != XML_CHAR_ENCODING_NONE) { 7831 xmlSwitchEncoding(ctxt, enc); 7832 } 7833 break; 7834 } 7835 7836 cur = ctxt->input->cur[0]; 7837 next = ctxt->input->cur[1]; 7838 if (cur == 0) { 7839 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7840 ctxt->sax->setDocumentLocator(ctxt->userData, 7841 &xmlDefaultSAXLocator); 7842 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7844 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7845 ctxt->wellFormed = 0; 7846 ctxt->disableSAX = 1; 7847 ctxt->instate = XML_PARSER_EOF; 7848#ifdef DEBUG_PUSH 7849 xmlGenericError(xmlGenericErrorContext, 7850 "PP: entering EOF\n"); 7851#endif 7852 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7853 ctxt->sax->endDocument(ctxt->userData); 7854 goto done; 7855 } 7856 if ((cur == '<') && (next == '?')) { 7857 /* PI or XML decl */ 7858 if (avail < 5) return(ret); 7859 if ((!terminate) && 7860 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7861 return(ret); 7862 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7863 ctxt->sax->setDocumentLocator(ctxt->userData, 7864 &xmlDefaultSAXLocator); 7865 if ((ctxt->input->cur[2] == 'x') && 7866 (ctxt->input->cur[3] == 'm') && 7867 (ctxt->input->cur[4] == 'l') && 7868 (IS_BLANK(ctxt->input->cur[5]))) { 7869 ret += 5; 7870#ifdef DEBUG_PUSH 7871 xmlGenericError(xmlGenericErrorContext, 7872 "PP: Parsing XML Decl\n"); 7873#endif 7874 xmlParseXMLDecl(ctxt); 7875 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7876 /* 7877 * The XML REC instructs us to stop parsing right 7878 * here 7879 */ 7880 ctxt->instate = XML_PARSER_EOF; 7881 return(0); 7882 } 7883 ctxt->standalone = ctxt->input->standalone; 7884 if ((ctxt->encoding == NULL) && 7885 (ctxt->input->encoding != NULL)) 7886 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 7887 if 
((ctxt->sax) && (ctxt->sax->startDocument) && 7888 (!ctxt->disableSAX)) 7889 ctxt->sax->startDocument(ctxt->userData); 7890 ctxt->instate = XML_PARSER_MISC; 7891#ifdef DEBUG_PUSH 7892 xmlGenericError(xmlGenericErrorContext, 7893 "PP: entering MISC\n"); 7894#endif 7895 } else { 7896 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7897 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7898 (!ctxt->disableSAX)) 7899 ctxt->sax->startDocument(ctxt->userData); 7900 ctxt->instate = XML_PARSER_MISC; 7901#ifdef DEBUG_PUSH 7902 xmlGenericError(xmlGenericErrorContext, 7903 "PP: entering MISC\n"); 7904#endif 7905 } 7906 } else { 7907 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7908 ctxt->sax->setDocumentLocator(ctxt->userData, 7909 &xmlDefaultSAXLocator); 7910 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7911 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7912 (!ctxt->disableSAX)) 7913 ctxt->sax->startDocument(ctxt->userData); 7914 ctxt->instate = XML_PARSER_MISC; 7915#ifdef DEBUG_PUSH 7916 xmlGenericError(xmlGenericErrorContext, 7917 "PP: entering MISC\n"); 7918#endif 7919 } 7920 break; 7921 case XML_PARSER_MISC: 7922 SKIP_BLANKS; 7923 if (ctxt->input->buf == NULL) 7924 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7925 else 7926 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7927 if (avail < 2) 7928 goto done; 7929 cur = ctxt->input->cur[0]; 7930 next = ctxt->input->cur[1]; 7931 if ((cur == '<') && (next == '?')) { 7932 if ((!terminate) && 7933 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7934 goto done; 7935#ifdef DEBUG_PUSH 7936 xmlGenericError(xmlGenericErrorContext, 7937 "PP: Parsing PI\n"); 7938#endif 7939 xmlParsePI(ctxt); 7940 } else if ((cur == '<') && (next == '!') && 7941 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7942 if ((!terminate) && 7943 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7944 goto done; 7945#ifdef DEBUG_PUSH 7946 
xmlGenericError(xmlGenericErrorContext, 7947 "PP: Parsing Comment\n"); 7948#endif 7949 xmlParseComment(ctxt); 7950 ctxt->instate = XML_PARSER_MISC; 7951 } else if ((cur == '<') && (next == '!') && 7952 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 7953 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 7954 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 7955 (ctxt->input->cur[8] == 'E')) { 7956 if ((!terminate) && 7957 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7958 goto done; 7959#ifdef DEBUG_PUSH 7960 xmlGenericError(xmlGenericErrorContext, 7961 "PP: Parsing internal subset\n"); 7962#endif 7963 ctxt->inSubset = 1; 7964 xmlParseDocTypeDecl(ctxt); 7965 if (RAW == '[') { 7966 ctxt->instate = XML_PARSER_DTD; 7967#ifdef DEBUG_PUSH 7968 xmlGenericError(xmlGenericErrorContext, 7969 "PP: entering DTD\n"); 7970#endif 7971 } else { 7972 /* 7973 * Create and update the external subset. 7974 */ 7975 ctxt->inSubset = 2; 7976 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7977 (ctxt->sax->externalSubset != NULL)) 7978 ctxt->sax->externalSubset(ctxt->userData, 7979 ctxt->intSubName, ctxt->extSubSystem, 7980 ctxt->extSubURI); 7981 ctxt->inSubset = 0; 7982 ctxt->instate = XML_PARSER_PROLOG; 7983#ifdef DEBUG_PUSH 7984 xmlGenericError(xmlGenericErrorContext, 7985 "PP: entering PROLOG\n"); 7986#endif 7987 } 7988 } else if ((cur == '<') && (next == '!') && 7989 (avail < 9)) { 7990 goto done; 7991 } else { 7992 ctxt->instate = XML_PARSER_START_TAG; 7993#ifdef DEBUG_PUSH 7994 xmlGenericError(xmlGenericErrorContext, 7995 "PP: entering START_TAG\n"); 7996#endif 7997 } 7998 break; 7999 case XML_PARSER_IGNORE: 8000 xmlGenericError(xmlGenericErrorContext, 8001 "PP: internal error, state == IGNORE"); 8002 ctxt->instate = XML_PARSER_DTD; 8003#ifdef DEBUG_PUSH 8004 xmlGenericError(xmlGenericErrorContext, 8005 "PP: entering DTD\n"); 8006#endif 8007 break; 8008 case XML_PARSER_PROLOG: 8009 SKIP_BLANKS; 8010 if (ctxt->input->buf == NULL) 8011 
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8012 else 8013 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8014 if (avail < 2) 8015 goto done; 8016 cur = ctxt->input->cur[0]; 8017 next = ctxt->input->cur[1]; 8018 if ((cur == '<') && (next == '?')) { 8019 if ((!terminate) && 8020 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8021 goto done; 8022#ifdef DEBUG_PUSH 8023 xmlGenericError(xmlGenericErrorContext, 8024 "PP: Parsing PI\n"); 8025#endif 8026 xmlParsePI(ctxt); 8027 } else if ((cur == '<') && (next == '!') && 8028 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8029 if ((!terminate) && 8030 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8031 goto done; 8032#ifdef DEBUG_PUSH 8033 xmlGenericError(xmlGenericErrorContext, 8034 "PP: Parsing Comment\n"); 8035#endif 8036 xmlParseComment(ctxt); 8037 ctxt->instate = XML_PARSER_PROLOG; 8038 } else if ((cur == '<') && (next == '!') && 8039 (avail < 4)) { 8040 goto done; 8041 } else { 8042 ctxt->instate = XML_PARSER_START_TAG; 8043#ifdef DEBUG_PUSH 8044 xmlGenericError(xmlGenericErrorContext, 8045 "PP: entering START_TAG\n"); 8046#endif 8047 } 8048 break; 8049 case XML_PARSER_EPILOG: 8050 SKIP_BLANKS; 8051 if (ctxt->input->buf == NULL) 8052 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 8053 else 8054 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 8055 if (avail < 2) 8056 goto done; 8057 cur = ctxt->input->cur[0]; 8058 next = ctxt->input->cur[1]; 8059 if ((cur == '<') && (next == '?')) { 8060 if ((!terminate) && 8061 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8062 goto done; 8063#ifdef DEBUG_PUSH 8064 xmlGenericError(xmlGenericErrorContext, 8065 "PP: Parsing PI\n"); 8066#endif 8067 xmlParsePI(ctxt); 8068 ctxt->instate = XML_PARSER_EPILOG; 8069 } else if ((cur == '<') && (next == '!') && 8070 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8071 if ((!terminate) && 8072 
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8073 goto done; 8074#ifdef DEBUG_PUSH 8075 xmlGenericError(xmlGenericErrorContext, 8076 "PP: Parsing Comment\n"); 8077#endif 8078 xmlParseComment(ctxt); 8079 ctxt->instate = XML_PARSER_EPILOG; 8080 } else if ((cur == '<') && (next == '!') && 8081 (avail < 4)) { 8082 goto done; 8083 } else { 8084 ctxt->errNo = XML_ERR_DOCUMENT_END; 8085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8086 ctxt->sax->error(ctxt->userData, 8087 "Extra content at the end of the document\n"); 8088 ctxt->wellFormed = 0; 8089 ctxt->disableSAX = 1; 8090 ctxt->instate = XML_PARSER_EOF; 8091#ifdef DEBUG_PUSH 8092 xmlGenericError(xmlGenericErrorContext, 8093 "PP: entering EOF\n"); 8094#endif 8095 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8096 (!ctxt->disableSAX)) 8097 ctxt->sax->endDocument(ctxt->userData); 8098 goto done; 8099 } 8100 break; 8101 case XML_PARSER_START_TAG: { 8102 xmlChar *name, *oldname; 8103 8104 if ((avail < 2) && (ctxt->inputNr == 1)) 8105 goto done; 8106 cur = ctxt->input->cur[0]; 8107 if (cur != '<') { 8108 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 8109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8110 ctxt->sax->error(ctxt->userData, 8111 "Start tag expect, '<' not found\n"); 8112 ctxt->wellFormed = 0; 8113 ctxt->disableSAX = 1; 8114 ctxt->instate = XML_PARSER_EOF; 8115#ifdef DEBUG_PUSH 8116 xmlGenericError(xmlGenericErrorContext, 8117 "PP: entering EOF\n"); 8118#endif 8119 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8120 (!ctxt->disableSAX)) 8121 ctxt->sax->endDocument(ctxt->userData); 8122 goto done; 8123 } 8124 if ((!terminate) && 8125 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8126 goto done; 8127 if (ctxt->spaceNr == 0) 8128 spacePush(ctxt, -1); 8129 else 8130 spacePush(ctxt, *ctxt->space); 8131 name = xmlParseStartTag(ctxt); 8132 if (name == NULL) { 8133 spacePop(ctxt); 8134 ctxt->instate = XML_PARSER_EOF; 8135#ifdef DEBUG_PUSH 8136 xmlGenericError(xmlGenericErrorContext, 
8137 "PP: entering EOF\n"); 8138#endif 8139 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8140 (!ctxt->disableSAX)) 8141 ctxt->sax->endDocument(ctxt->userData); 8142 goto done; 8143 } 8144 namePush(ctxt, xmlStrdup(name)); 8145 8146 /* 8147 * [ VC: Root Element Type ] 8148 * The Name in the document type declaration must match 8149 * the element type of the root element. 8150 */ 8151 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8152 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8153 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8154 8155 /* 8156 * Check for an Empty Element. 8157 */ 8158 if ((RAW == '/') && (NXT(1) == '>')) { 8159 SKIP(2); 8160 if ((ctxt->sax != NULL) && 8161 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 8162 ctxt->sax->endElement(ctxt->userData, name); 8163 xmlFree(name); 8164 oldname = namePop(ctxt); 8165 spacePop(ctxt); 8166 if (oldname != NULL) { 8167#ifdef DEBUG_STACK 8168 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8169#endif 8170 xmlFree(oldname); 8171 } 8172 if (ctxt->name == NULL) { 8173 ctxt->instate = XML_PARSER_EPILOG; 8174#ifdef DEBUG_PUSH 8175 xmlGenericError(xmlGenericErrorContext, 8176 "PP: entering EPILOG\n"); 8177#endif 8178 } else { 8179 ctxt->instate = XML_PARSER_CONTENT; 8180#ifdef DEBUG_PUSH 8181 xmlGenericError(xmlGenericErrorContext, 8182 "PP: entering CONTENT\n"); 8183#endif 8184 } 8185 break; 8186 } 8187 if (RAW == '>') { 8188 NEXT; 8189 } else { 8190 ctxt->errNo = XML_ERR_GT_REQUIRED; 8191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8192 ctxt->sax->error(ctxt->userData, 8193 "Couldn't find end of Start Tag %s\n", 8194 name); 8195 ctxt->wellFormed = 0; 8196 ctxt->disableSAX = 1; 8197 8198 /* 8199 * end of parsing of this node. 
8200 */ 8201 nodePop(ctxt); 8202 oldname = namePop(ctxt); 8203 spacePop(ctxt); 8204 if (oldname != NULL) { 8205#ifdef DEBUG_STACK 8206 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 8207#endif 8208 xmlFree(oldname); 8209 } 8210 } 8211 xmlFree(name); 8212 ctxt->instate = XML_PARSER_CONTENT; 8213#ifdef DEBUG_PUSH 8214 xmlGenericError(xmlGenericErrorContext, 8215 "PP: entering CONTENT\n"); 8216#endif 8217 break; 8218 } 8219 case XML_PARSER_CONTENT: { 8220 const xmlChar *test; 8221 int cons; 8222 int tok; 8223 8224 /* 8225 * Handle preparsed entities and charRef 8226 */ 8227 if (ctxt->token != 0) { 8228 xmlChar current[2] = { 0 , 0 } ; 8229 8230 current[0] = (xmlChar) ctxt->token; 8231 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8232 (ctxt->sax->characters != NULL)) 8233 ctxt->sax->characters(ctxt->userData, current, 1); 8234 ctxt->token = 0; 8235 } 8236 if ((avail < 2) && (ctxt->inputNr == 1)) 8237 goto done; 8238 cur = ctxt->input->cur[0]; 8239 next = ctxt->input->cur[1]; 8240 8241 test = CUR_PTR; 8242 cons = ctxt->input->consumed; 8243 tok = ctxt->token; 8244 if ((cur == '<') && (next == '?')) { 8245 if ((!terminate) && 8246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8247 goto done; 8248#ifdef DEBUG_PUSH 8249 xmlGenericError(xmlGenericErrorContext, 8250 "PP: Parsing PI\n"); 8251#endif 8252 xmlParsePI(ctxt); 8253 } else if ((cur == '<') && (next == '!') && 8254 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 8255 if ((!terminate) && 8256 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 8257 goto done; 8258#ifdef DEBUG_PUSH 8259 xmlGenericError(xmlGenericErrorContext, 8260 "PP: Parsing Comment\n"); 8261#endif 8262 xmlParseComment(ctxt); 8263 ctxt->instate = XML_PARSER_CONTENT; 8264 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 8265 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 8266 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 8267 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 8268 
(ctxt->input->cur[8] == '[')) { 8269 SKIP(9); 8270 ctxt->instate = XML_PARSER_CDATA_SECTION; 8271#ifdef DEBUG_PUSH 8272 xmlGenericError(xmlGenericErrorContext, 8273 "PP: entering CDATA_SECTION\n"); 8274#endif 8275 break; 8276 } else if ((cur == '<') && (next == '!') && 8277 (avail < 9)) { 8278 goto done; 8279 } else if ((cur == '<') && (next == '/')) { 8280 ctxt->instate = XML_PARSER_END_TAG; 8281#ifdef DEBUG_PUSH 8282 xmlGenericError(xmlGenericErrorContext, 8283 "PP: entering END_TAG\n"); 8284#endif 8285 break; 8286 } else if (cur == '<') { 8287 ctxt->instate = XML_PARSER_START_TAG; 8288#ifdef DEBUG_PUSH 8289 xmlGenericError(xmlGenericErrorContext, 8290 "PP: entering START_TAG\n"); 8291#endif 8292 break; 8293 } else if (cur == '&') { 8294 if ((!terminate) && 8295 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 8296 goto done; 8297#ifdef DEBUG_PUSH 8298 xmlGenericError(xmlGenericErrorContext, 8299 "PP: Parsing Reference\n"); 8300#endif 8301 xmlParseReference(ctxt); 8302 } else { 8303 /* TODO Avoid the extra copy, handle directly !!! */ 8304 /* 8305 * Goal of the following test is: 8306 * - minimize calls to the SAX 'character' callback 8307 * when they are mergeable 8308 * - handle an problem for isBlank when we only parse 8309 * a sequence of blank chars and the next one is 8310 * not available to check against '<' presence. 8311 * - tries to homogenize the differences in SAX 8312 * callbacks beween the push and pull versions 8313 * of the parser. 8314 */ 8315 if ((ctxt->inputNr == 1) && 8316 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8317 if ((!terminate) && 8318 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8319 goto done; 8320 } 8321 ctxt->checkIndex = 0; 8322#ifdef DEBUG_PUSH 8323 xmlGenericError(xmlGenericErrorContext, 8324 "PP: Parsing char data\n"); 8325#endif 8326 xmlParseCharData(ctxt, 0); 8327 } 8328 /* 8329 * Pop-up of finished entities. 
8330 */ 8331 while ((RAW == 0) && (ctxt->inputNr > 1)) 8332 xmlPopInput(ctxt); 8333 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 8334 (tok == ctxt->token)) { 8335 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8337 ctxt->sax->error(ctxt->userData, 8338 "detected an error in element content\n"); 8339 ctxt->wellFormed = 0; 8340 ctxt->disableSAX = 1; 8341 ctxt->instate = XML_PARSER_EOF; 8342 break; 8343 } 8344 break; 8345 } 8346 case XML_PARSER_CDATA_SECTION: { 8347 /* 8348 * The Push mode need to have the SAX callback for 8349 * cdataBlock merge back contiguous callbacks. 8350 */ 8351 int base; 8352 8353 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8354 if (base < 0) { 8355 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8356 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8357 if (ctxt->sax->cdataBlock != NULL) 8358 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8359 XML_PARSER_BIG_BUFFER_SIZE); 8360 } 8361 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8362 ctxt->checkIndex = 0; 8363 } 8364 goto done; 8365 } else { 8366 if ((ctxt->sax != NULL) && (base > 0) && 8367 (!ctxt->disableSAX)) { 8368 if (ctxt->sax->cdataBlock != NULL) 8369 ctxt->sax->cdataBlock(ctxt->userData, 8370 ctxt->input->cur, base); 8371 } 8372 SKIP(base + 3); 8373 ctxt->checkIndex = 0; 8374 ctxt->instate = XML_PARSER_CONTENT; 8375#ifdef DEBUG_PUSH 8376 xmlGenericError(xmlGenericErrorContext, 8377 "PP: entering CONTENT\n"); 8378#endif 8379 } 8380 break; 8381 } 8382 case XML_PARSER_END_TAG: 8383 if (avail < 2) 8384 goto done; 8385 if ((!terminate) && 8386 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8387 goto done; 8388 xmlParseEndTag(ctxt); 8389 if (ctxt->name == NULL) { 8390 ctxt->instate = XML_PARSER_EPILOG; 8391#ifdef DEBUG_PUSH 8392 xmlGenericError(xmlGenericErrorContext, 8393 "PP: entering EPILOG\n"); 8394#endif 8395 } else { 8396 ctxt->instate = XML_PARSER_CONTENT; 8397#ifdef DEBUG_PUSH 8398 
xmlGenericError(xmlGenericErrorContext, 8399 "PP: entering CONTENT\n"); 8400#endif 8401 } 8402 break; 8403 case XML_PARSER_DTD: { 8404 /* 8405 * Sorry but progressive parsing of the internal subset 8406 * is not expected to be supported. We first check that 8407 * the full content of the internal subset is available and 8408 * the parsing is launched only at that point. 8409 * Internal subset ends up with "']' S? '>'" in an unescaped 8410 * section and not in a ']]>' sequence which are conditional 8411 * sections (whoever argued to keep that crap in XML deserve 8412 * a place in hell !). 8413 */ 8414 int base, i; 8415 xmlChar *buf; 8416 xmlChar quote = 0; 8417 8418 base = ctxt->input->cur - ctxt->input->base; 8419 if (base < 0) return(0); 8420 if (ctxt->checkIndex > base) 8421 base = ctxt->checkIndex; 8422 buf = ctxt->input->buf->buffer->content; 8423 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8424 base++) { 8425 if (quote != 0) { 8426 if (buf[base] == quote) 8427 quote = 0; 8428 continue; 8429 } 8430 if (buf[base] == '"') { 8431 quote = '"'; 8432 continue; 8433 } 8434 if (buf[base] == '\'') { 8435 quote = '\''; 8436 continue; 8437 } 8438 if (buf[base] == ']') { 8439 if ((unsigned int) base +1 >= 8440 ctxt->input->buf->buffer->use) 8441 break; 8442 if (buf[base + 1] == ']') { 8443 /* conditional crap, skip both ']' ! 
*/ 8444 base++; 8445 continue; 8446 } 8447 for (i = 0; 8448 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8449 i++) { 8450 if (buf[base + i] == '>') 8451 goto found_end_int_subset; 8452 } 8453 break; 8454 } 8455 } 8456 /* 8457 * We didn't found the end of the Internal subset 8458 */ 8459 if (quote == 0) 8460 ctxt->checkIndex = base; 8461#ifdef DEBUG_PUSH 8462 if (next == 0) 8463 xmlGenericError(xmlGenericErrorContext, 8464 "PP: lookup of int subset end filed\n"); 8465#endif 8466 goto done; 8467 8468found_end_int_subset: 8469 xmlParseInternalSubset(ctxt); 8470 ctxt->inSubset = 2; 8471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8472 (ctxt->sax->externalSubset != NULL)) 8473 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8474 ctxt->extSubSystem, ctxt->extSubURI); 8475 ctxt->inSubset = 0; 8476 ctxt->instate = XML_PARSER_PROLOG; 8477 ctxt->checkIndex = 0; 8478#ifdef DEBUG_PUSH 8479 xmlGenericError(xmlGenericErrorContext, 8480 "PP: entering PROLOG\n"); 8481#endif 8482 break; 8483 } 8484 case XML_PARSER_COMMENT: 8485 xmlGenericError(xmlGenericErrorContext, 8486 "PP: internal error, state == COMMENT\n"); 8487 ctxt->instate = XML_PARSER_CONTENT; 8488#ifdef DEBUG_PUSH 8489 xmlGenericError(xmlGenericErrorContext, 8490 "PP: entering CONTENT\n"); 8491#endif 8492 break; 8493 case XML_PARSER_PI: 8494 xmlGenericError(xmlGenericErrorContext, 8495 "PP: internal error, state == PI\n"); 8496 ctxt->instate = XML_PARSER_CONTENT; 8497#ifdef DEBUG_PUSH 8498 xmlGenericError(xmlGenericErrorContext, 8499 "PP: entering CONTENT\n"); 8500#endif 8501 break; 8502 case XML_PARSER_ENTITY_DECL: 8503 xmlGenericError(xmlGenericErrorContext, 8504 "PP: internal error, state == ENTITY_DECL\n"); 8505 ctxt->instate = XML_PARSER_DTD; 8506#ifdef DEBUG_PUSH 8507 xmlGenericError(xmlGenericErrorContext, 8508 "PP: entering DTD\n"); 8509#endif 8510 break; 8511 case XML_PARSER_ENTITY_VALUE: 8512 xmlGenericError(xmlGenericErrorContext, 8513 "PP: internal error, state == 
ENTITY_VALUE\n"); 8514 ctxt->instate = XML_PARSER_CONTENT; 8515#ifdef DEBUG_PUSH 8516 xmlGenericError(xmlGenericErrorContext, 8517 "PP: entering DTD\n"); 8518#endif 8519 break; 8520 case XML_PARSER_ATTRIBUTE_VALUE: 8521 xmlGenericError(xmlGenericErrorContext, 8522 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8523 ctxt->instate = XML_PARSER_START_TAG; 8524#ifdef DEBUG_PUSH 8525 xmlGenericError(xmlGenericErrorContext, 8526 "PP: entering START_TAG\n"); 8527#endif 8528 break; 8529 case XML_PARSER_SYSTEM_LITERAL: 8530 xmlGenericError(xmlGenericErrorContext, 8531 "PP: internal error, state == SYSTEM_LITERAL\n"); 8532 ctxt->instate = XML_PARSER_START_TAG; 8533#ifdef DEBUG_PUSH 8534 xmlGenericError(xmlGenericErrorContext, 8535 "PP: entering START_TAG\n"); 8536#endif 8537 break; 8538 } 8539 } 8540done: 8541#ifdef DEBUG_PUSH 8542 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8543#endif 8544 return(ret); 8545} 8546 8547/** 8548 * xmlParseChunk: 8549 * @ctxt: an XML parser context 8550 * @chunk: an char array 8551 * @size: the size in byte of the chunk 8552 * @terminate: last chunk indicator 8553 * 8554 * Parse a Chunk of memory 8555 * 8556 * Returns zero if no error, the xmlParserErrors otherwise. 
8557 */ 8558int 8559xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8560 int terminate) { 8561 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8562 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8563 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8564 int cur = ctxt->input->cur - ctxt->input->base; 8565 8566 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8567 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8568 ctxt->input->cur = ctxt->input->base + cur; 8569 ctxt->input->end = 8570 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8571#ifdef DEBUG_PUSH 8572 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8573#endif 8574 8575 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8576 xmlParseTryOrFinish(ctxt, terminate); 8577 } else if (ctxt->instate != XML_PARSER_EOF) { 8578 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8579 xmlParserInputBufferPtr in = ctxt->input->buf; 8580 if ((in->encoder != NULL) && (in->buffer != NULL) && 8581 (in->raw != NULL)) { 8582 int nbchars; 8583 8584 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8585 if (nbchars < 0) { 8586 xmlGenericError(xmlGenericErrorContext, 8587 "xmlParseChunk: encoder error\n"); 8588 return(XML_ERR_INVALID_ENCODING); 8589 } 8590 } 8591 } 8592 } 8593 xmlParseTryOrFinish(ctxt, terminate); 8594 if (terminate) { 8595 /* 8596 * Check for termination 8597 */ 8598 if ((ctxt->instate != XML_PARSER_EOF) && 8599 (ctxt->instate != XML_PARSER_EPILOG)) { 8600 ctxt->errNo = XML_ERR_DOCUMENT_END; 8601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8602 ctxt->sax->error(ctxt->userData, 8603 "Extra content at the end of the document\n"); 8604 ctxt->wellFormed = 0; 8605 ctxt->disableSAX = 1; 8606 } 8607 if (ctxt->instate != XML_PARSER_EOF) { 8608 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8609 (!ctxt->disableSAX)) 8610 
ctxt->sax->endDocument(ctxt->userData); 8611 } 8612 ctxt->instate = XML_PARSER_EOF; 8613 } 8614 return((xmlParserErrors) ctxt->errNo); 8615} 8616 8617/************************************************************************ 8618 * * 8619 * I/O front end functions to the parser * 8620 * * 8621 ************************************************************************/ 8622 8623/** 8624 * xmlStopParser: 8625 * @ctxt: an XML parser context 8626 * 8627 * Blocks further parser processing 8628 */ 8629void 8630xmlStopParser(xmlParserCtxtPtr ctxt) { 8631 ctxt->instate = XML_PARSER_EOF; 8632 if (ctxt->input != NULL) 8633 ctxt->input->cur = BAD_CAST""; 8634} 8635 8636/** 8637 * xmlCreatePushParserCtxt: 8638 * @sax: a SAX handler 8639 * @user_data: The user data returned on SAX callbacks 8640 * @chunk: a pointer to an array of chars 8641 * @size: number of chars in the array 8642 * @filename: an optional file name or URI 8643 * 8644 * Create a parser context for using the XML parser in push mode 8645 * To allow content encoding detection, @size should be >= 4 8646 * The value of @filename is used for fetching external entities 8647 * and error/warning reports. 
8648 * 8649 * Returns the new parser context or NULL 8650 */ 8651xmlParserCtxtPtr 8652xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8653 const char *chunk, int size, const char *filename) { 8654 xmlParserCtxtPtr ctxt; 8655 xmlParserInputPtr inputStream; 8656 xmlParserInputBufferPtr buf; 8657 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8658 8659 /* 8660 * plug some encoding conversion routines 8661 */ 8662 if ((chunk != NULL) && (size >= 4)) 8663 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8664 8665 buf = xmlAllocParserInputBuffer(enc); 8666 if (buf == NULL) return(NULL); 8667 8668 ctxt = xmlNewParserCtxt(); 8669 if (ctxt == NULL) { 8670 xmlFree(buf); 8671 return(NULL); 8672 } 8673 if (sax != NULL) { 8674 if (ctxt->sax != &xmlDefaultSAXHandler) 8675 xmlFree(ctxt->sax); 8676 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8677 if (ctxt->sax == NULL) { 8678 xmlFree(buf); 8679 xmlFree(ctxt); 8680 return(NULL); 8681 } 8682 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8683 if (user_data != NULL) 8684 ctxt->userData = user_data; 8685 } 8686 if (filename == NULL) { 8687 ctxt->directory = NULL; 8688 } else { 8689 ctxt->directory = xmlParserGetDirectory(filename); 8690 } 8691 8692 inputStream = xmlNewInputStream(ctxt); 8693 if (inputStream == NULL) { 8694 xmlFreeParserCtxt(ctxt); 8695 return(NULL); 8696 } 8697 8698 if (filename == NULL) 8699 inputStream->filename = NULL; 8700 else 8701 inputStream->filename = xmlMemStrdup(filename); 8702 inputStream->buf = buf; 8703 inputStream->base = inputStream->buf->buffer->content; 8704 inputStream->cur = inputStream->buf->buffer->content; 8705 inputStream->end = 8706 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 8707 8708 inputPush(ctxt, inputStream); 8709 8710 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8711 (ctxt->input->buf != NULL)) { 8712 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8713#ifdef DEBUG_PUSH 8714 
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8715#endif 8716 } 8717 8718 if (enc != XML_CHAR_ENCODING_NONE) { 8719 xmlSwitchEncoding(ctxt, enc); 8720 } 8721 8722 return(ctxt); 8723} 8724 8725/** 8726 * xmlCreateIOParserCtxt: 8727 * @sax: a SAX handler 8728 * @user_data: The user data returned on SAX callbacks 8729 * @ioread: an I/O read function 8730 * @ioclose: an I/O close function 8731 * @ioctx: an I/O handler 8732 * @enc: the charset encoding if known 8733 * 8734 * Create a parser context for using the XML parser with an existing 8735 * I/O stream 8736 * 8737 * Returns the new parser context or NULL 8738 */ 8739xmlParserCtxtPtr 8740xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8741 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8742 void *ioctx, xmlCharEncoding enc) { 8743 xmlParserCtxtPtr ctxt; 8744 xmlParserInputPtr inputStream; 8745 xmlParserInputBufferPtr buf; 8746 8747 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8748 if (buf == NULL) return(NULL); 8749 8750 ctxt = xmlNewParserCtxt(); 8751 if (ctxt == NULL) { 8752 xmlFree(buf); 8753 return(NULL); 8754 } 8755 if (sax != NULL) { 8756 if (ctxt->sax != &xmlDefaultSAXHandler) 8757 xmlFree(ctxt->sax); 8758 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8759 if (ctxt->sax == NULL) { 8760 xmlFree(buf); 8761 xmlFree(ctxt); 8762 return(NULL); 8763 } 8764 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8765 if (user_data != NULL) 8766 ctxt->userData = user_data; 8767 } 8768 8769 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8770 if (inputStream == NULL) { 8771 xmlFreeParserCtxt(ctxt); 8772 return(NULL); 8773 } 8774 inputPush(ctxt, inputStream); 8775 8776 return(ctxt); 8777} 8778 8779/************************************************************************ 8780 * * 8781 * Front ends when parsing a Dtd * 8782 * * 8783 ************************************************************************/ 8784 8785/** 8786 * xmlIOParseDTD: 8787 
* @sax: the SAX handler block or NULL 8788 * @input: an Input Buffer 8789 * @enc: the charset encoding if known 8790 * 8791 * Load and parse a DTD 8792 * 8793 * Returns the resulting xmlDtdPtr or NULL in case of error. 8794 * @input will be freed at parsing end. 8795 */ 8796 8797xmlDtdPtr 8798xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 8799 xmlCharEncoding enc) { 8800 xmlDtdPtr ret = NULL; 8801 xmlParserCtxtPtr ctxt; 8802 xmlParserInputPtr pinput = NULL; 8803 xmlChar start[4]; 8804 8805 if (input == NULL) 8806 return(NULL); 8807 8808 ctxt = xmlNewParserCtxt(); 8809 if (ctxt == NULL) { 8810 return(NULL); 8811 } 8812 8813 /* 8814 * Set-up the SAX context 8815 */ 8816 if (sax != NULL) { 8817 if (ctxt->sax != NULL) 8818 xmlFree(ctxt->sax); 8819 ctxt->sax = sax; 8820 ctxt->userData = NULL; 8821 } 8822 8823 /* 8824 * generate a parser input from the I/O handler 8825 */ 8826 8827 pinput = xmlNewIOInputStream(ctxt, input, enc); 8828 if (pinput == NULL) { 8829 if (sax != NULL) ctxt->sax = NULL; 8830 xmlFreeParserCtxt(ctxt); 8831 return(NULL); 8832 } 8833 8834 /* 8835 * plug some encoding conversion routines here. 8836 */ 8837 xmlPushInput(ctxt, pinput); 8838 8839 pinput->filename = NULL; 8840 pinput->line = 1; 8841 pinput->col = 1; 8842 pinput->base = ctxt->input->cur; 8843 pinput->cur = ctxt->input->cur; 8844 pinput->free = NULL; 8845 8846 /* 8847 * let's parse that entity knowing it's an external subset. 8848 */ 8849 ctxt->inSubset = 2; 8850 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8851 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8852 BAD_CAST "none", BAD_CAST "none"); 8853 8854 if (enc == XML_CHAR_ENCODING_NONE) { 8855 /* 8856 * Get the 4 first bytes and decode the charset 8857 * if enc != XML_CHAR_ENCODING_NONE 8858 * plug some encoding conversion routines. 
8859 */ 8860 start[0] = RAW; 8861 start[1] = NXT(1); 8862 start[2] = NXT(2); 8863 start[3] = NXT(3); 8864 enc = xmlDetectCharEncoding(start, 4); 8865 if (enc != XML_CHAR_ENCODING_NONE) { 8866 xmlSwitchEncoding(ctxt, enc); 8867 } 8868 } 8869 8870 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 8871 8872 if (ctxt->myDoc != NULL) { 8873 if (ctxt->wellFormed) { 8874 ret = ctxt->myDoc->extSubset; 8875 ctxt->myDoc->extSubset = NULL; 8876 } else { 8877 ret = NULL; 8878 } 8879 xmlFreeDoc(ctxt->myDoc); 8880 ctxt->myDoc = NULL; 8881 } 8882 if (sax != NULL) ctxt->sax = NULL; 8883 xmlFreeParserCtxt(ctxt); 8884 8885 return(ret); 8886} 8887 8888/** 8889 * xmlSAXParseDTD: 8890 * @sax: the SAX handler block 8891 * @ExternalID: a NAME* containing the External ID of the DTD 8892 * @SystemID: a NAME* containing the URL to the DTD 8893 * 8894 * Load and parse an external subset. 8895 * 8896 * Returns the resulting xmlDtdPtr or NULL in case of error. 8897 */ 8898 8899xmlDtdPtr 8900xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 8901 const xmlChar *SystemID) { 8902 xmlDtdPtr ret = NULL; 8903 xmlParserCtxtPtr ctxt; 8904 xmlParserInputPtr input = NULL; 8905 xmlCharEncoding enc; 8906 8907 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 8908 8909 ctxt = xmlNewParserCtxt(); 8910 if (ctxt == NULL) { 8911 return(NULL); 8912 } 8913 8914 /* 8915 * Set-up the SAX context 8916 */ 8917 if (sax != NULL) { 8918 if (ctxt->sax != NULL) 8919 xmlFree(ctxt->sax); 8920 ctxt->sax = sax; 8921 ctxt->userData = NULL; 8922 } 8923 8924 /* 8925 * Ask the Entity resolver to load the damn thing 8926 */ 8927 8928 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8929 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8930 if (input == NULL) { 8931 if (sax != NULL) ctxt->sax = NULL; 8932 xmlFreeParserCtxt(ctxt); 8933 return(NULL); 8934 } 8935 8936 /* 8937 * plug some encoding conversion routines here. 
8938 */ 8939 xmlPushInput(ctxt, input); 8940 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 8941 xmlSwitchEncoding(ctxt, enc); 8942 8943 if (input->filename == NULL) 8944 input->filename = (char *) xmlStrdup(SystemID); 8945 input->line = 1; 8946 input->col = 1; 8947 input->base = ctxt->input->cur; 8948 input->cur = ctxt->input->cur; 8949 input->free = NULL; 8950 8951 /* 8952 * let's parse that entity knowing it's an external subset. 8953 */ 8954 ctxt->inSubset = 2; 8955 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8956 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8957 ExternalID, SystemID); 8958 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8959 8960 if (ctxt->myDoc != NULL) { 8961 if (ctxt->wellFormed) { 8962 ret = ctxt->myDoc->extSubset; 8963 ctxt->myDoc->extSubset = NULL; 8964 } else { 8965 ret = NULL; 8966 } 8967 xmlFreeDoc(ctxt->myDoc); 8968 ctxt->myDoc = NULL; 8969 } 8970 if (sax != NULL) ctxt->sax = NULL; 8971 xmlFreeParserCtxt(ctxt); 8972 8973 return(ret); 8974} 8975 8976/** 8977 * xmlParseDTD: 8978 * @ExternalID: a NAME* containing the External ID of the DTD 8979 * @SystemID: a NAME* containing the URL to the DTD 8980 * 8981 * Load and parse an external subset. 8982 * 8983 * Returns the resulting xmlDtdPtr or NULL in case of error. 
8984 */ 8985 8986xmlDtdPtr 8987xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 8988 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 8989} 8990 8991/************************************************************************ 8992 * * 8993 * Front ends when parsing an Entity * 8994 * * 8995 ************************************************************************/ 8996 8997/** 8998 * xmlParseCtxtExternalEntity: 8999 * @ctx: the existing parsing context 9000 * @URL: the URL for the entity to load 9001 * @ID: the System ID for the entity to load 9002 * @lst: the return value for the set of parsed nodes 9003 * 9004 * Parse an external general entity within an existing parsing context 9005 * An external general parsed entity is well-formed if it matches the 9006 * production labeled extParsedEnt. 9007 * 9008 * [78] extParsedEnt ::= TextDecl? content 9009 * 9010 * Returns 0 if the entity is well formed, -1 in case of args problem and 9011 * the parser error code otherwise 9012 */ 9013 9014int 9015xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 9016 const xmlChar *ID, xmlNodePtr *lst) { 9017 xmlParserCtxtPtr ctxt; 9018 xmlDocPtr newDoc; 9019 xmlSAXHandlerPtr oldsax = NULL; 9020 int ret = 0; 9021 xmlChar start[4]; 9022 xmlCharEncoding enc; 9023 9024 if (ctx->depth > 40) { 9025 return(XML_ERR_ENTITY_LOOP); 9026 } 9027 9028 if (lst != NULL) 9029 *lst = NULL; 9030 if ((URL == NULL) && (ID == NULL)) 9031 return(-1); 9032 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 9033 return(-1); 9034 9035 9036 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9037 if (ctxt == NULL) return(-1); 9038 ctxt->userData = ctxt; 9039 oldsax = ctxt->sax; 9040 ctxt->sax = ctx->sax; 9041 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9042 if (newDoc == NULL) { 9043 xmlFreeParserCtxt(ctxt); 9044 return(-1); 9045 } 9046 if (ctx->myDoc != NULL) { 9047 newDoc->intSubset = ctx->myDoc->intSubset; 9048 newDoc->extSubset = ctx->myDoc->extSubset; 9049 } 9050 if 
(ctx->myDoc->URL != NULL) { 9051 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 9052 } 9053 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9054 if (newDoc->children == NULL) { 9055 ctxt->sax = oldsax; 9056 xmlFreeParserCtxt(ctxt); 9057 newDoc->intSubset = NULL; 9058 newDoc->extSubset = NULL; 9059 xmlFreeDoc(newDoc); 9060 return(-1); 9061 } 9062 nodePush(ctxt, newDoc->children); 9063 if (ctx->myDoc == NULL) { 9064 ctxt->myDoc = newDoc; 9065 } else { 9066 ctxt->myDoc = ctx->myDoc; 9067 newDoc->children->doc = ctx->myDoc; 9068 } 9069 9070 /* 9071 * Get the 4 first bytes and decode the charset 9072 * if enc != XML_CHAR_ENCODING_NONE 9073 * plug some encoding conversion routines. 9074 */ 9075 GROW 9076 start[0] = RAW; 9077 start[1] = NXT(1); 9078 start[2] = NXT(2); 9079 start[3] = NXT(3); 9080 enc = xmlDetectCharEncoding(start, 4); 9081 if (enc != XML_CHAR_ENCODING_NONE) { 9082 xmlSwitchEncoding(ctxt, enc); 9083 } 9084 9085 /* 9086 * Parse a possible text declaration first 9087 */ 9088 if ((RAW == '<') && (NXT(1) == '?') && 9089 (NXT(2) == 'x') && (NXT(3) == 'm') && 9090 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9091 xmlParseTextDecl(ctxt); 9092 } 9093 9094 /* 9095 * Doing validity checking on chunk doesn't make sense 9096 */ 9097 ctxt->instate = XML_PARSER_CONTENT; 9098 ctxt->validate = ctx->validate; 9099 ctxt->loadsubset = ctx->loadsubset; 9100 ctxt->depth = ctx->depth + 1; 9101 ctxt->replaceEntities = ctx->replaceEntities; 9102 if (ctxt->validate) { 9103 ctxt->vctxt.error = ctx->vctxt.error; 9104 ctxt->vctxt.warning = ctx->vctxt.warning; 9105 } else { 9106 ctxt->vctxt.error = NULL; 9107 ctxt->vctxt.warning = NULL; 9108 } 9109 ctxt->vctxt.nodeTab = NULL; 9110 ctxt->vctxt.nodeNr = 0; 9111 ctxt->vctxt.nodeMax = 0; 9112 ctxt->vctxt.node = NULL; 9113 9114 xmlParseContent(ctxt); 9115 9116 if ((RAW == '<') && (NXT(1) == '/')) { 9117 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9119 
ctxt->sax->error(ctxt->userData, 9120 "chunk is not well balanced\n"); 9121 ctxt->wellFormed = 0; 9122 ctxt->disableSAX = 1; 9123 } else if (RAW != 0) { 9124 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9126 ctxt->sax->error(ctxt->userData, 9127 "extra content at the end of well balanced chunk\n"); 9128 ctxt->wellFormed = 0; 9129 ctxt->disableSAX = 1; 9130 } 9131 if (ctxt->node != newDoc->children) { 9132 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9134 ctxt->sax->error(ctxt->userData, 9135 "chunk is not well balanced\n"); 9136 ctxt->wellFormed = 0; 9137 ctxt->disableSAX = 1; 9138 } 9139 9140 if (!ctxt->wellFormed) { 9141 if (ctxt->errNo == 0) 9142 ret = 1; 9143 else 9144 ret = ctxt->errNo; 9145 } else { 9146 if (lst != NULL) { 9147 xmlNodePtr cur; 9148 9149 /* 9150 * Return the newly created nodeset after unlinking it from 9151 * they pseudo parent. 9152 */ 9153 cur = newDoc->children->children; 9154 *lst = cur; 9155 while (cur != NULL) { 9156 cur->parent = NULL; 9157 cur = cur->next; 9158 } 9159 newDoc->children->children = NULL; 9160 } 9161 ret = 0; 9162 } 9163 ctxt->sax = oldsax; 9164 xmlFreeParserCtxt(ctxt); 9165 newDoc->intSubset = NULL; 9166 newDoc->extSubset = NULL; 9167 xmlFreeDoc(newDoc); 9168 9169 return(ret); 9170} 9171 9172/** 9173 * xmlParseExternalEntityPrivate: 9174 * @doc: the document the chunk pertains to 9175 * @oldctxt: the previous parser context if available 9176 * @sax: the SAX handler bloc (possibly NULL) 9177 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9178 * @depth: Used for loop detection, use 0 9179 * @URL: the URL for the entity to load 9180 * @ID: the System ID for the entity to load 9181 * @list: the return value for the set of parsed nodes 9182 * 9183 * Private version of xmlParseExternalEntity() 9184 * 9185 * Returns 0 if the entity is well formed, -1 in case of args problem and 9186 * the parser 
error code otherwise 9187 */ 9188 9189static int 9190xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 9191 xmlSAXHandlerPtr sax, 9192 void *user_data, int depth, const xmlChar *URL, 9193 const xmlChar *ID, xmlNodePtr *list) { 9194 xmlParserCtxtPtr ctxt; 9195 xmlDocPtr newDoc; 9196 xmlSAXHandlerPtr oldsax = NULL; 9197 int ret = 0; 9198 xmlChar start[4]; 9199 xmlCharEncoding enc; 9200 9201 if (depth > 40) { 9202 return(XML_ERR_ENTITY_LOOP); 9203 } 9204 9205 9206 9207 if (list != NULL) 9208 *list = NULL; 9209 if ((URL == NULL) && (ID == NULL)) 9210 return(-1); 9211 if (doc == NULL) /* @@ relax but check for dereferences */ 9212 return(-1); 9213 9214 9215 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 9216 if (ctxt == NULL) return(-1); 9217 ctxt->userData = ctxt; 9218 if (oldctxt != NULL) { 9219 ctxt->_private = oldctxt->_private; 9220 ctxt->loadsubset = oldctxt->loadsubset; 9221 ctxt->validate = oldctxt->validate; 9222 ctxt->external = oldctxt->external; 9223 } else { 9224 /* 9225 * Doing validity checking on chunk without context 9226 * doesn't make sense 9227 */ 9228 ctxt->_private = NULL; 9229 ctxt->validate = 0; 9230 ctxt->external = 2; 9231 ctxt->loadsubset = 0; 9232 } 9233 if (sax != NULL) { 9234 oldsax = ctxt->sax; 9235 ctxt->sax = sax; 9236 if (user_data != NULL) 9237 ctxt->userData = user_data; 9238 } 9239 newDoc = xmlNewDoc(BAD_CAST "1.0"); 9240 if (newDoc == NULL) { 9241 xmlFreeParserCtxt(ctxt); 9242 return(-1); 9243 } 9244 if (doc != NULL) { 9245 newDoc->intSubset = doc->intSubset; 9246 newDoc->extSubset = doc->extSubset; 9247 } 9248 if (doc->URL != NULL) { 9249 newDoc->URL = xmlStrdup(doc->URL); 9250 } 9251 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9252 if (newDoc->children == NULL) { 9253 if (sax != NULL) 9254 ctxt->sax = oldsax; 9255 xmlFreeParserCtxt(ctxt); 9256 newDoc->intSubset = NULL; 9257 newDoc->extSubset = NULL; 9258 xmlFreeDoc(newDoc); 9259 return(-1); 9260 } 9261 nodePush(ctxt, 
newDoc->children); 9262 if (doc == NULL) { 9263 ctxt->myDoc = newDoc; 9264 } else { 9265 ctxt->myDoc = doc; 9266 newDoc->children->doc = doc; 9267 } 9268 9269 /* 9270 * Get the 4 first bytes and decode the charset 9271 * if enc != XML_CHAR_ENCODING_NONE 9272 * plug some encoding conversion routines. 9273 */ 9274 GROW; 9275 start[0] = RAW; 9276 start[1] = NXT(1); 9277 start[2] = NXT(2); 9278 start[3] = NXT(3); 9279 enc = xmlDetectCharEncoding(start, 4); 9280 if (enc != XML_CHAR_ENCODING_NONE) { 9281 xmlSwitchEncoding(ctxt, enc); 9282 } 9283 9284 /* 9285 * Parse a possible text declaration first 9286 */ 9287 if ((RAW == '<') && (NXT(1) == '?') && 9288 (NXT(2) == 'x') && (NXT(3) == 'm') && 9289 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9290 xmlParseTextDecl(ctxt); 9291 } 9292 9293 ctxt->instate = XML_PARSER_CONTENT; 9294 ctxt->depth = depth; 9295 9296 xmlParseContent(ctxt); 9297 9298 if ((RAW == '<') && (NXT(1) == '/')) { 9299 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9301 ctxt->sax->error(ctxt->userData, 9302 "chunk is not well balanced\n"); 9303 ctxt->wellFormed = 0; 9304 ctxt->disableSAX = 1; 9305 } else if (RAW != 0) { 9306 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9308 ctxt->sax->error(ctxt->userData, 9309 "extra content at the end of well balanced chunk\n"); 9310 ctxt->wellFormed = 0; 9311 ctxt->disableSAX = 1; 9312 } 9313 if (ctxt->node != newDoc->children) { 9314 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9316 ctxt->sax->error(ctxt->userData, 9317 "chunk is not well balanced\n"); 9318 ctxt->wellFormed = 0; 9319 ctxt->disableSAX = 1; 9320 } 9321 9322 if (!ctxt->wellFormed) { 9323 if (ctxt->errNo == 0) 9324 ret = 1; 9325 else 9326 ret = ctxt->errNo; 9327 } else { 9328 if (list != NULL) { 9329 xmlNodePtr cur; 9330 9331 /* 9332 * Return the newly created nodeset after unlinking it from 9333 * 
they pseudo parent. 9334 */ 9335 cur = newDoc->children->children; 9336 *list = cur; 9337 while (cur != NULL) { 9338 cur->parent = NULL; 9339 cur = cur->next; 9340 } 9341 newDoc->children->children = NULL; 9342 } 9343 ret = 0; 9344 } 9345 if (sax != NULL) 9346 ctxt->sax = oldsax; 9347 xmlFreeParserCtxt(ctxt); 9348 newDoc->intSubset = NULL; 9349 newDoc->extSubset = NULL; 9350 xmlFreeDoc(newDoc); 9351 9352 return(ret); 9353} 9354 9355/** 9356 * xmlParseExternalEntity: 9357 * @doc: the document the chunk pertains to 9358 * @sax: the SAX handler bloc (possibly NULL) 9359 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9360 * @depth: Used for loop detection, use 0 9361 * @URL: the URL for the entity to load 9362 * @ID: the System ID for the entity to load 9363 * @lst: the return value for the set of parsed nodes 9364 * 9365 * Parse an external general entity 9366 * An external general parsed entity is well-formed if it matches the 9367 * production labeled extParsedEnt. 9368 * 9369 * [78] extParsedEnt ::= TextDecl? 
content 9370 * 9371 * Returns 0 if the entity is well formed, -1 in case of args problem and 9372 * the parser error code otherwise 9373 */ 9374 9375int 9376xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 9377 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 9378 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 9379 ID, lst)); 9380} 9381 9382/** 9383 * xmlParseBalancedChunkMemory: 9384 * @doc: the document the chunk pertains to 9385 * @sax: the SAX handler bloc (possibly NULL) 9386 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9387 * @depth: Used for loop detection, use 0 9388 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9389 * @lst: the return value for the set of parsed nodes 9390 * 9391 * Parse a well-balanced chunk of an XML document 9392 * called by the parser 9393 * The allowed sequence for the Well Balanced Chunk is the one defined by 9394 * the content production in the XML grammar: 9395 * 9396 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9397 * 9398 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9399 * the parser error code otherwise 9400 */ 9401 9402int 9403xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9404 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 9405 xmlParserCtxtPtr ctxt; 9406 xmlDocPtr newDoc; 9407 xmlSAXHandlerPtr oldsax = NULL; 9408 int size; 9409 int ret = 0; 9410 9411 if (depth > 40) { 9412 return(XML_ERR_ENTITY_LOOP); 9413 } 9414 9415 9416 if (lst != NULL) 9417 *lst = NULL; 9418 if (string == NULL) 9419 return(-1); 9420 9421 size = xmlStrlen(string); 9422 9423 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9424 if (ctxt == NULL) return(-1); 9425 ctxt->userData = ctxt; 9426 if (sax != NULL) { 9427 oldsax = ctxt->sax; 9428 ctxt->sax = sax; 9429 if (user_data != NULL) 9430 ctxt->userData = user_data; 9431 } 9432 
newDoc = xmlNewDoc(BAD_CAST "1.0"); 9433 if (newDoc == NULL) { 9434 xmlFreeParserCtxt(ctxt); 9435 return(-1); 9436 } 9437 if (doc != NULL) { 9438 newDoc->intSubset = doc->intSubset; 9439 newDoc->extSubset = doc->extSubset; 9440 } 9441 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9442 if (newDoc->children == NULL) { 9443 if (sax != NULL) 9444 ctxt->sax = oldsax; 9445 xmlFreeParserCtxt(ctxt); 9446 newDoc->intSubset = NULL; 9447 newDoc->extSubset = NULL; 9448 xmlFreeDoc(newDoc); 9449 return(-1); 9450 } 9451 nodePush(ctxt, newDoc->children); 9452 if (doc == NULL) { 9453 ctxt->myDoc = newDoc; 9454 } else { 9455 ctxt->myDoc = doc; 9456 newDoc->children->doc = doc; 9457 } 9458 ctxt->instate = XML_PARSER_CONTENT; 9459 ctxt->depth = depth; 9460 9461 /* 9462 * Doing validity checking on chunk doesn't make sense 9463 */ 9464 ctxt->validate = 0; 9465 ctxt->loadsubset = 0; 9466 9467 xmlParseContent(ctxt); 9468 9469 if ((RAW == '<') && (NXT(1) == '/')) { 9470 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9472 ctxt->sax->error(ctxt->userData, 9473 "chunk is not well balanced\n"); 9474 ctxt->wellFormed = 0; 9475 ctxt->disableSAX = 1; 9476 } else if (RAW != 0) { 9477 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9479 ctxt->sax->error(ctxt->userData, 9480 "extra content at the end of well balanced chunk\n"); 9481 ctxt->wellFormed = 0; 9482 ctxt->disableSAX = 1; 9483 } 9484 if (ctxt->node != newDoc->children) { 9485 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9487 ctxt->sax->error(ctxt->userData, 9488 "chunk is not well balanced\n"); 9489 ctxt->wellFormed = 0; 9490 ctxt->disableSAX = 1; 9491 } 9492 9493 if (!ctxt->wellFormed) { 9494 if (ctxt->errNo == 0) 9495 ret = 1; 9496 else 9497 ret = ctxt->errNo; 9498 } else { 9499 if (lst != NULL) { 9500 xmlNodePtr cur; 9501 9502 /* 9503 * Return the 
newly created nodeset after unlinking it from 9504 * they pseudo parent. 9505 */ 9506 cur = newDoc->children->children; 9507 *lst = cur; 9508 while (cur != NULL) { 9509 cur->parent = NULL; 9510 cur = cur->next; 9511 } 9512 newDoc->children->children = NULL; 9513 } 9514 ret = 0; 9515 } 9516 if (sax != NULL) 9517 ctxt->sax = oldsax; 9518 xmlFreeParserCtxt(ctxt); 9519 newDoc->intSubset = NULL; 9520 newDoc->extSubset = NULL; 9521 xmlFreeDoc(newDoc); 9522 9523 return(ret); 9524} 9525 9526/** 9527 * xmlSAXParseEntity: 9528 * @sax: the SAX handler block 9529 * @filename: the filename 9530 * 9531 * parse an XML external entity out of context and build a tree. 9532 * It use the given SAX function block to handle the parsing callback. 9533 * If sax is NULL, fallback to the default DOM tree building routines. 9534 * 9535 * [78] extParsedEnt ::= TextDecl? content 9536 * 9537 * This correspond to a "Well Balanced" chunk 9538 * 9539 * Returns the resulting document tree 9540 */ 9541 9542xmlDocPtr 9543xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9544 xmlDocPtr ret; 9545 xmlParserCtxtPtr ctxt; 9546 char *directory = NULL; 9547 9548 ctxt = xmlCreateFileParserCtxt(filename); 9549 if (ctxt == NULL) { 9550 return(NULL); 9551 } 9552 if (sax != NULL) { 9553 if (ctxt->sax != NULL) 9554 xmlFree(ctxt->sax); 9555 ctxt->sax = sax; 9556 ctxt->userData = NULL; 9557 } 9558 9559 if ((ctxt->directory == NULL) && (directory == NULL)) 9560 directory = xmlParserGetDirectory(filename); 9561 9562 xmlParseExtParsedEnt(ctxt); 9563 9564 if (ctxt->wellFormed) 9565 ret = ctxt->myDoc; 9566 else { 9567 ret = NULL; 9568 xmlFreeDoc(ctxt->myDoc); 9569 ctxt->myDoc = NULL; 9570 } 9571 if (sax != NULL) 9572 ctxt->sax = NULL; 9573 xmlFreeParserCtxt(ctxt); 9574 9575 return(ret); 9576} 9577 9578/** 9579 * xmlParseEntity: 9580 * @filename: the filename 9581 * 9582 * parse an XML external entity out of context and build a tree. 9583 * 9584 * [78] extParsedEnt ::= TextDecl? 
content 9585 * 9586 * This correspond to a "Well Balanced" chunk 9587 * 9588 * Returns the resulting document tree 9589 */ 9590 9591xmlDocPtr 9592xmlParseEntity(const char *filename) { 9593 return(xmlSAXParseEntity(NULL, filename)); 9594} 9595 9596/** 9597 * xmlCreateEntityParserCtxt: 9598 * @URL: the entity URL 9599 * @ID: the entity PUBLIC ID 9600 * @base: a posible base for the target URI 9601 * 9602 * Create a parser context for an external entity 9603 * Automatic support for ZLIB/Compress compressed document is provided 9604 * by default if found at compile-time. 9605 * 9606 * Returns the new parser context or NULL 9607 */ 9608xmlParserCtxtPtr 9609xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9610 const xmlChar *base) { 9611 xmlParserCtxtPtr ctxt; 9612 xmlParserInputPtr inputStream; 9613 char *directory = NULL; 9614 xmlChar *uri; 9615 9616 ctxt = xmlNewParserCtxt(); 9617 if (ctxt == NULL) { 9618 return(NULL); 9619 } 9620 9621 uri = xmlBuildURI(URL, base); 9622 9623 if (uri == NULL) { 9624 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9625 if (inputStream == NULL) { 9626 xmlFreeParserCtxt(ctxt); 9627 return(NULL); 9628 } 9629 9630 inputPush(ctxt, inputStream); 9631 9632 if ((ctxt->directory == NULL) && (directory == NULL)) 9633 directory = xmlParserGetDirectory((char *)URL); 9634 if ((ctxt->directory == NULL) && (directory != NULL)) 9635 ctxt->directory = directory; 9636 } else { 9637 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9638 if (inputStream == NULL) { 9639 xmlFree(uri); 9640 xmlFreeParserCtxt(ctxt); 9641 return(NULL); 9642 } 9643 9644 inputPush(ctxt, inputStream); 9645 9646 if ((ctxt->directory == NULL) && (directory == NULL)) 9647 directory = xmlParserGetDirectory((char *)uri); 9648 if ((ctxt->directory == NULL) && (directory != NULL)) 9649 ctxt->directory = directory; 9650 xmlFree(uri); 9651 } 9652 9653 return(ctxt); 9654} 9655 
9656/************************************************************************ 9657 * * 9658 * Front ends when parsing from a file * 9659 * * 9660 ************************************************************************/ 9661 9662/** 9663 * xmlCreateFileParserCtxt: 9664 * @filename: the filename 9665 * 9666 * Create a parser context for a file content. 9667 * Automatic support for ZLIB/Compress compressed document is provided 9668 * by default if found at compile-time. 9669 * 9670 * Returns the new parser context or NULL 9671 */ 9672xmlParserCtxtPtr 9673xmlCreateFileParserCtxt(const char *filename) 9674{ 9675 xmlParserCtxtPtr ctxt; 9676 xmlParserInputPtr inputStream; 9677 char *directory = NULL; 9678 9679 ctxt = xmlNewParserCtxt(); 9680 if (ctxt == NULL) { 9681 if (xmlDefaultSAXHandler.error != NULL) { 9682 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9683 } 9684 return(NULL); 9685 } 9686 9687 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 9688 if (inputStream == NULL) { 9689 xmlFreeParserCtxt(ctxt); 9690 return(NULL); 9691 } 9692 9693 inputPush(ctxt, inputStream); 9694 if ((ctxt->directory == NULL) && (directory == NULL)) 9695 directory = xmlParserGetDirectory(filename); 9696 if ((ctxt->directory == NULL) && (directory != NULL)) 9697 ctxt->directory = directory; 9698 9699 return(ctxt); 9700} 9701 9702/** 9703 * xmlSAXParseFile: 9704 * @sax: the SAX handler block 9705 * @filename: the filename 9706 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9707 * documents 9708 * 9709 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9710 * compressed document is provided by default if found at compile-time. 9711 * It use the given SAX function block to handle the parsing callback. 9712 * If sax is NULL, fallback to the default DOM tree building routines. 
9713 * 9714 * Returns the resulting document tree 9715 */ 9716 9717xmlDocPtr 9718xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9719 int recovery) { 9720 xmlDocPtr ret; 9721 xmlParserCtxtPtr ctxt; 9722 char *directory = NULL; 9723 9724 ctxt = xmlCreateFileParserCtxt(filename); 9725 if (ctxt == NULL) { 9726 return(NULL); 9727 } 9728 if (sax != NULL) { 9729 if (ctxt->sax != NULL) 9730 xmlFree(ctxt->sax); 9731 ctxt->sax = sax; 9732 } 9733 9734 if ((ctxt->directory == NULL) && (directory == NULL)) 9735 directory = xmlParserGetDirectory(filename); 9736 if ((ctxt->directory == NULL) && (directory != NULL)) 9737 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9738 9739 xmlParseDocument(ctxt); 9740 9741 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9742 else { 9743 ret = NULL; 9744 xmlFreeDoc(ctxt->myDoc); 9745 ctxt->myDoc = NULL; 9746 } 9747 if (sax != NULL) 9748 ctxt->sax = NULL; 9749 xmlFreeParserCtxt(ctxt); 9750 9751 return(ret); 9752} 9753 9754/** 9755 * xmlRecoverDoc: 9756 * @cur: a pointer to an array of xmlChar 9757 * 9758 * parse an XML in-memory document and build a tree. 9759 * In the case the document is not Well Formed, a tree is built anyway 9760 * 9761 * Returns the resulting document tree 9762 */ 9763 9764xmlDocPtr 9765xmlRecoverDoc(xmlChar *cur) { 9766 return(xmlSAXParseDoc(NULL, cur, 1)); 9767} 9768 9769/** 9770 * xmlParseFile: 9771 * @filename: the filename 9772 * 9773 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9774 * compressed document is provided by default if found at compile-time. 9775 * 9776 * Returns the resulting document tree 9777 */ 9778 9779xmlDocPtr 9780xmlParseFile(const char *filename) { 9781 return(xmlSAXParseFile(NULL, filename, 0)); 9782} 9783 9784/** 9785 * xmlRecoverFile: 9786 * @filename: the filename 9787 * 9788 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9789 * compressed document is provided by default if found at compile-time. 
9790 * In the case the document is not Well Formed, a tree is built anyway 9791 * 9792 * Returns the resulting document tree 9793 */ 9794 9795xmlDocPtr 9796xmlRecoverFile(const char *filename) { 9797 return(xmlSAXParseFile(NULL, filename, 1)); 9798} 9799 9800 9801/** 9802 * xmlSetupParserForBuffer: 9803 * @ctxt: an XML parser context 9804 * @buffer: a xmlChar * buffer 9805 * @filename: a file name 9806 * 9807 * Setup the parser context to parse a new buffer; Clears any prior 9808 * contents from the parser context. The buffer parameter must not be 9809 * NULL, but the filename parameter can be 9810 */ 9811void 9812xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9813 const char* filename) 9814{ 9815 xmlParserInputPtr input; 9816 9817 input = xmlNewInputStream(ctxt); 9818 if (input == NULL) { 9819 perror("malloc"); 9820 xmlFree(ctxt); 9821 return; 9822 } 9823 9824 xmlClearParserCtxt(ctxt); 9825 if (filename != NULL) 9826 input->filename = xmlMemStrdup(filename); 9827 input->base = buffer; 9828 input->cur = buffer; 9829 input->end = &buffer[xmlStrlen(buffer)]; 9830 inputPush(ctxt, input); 9831} 9832 9833/** 9834 * xmlSAXUserParseFile: 9835 * @sax: a SAX handler 9836 * @user_data: The user data returned on SAX callbacks 9837 * @filename: a file name 9838 * 9839 * parse an XML file and call the given SAX handler routines. 
9840 * Automatic support for ZLIB/Compress compressed document is provided 9841 * 9842 * Returns 0 in case of success or a error number otherwise 9843 */ 9844int 9845xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 9846 const char *filename) { 9847 int ret = 0; 9848 xmlParserCtxtPtr ctxt; 9849 9850 ctxt = xmlCreateFileParserCtxt(filename); 9851 if (ctxt == NULL) return -1; 9852 if (ctxt->sax != &xmlDefaultSAXHandler) 9853 xmlFree(ctxt->sax); 9854 ctxt->sax = sax; 9855 if (user_data != NULL) 9856 ctxt->userData = user_data; 9857 9858 xmlParseDocument(ctxt); 9859 9860 if (ctxt->wellFormed) 9861 ret = 0; 9862 else { 9863 if (ctxt->errNo != 0) 9864 ret = ctxt->errNo; 9865 else 9866 ret = -1; 9867 } 9868 if (sax != NULL) 9869 ctxt->sax = NULL; 9870 xmlFreeParserCtxt(ctxt); 9871 9872 return ret; 9873} 9874 9875/************************************************************************ 9876 * * 9877 * Front ends when parsing from memory * 9878 * * 9879 ************************************************************************/ 9880 9881/** 9882 * xmlCreateMemoryParserCtxt: 9883 * @buffer: a pointer to a char array 9884 * @size: the size of the array 9885 * 9886 * Create a parser context for an XML in-memory document. 
9887 * 9888 * Returns the new parser context or NULL 9889 */ 9890xmlParserCtxtPtr 9891xmlCreateMemoryParserCtxt(const char *buffer, int size) { 9892 xmlParserCtxtPtr ctxt; 9893 xmlParserInputPtr input; 9894 xmlParserInputBufferPtr buf; 9895 9896 if (buffer == NULL) 9897 return(NULL); 9898 if (size <= 0) 9899 return(NULL); 9900 9901 ctxt = xmlNewParserCtxt(); 9902 if (ctxt == NULL) 9903 return(NULL); 9904 9905 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 9906 if (buf == NULL) return(NULL); 9907 9908 input = xmlNewInputStream(ctxt); 9909 if (input == NULL) { 9910 xmlFreeParserCtxt(ctxt); 9911 return(NULL); 9912 } 9913 9914 input->filename = NULL; 9915 input->buf = buf; 9916 input->base = input->buf->buffer->content; 9917 input->cur = input->buf->buffer->content; 9918 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 9919 9920 inputPush(ctxt, input); 9921 return(ctxt); 9922} 9923 9924/** 9925 * xmlSAXParseMemory: 9926 * @sax: the SAX handler block 9927 * @buffer: an pointer to a char array 9928 * @size: the size of the array 9929 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 9930 * documents 9931 * 9932 * parse an XML in-memory block and use the given SAX function block 9933 * to handle the parsing callback. If sax is NULL, fallback to the default 9934 * DOM tree building routines. 
9935 * 9936 * Returns the resulting document tree 9937 */ 9938xmlDocPtr 9939xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 9940 int size, int recovery) { 9941 xmlDocPtr ret; 9942 xmlParserCtxtPtr ctxt; 9943 9944 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9945 if (ctxt == NULL) return(NULL); 9946 if (sax != NULL) { 9947 ctxt->sax = sax; 9948 } 9949 9950 xmlParseDocument(ctxt); 9951 9952 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9953 else { 9954 ret = NULL; 9955 xmlFreeDoc(ctxt->myDoc); 9956 ctxt->myDoc = NULL; 9957 } 9958 if (sax != NULL) 9959 ctxt->sax = NULL; 9960 xmlFreeParserCtxt(ctxt); 9961 9962 return(ret); 9963} 9964 9965/** 9966 * xmlParseMemory: 9967 * @buffer: an pointer to a char array 9968 * @size: the size of the array 9969 * 9970 * parse an XML in-memory block and build a tree. 9971 * 9972 * Returns the resulting document tree 9973 */ 9974 9975xmlDocPtr xmlParseMemory(const char *buffer, int size) { 9976 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 9977} 9978 9979/** 9980 * xmlRecoverMemory: 9981 * @buffer: an pointer to a char array 9982 * @size: the size of the array 9983 * 9984 * parse an XML in-memory block and build a tree. 9985 * In the case the document is not Well Formed, a tree is built anyway 9986 * 9987 * Returns the resulting document tree 9988 */ 9989 9990xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 9991 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 9992} 9993 9994/** 9995 * xmlSAXUserParseMemory: 9996 * @sax: a SAX handler 9997 * @user_data: The user data returned on SAX callbacks 9998 * @buffer: an in-memory XML document input 9999 * @size: the length of the XML document in bytes 10000 * 10001 * A better SAX parsing routine. 10002 * parse an XML in-memory buffer and call the given SAX handler routines. 
10003 * 10004 * Returns 0 in case of success or a error number otherwise 10005 */ 10006int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 10007 const char *buffer, int size) { 10008 int ret = 0; 10009 xmlParserCtxtPtr ctxt; 10010 xmlSAXHandlerPtr oldsax = NULL; 10011 10012 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 10013 if (ctxt == NULL) return -1; 10014 if (sax != NULL) { 10015 oldsax = ctxt->sax; 10016 ctxt->sax = sax; 10017 } 10018 if (user_data != NULL) 10019 ctxt->userData = user_data; 10020 10021 xmlParseDocument(ctxt); 10022 10023 if (ctxt->wellFormed) 10024 ret = 0; 10025 else { 10026 if (ctxt->errNo != 0) 10027 ret = ctxt->errNo; 10028 else 10029 ret = -1; 10030 } 10031 if (sax != NULL) { 10032 ctxt->sax = oldsax; 10033 } 10034 xmlFreeParserCtxt(ctxt); 10035 10036 return ret; 10037} 10038 10039/** 10040 * xmlCreateDocParserCtxt: 10041 * @cur: a pointer to an array of xmlChar 10042 * 10043 * Creates a parser context for an XML in-memory document. 10044 * 10045 * Returns the new parser context or NULL 10046 */ 10047xmlParserCtxtPtr 10048xmlCreateDocParserCtxt(xmlChar *cur) { 10049 int len; 10050 10051 if (cur == NULL) 10052 return(NULL); 10053 len = xmlStrlen(cur); 10054 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 10055} 10056 10057/** 10058 * xmlSAXParseDoc: 10059 * @sax: the SAX handler block 10060 * @cur: a pointer to an array of xmlChar 10061 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 10062 * documents 10063 * 10064 * parse an XML in-memory document and build a tree. 10065 * It use the given SAX function block to handle the parsing callback. 10066 * If sax is NULL, fallback to the default DOM tree building routines. 
10067 * 10068 * Returns the resulting document tree 10069 */ 10070 10071xmlDocPtr 10072xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 10073 xmlDocPtr ret; 10074 xmlParserCtxtPtr ctxt; 10075 10076 if (cur == NULL) return(NULL); 10077 10078 10079 ctxt = xmlCreateDocParserCtxt(cur); 10080 if (ctxt == NULL) return(NULL); 10081 if (sax != NULL) { 10082 ctxt->sax = sax; 10083 ctxt->userData = NULL; 10084 } 10085 10086 xmlParseDocument(ctxt); 10087 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 10088 else { 10089 ret = NULL; 10090 xmlFreeDoc(ctxt->myDoc); 10091 ctxt->myDoc = NULL; 10092 } 10093 if (sax != NULL) 10094 ctxt->sax = NULL; 10095 xmlFreeParserCtxt(ctxt); 10096 10097 return(ret); 10098} 10099 10100/** 10101 * xmlParseDoc: 10102 * @cur: a pointer to an array of xmlChar 10103 * 10104 * parse an XML in-memory document and build a tree. 10105 * 10106 * Returns the resulting document tree 10107 */ 10108 10109xmlDocPtr 10110xmlParseDoc(xmlChar *cur) { 10111 return(xmlSAXParseDoc(NULL, cur, 0)); 10112} 10113 10114 10115/************************************************************************ 10116 * * 10117 * Miscellaneous * 10118 * * 10119 ************************************************************************/ 10120 10121#ifdef LIBXML_XPATH_ENABLED 10122#include <libxml/xpath.h> 10123#endif 10124 10125static int xmlParserInitialized = 0; 10126 10127/** 10128 * xmlInitParser: 10129 * 10130 * Initialization function for the XML parser. 10131 * This is not reentrant. Call once before processing in case of 10132 * use in multithreaded programs. 
10133 */ 10134 10135void 10136xmlInitParser(void) { 10137 if (xmlParserInitialized) return; 10138 10139 xmlInitCharEncodingHandlers(); 10140 xmlInitializePredefinedEntities(); 10141 xmlDefaultSAXHandlerInit(); 10142 xmlRegisterDefaultInputCallbacks(); 10143 xmlRegisterDefaultOutputCallbacks(); 10144#ifdef LIBXML_HTML_ENABLED 10145 htmlInitAutoClose(); 10146 htmlDefaultSAXHandlerInit(); 10147#endif 10148#ifdef LIBXML_XPATH_ENABLED 10149 xmlXPathInit(); 10150#endif 10151 xmlParserInitialized = 1; 10152} 10153 10154/** 10155 * xmlCleanupParser: 10156 * 10157 * Cleanup function for the XML parser. It tries to reclaim all 10158 * parsing related global memory allocated for the parser processing. 10159 * It doesn't deallocate any document related memory. Calling this 10160 * function should not prevent reusing the parser. 10161 */ 10162 10163void 10164xmlCleanupParser(void) { 10165 xmlParserInitialized = 0; 10166 xmlCleanupCharEncodingHandlers(); 10167 xmlCleanupPredefinedEntities(); 10168#ifdef LIBXML_CATALOG_ENABLED 10169 xmlCatalogCleanup(); 10170#endif 10171} 10172 10173