parser.c revision 82ab81e92ba902bf82b58d04f0b6924d2061b696
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * Daniel.Veillard@w3.org 31 * 32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue 33 * and xmlDoValidityCheckingDefaultValue for VMS 34 */ 35 36#ifdef WIN32 37#include "win32config.h" 38#define XML_DIR_SEP '\\' 39#else 40#include "config.h" 41#define XML_DIR_SEP '/' 42#endif 43 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <libxml/xmlmemory.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57 58#ifdef HAVE_CTYPE_H 59#include <ctype.h> 60#endif 61#ifdef HAVE_STDLIB_H 62#include <stdlib.h> 63#endif 64#ifdef HAVE_SYS_STAT_H 65#include <sys/stat.h> 66#endif 67#ifdef HAVE_FCNTL_H 68#include <fcntl.h> 69#endif 70#ifdef HAVE_UNISTD_H 71#include <unistd.h> 72#endif 73#ifdef HAVE_ZLIB_H 74#include <zlib.h> 75#endif 76 77 78#define XML_PARSER_BIG_BUFFER_SIZE 300 79#define XML_PARSER_BUFFER_SIZE 100 80 81/* 82 * Various global defaults for parsing 83 */ 84int xmlGetWarningsDefaultValue = 1; 85int xmlParserDebugEntities = 0; 86#ifdef VMS 87int xmlSubstituteEntitiesDefaultVal = 0; 88#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal 89int xmlDoValidityCheckingDefaultVal = 0; 90#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal 91#else 92int xmlSubstituteEntitiesDefaultValue = 0; 93int xmlDoValidityCheckingDefaultValue = 0; 94#endif 95int xmlLoadExtDtdDefaultValue = 0; 96int xmlPedanticParserDefaultValue = 0; 97int xmlKeepBlanksDefaultValue = 1; 98 99/* 100 * List of XML prefixed PI allowed by W3C specs 101 */ 102 103const char *xmlW3CPIs[] = { 104 "xml-stylesheet", 105 NULL 106}; 107 108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 110xmlEntityPtr 
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 111 const xmlChar **str); 112 113 114/************************************************************************ 115 * * 116 * Parser stacks related functions and macros * 117 * * 118 ************************************************************************/ 119 120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 121 const xmlChar ** str); 122 123/* 124 * Generic function for accessing stacks in the Parser Context 125 */ 126 127#define PUSH_AND_POP(scope, type, name) \ 128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 129 if (ctxt->name##Nr >= ctxt->name##Max) { \ 130 ctxt->name##Max *= 2; \ 131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ 132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 133 if (ctxt->name##Tab == NULL) { \ 134 xmlGenericError(xmlGenericErrorContext, \ 135 "realloc failed !\n"); \ 136 return(0); \ 137 } \ 138 } \ 139 ctxt->name##Tab[ctxt->name##Nr] = value; \ 140 ctxt->name = value; \ 141 return(ctxt->name##Nr++); \ 142} \ 143scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 144 type ret; \ 145 if (ctxt->name##Nr <= 0) return(0); \ 146 ctxt->name##Nr--; \ 147 if (ctxt->name##Nr > 0) \ 148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 149 else \ 150 ctxt->name = NULL; \ 151 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 152 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 153 return(ret); \ 154} \ 155 156/* 157 * Those macros actually generate the functions 158 */ 159PUSH_AND_POP(extern, xmlParserInputPtr, input) 160PUSH_AND_POP(extern, xmlNodePtr, node) 161PUSH_AND_POP(extern, xmlChar*, name) 162 163int spacePush(xmlParserCtxtPtr ctxt, int val) { 164 if (ctxt->spaceNr >= ctxt->spaceMax) { 165 ctxt->spaceMax *= 2; 166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 168 if (ctxt->spaceTab == NULL) { 169 xmlGenericError(xmlGenericErrorContext, 170 "realloc failed !\n"); 171 return(0); 172 } 173 } 174 
ctxt->spaceTab[ctxt->spaceNr] = val; 175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 176 return(ctxt->spaceNr++); 177} 178 179int spacePop(xmlParserCtxtPtr ctxt) { 180 int ret; 181 if (ctxt->spaceNr <= 0) return(0); 182 ctxt->spaceNr--; 183 if (ctxt->spaceNr > 0) 184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 185 else 186 ctxt->space = NULL; 187 ret = ctxt->spaceTab[ctxt->spaceNr]; 188 ctxt->spaceTab[ctxt->spaceNr] = -1; 189 return(ret); 190} 191 192/* 193 * Macros for accessing the content. Those should be used only by the parser, 194 * and not exported. 195 * 196 * Dirty macros, i.e. one often need to make assumption on the context to 197 * use them 198 * 199 * CUR_PTR return the current pointer to the xmlChar to be parsed. 200 * To be used with extreme caution since operations consuming 201 * characters may move the input buffer to a different location ! 202 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 203 * This should be used internally by the parser 204 * only to compare to ASCII values otherwise it would break when 205 * running with UTF-8 encoding. 206 * RAW same as CUR but in the input buffer, bypass any token 207 * extraction that may have been done 208 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 209 * to compare on ASCII based substring. 210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 211 * strings within the parser. 212 * 213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 214 * 215 * NEXT Skip to the next character, this does the proper decoding 216 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 217 * NEXTL(l) Skip l xmlChars in the input buffer 218 * CUR_CHAR(l) returns the current unicode character (int), set l 219 * to the number of xmlChars used for the encoding [0-5]. 
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these macros expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/*
 * RAW yields -1 while ctxt->token is set, otherwise the byte at the input
 * cursor; CUR yields ctxt->token when set, otherwise that same byte.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances the cursor and ctxt->nbChars by val, hands a '%' found at
 * the new position to xmlParserHandlePEReference(), and pops the input
 * when it sits on a 0 byte and cannot be grown.
 * NOTE: val is expanded twice.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if (...) { ... }" statement, not a
 * do/while(0) block: use them as a full statement ("GROW;") and never as
 * the body of an if that has an else branch.
 *
 * SHRINK calls xmlParserInputShrink() once the cursor is more than
 * INPUT_CHUNK past the buffer base; GROW calls xmlParserInputGrow() when
 * fewer than INPUT_CHUNK bytes remain before the buffer end. Both pop the
 * input afterwards if the cursor sits on a 0 byte and growing fails.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 moves one xmlChar forward without touching ctxt->token or the
 * line/column counters, and tries to grow the input when it lands on a
 * 0 byte. It expands to a brace block, so it needs no trailing ';'.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL updates line/col from the byte under the cursor, clears
 * ctxt->token, advances the cursor by l and hands a '%' found at the new
 * position to xmlParserHandlePEReference(). Unlike SKIP it does not
 * update ctxt->nbChars.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Both take l by name: its address is passed to the helper. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF stores v at b[i] and advances i: a plain byte store when l is
 * 1, otherwise through xmlCopyChar(), whose return value is added to i.
 * It expands to an unbraced if/else without a trailing ';' and does not
 * check the capacity of b.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v)

/**
280 * xmlSkipBlankChars: 281 * @ctxt: the XML parser context 282 * 283 * skip all blanks character found at that point in the input streams. 284 * It pops up finished entities in the process if allowable at that point. 285 * 286 * Returns the number of space chars skipped 287 */ 288 289int 290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 291 int cur, res = 0; 292 293 /* 294 * It's Okay to use CUR/NEXT here since all the blanks are on 295 * the ASCII range. 296 */ 297 do { 298 cur = CUR; 299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 300 NEXT; 301 cur = CUR; 302 res++; 303 } 304 while ((cur == 0) && (ctxt->inputNr > 1) && 305 (ctxt->instate != XML_PARSER_COMMENT)) { 306 xmlPopInput(ctxt); 307 cur = CUR; 308 } 309 /* 310 * Need to handle support of entities branching here 311 */ 312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */ 314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 315 return(res); 316} 317 318/************************************************************************ 319 * * 320 * Commodity functions to handle entities * 321 * * 322 ************************************************************************/ 323 324/** 325 * xmlPopInput: 326 * @ctxt: an XML parser context 327 * 328 * xmlPopInput: the current input pointed by ctxt->input came to an end 329 * pop it and return the next char. 
330 * 331 * Returns the current xmlChar in the parser context 332 */ 333xmlChar 334xmlPopInput(xmlParserCtxtPtr ctxt) { 335 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 336 if (xmlParserDebugEntities) 337 xmlGenericError(xmlGenericErrorContext, 338 "Popping input %d\n", ctxt->inputNr); 339 xmlFreeInputStream(inputPop(ctxt)); 340 if ((*ctxt->input->cur == 0) && 341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 342 return(xmlPopInput(ctxt)); 343 return(CUR); 344} 345 346/** 347 * xmlPushInput: 348 * @ctxt: an XML parser context 349 * @input: an XML parser input fragment (entity, XML fragment ...). 350 * 351 * xmlPushInput: switch to a new input stream which is stacked on top 352 * of the previous one(s). 353 */ 354void 355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 356 if (input == NULL) return; 357 358 if (xmlParserDebugEntities) { 359 if ((ctxt->input != NULL) && (ctxt->input->filename)) 360 xmlGenericError(xmlGenericErrorContext, 361 "%s(%d): ", ctxt->input->filename, 362 ctxt->input->line); 363 xmlGenericError(xmlGenericErrorContext, 364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 365 } 366 inputPush(ctxt, input); 367 GROW; 368} 369 370/** 371 * xmlParseCharRef: 372 * @ctxt: an XML parser context 373 * 374 * parse Reference declarations 375 * 376 * [66] CharRef ::= '&#' [0-9]+ ';' | 377 * '&#x' [0-9a-fA-F]+ ';' 378 * 379 * [ WFC: Legal Character ] 380 * Characters referred to using character references must match the 381 * production for Char. 
382 * 383 * Returns the value parsed (as an int), 0 in case of error 384 */ 385int 386xmlParseCharRef(xmlParserCtxtPtr ctxt) { 387 int val = 0; 388 int count = 0; 389 390 if (ctxt->token != 0) { 391 val = ctxt->token; 392 ctxt->token = 0; 393 return(val); 394 } 395 /* 396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 397 */ 398 if ((RAW == '&') && (NXT(1) == '#') && 399 (NXT(2) == 'x')) { 400 SKIP(3); 401 GROW; 402 while (RAW != ';') { /* loop blocked by count */ 403 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 404 val = val * 16 + (CUR - '0'); 405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 406 val = val * 16 + (CUR - 'a') + 10; 407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 408 val = val * 16 + (CUR - 'A') + 10; 409 else { 410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 412 ctxt->sax->error(ctxt->userData, 413 "xmlParseCharRef: invalid hexadecimal value\n"); 414 ctxt->wellFormed = 0; 415 ctxt->disableSAX = 1; 416 val = 0; 417 break; 418 } 419 NEXT; 420 count++; 421 } 422 if (RAW == ';') { 423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 424 ctxt->nbChars ++; 425 ctxt->input->cur++; 426 } 427 } else if ((RAW == '&') && (NXT(1) == '#')) { 428 SKIP(2); 429 GROW; 430 while (RAW != ';') { /* loop blocked by count */ 431 if ((RAW >= '0') && (RAW <= '9') && (count < 20)) 432 val = val * 10 + (CUR - '0'); 433 else { 434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 436 ctxt->sax->error(ctxt->userData, 437 "xmlParseCharRef: invalid decimal value\n"); 438 ctxt->wellFormed = 0; 439 ctxt->disableSAX = 1; 440 val = 0; 441 break; 442 } 443 NEXT; 444 count++; 445 } 446 if (RAW == ';') { 447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 448 ctxt->nbChars ++; 449 ctxt->input->cur++; 450 } 451 } else { 452 ctxt->errNo = XML_ERR_INVALID_CHARREF; 453 if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) 454 ctxt->sax->error(ctxt->userData, 455 "xmlParseCharRef: invalid value\n"); 456 ctxt->wellFormed = 0; 457 ctxt->disableSAX = 1; 458 } 459 460 /* 461 * [ WFC: Legal Character ] 462 * Characters referred to using character references must match the 463 * production for Char. 464 */ 465 if (IS_CHAR(val)) { 466 return(val); 467 } else { 468 ctxt->errNo = XML_ERR_INVALID_CHAR; 469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 471 val); 472 ctxt->wellFormed = 0; 473 ctxt->disableSAX = 1; 474 } 475 return(0); 476} 477 478/** 479 * xmlParseStringCharRef: 480 * @ctxt: an XML parser context 481 * @str: a pointer to an index in the string 482 * 483 * parse Reference declarations, variant parsing from a string rather 484 * than an an input flow. 485 * 486 * [66] CharRef ::= '&#' [0-9]+ ';' | 487 * '&#x' [0-9a-fA-F]+ ';' 488 * 489 * [ WFC: Legal Character ] 490 * Characters referred to using character references must match the 491 * production for Char. 
492 * 493 * Returns the value parsed (as an int), 0 in case of error, str will be 494 * updated to the current value of the index 495 */ 496int 497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 498 const xmlChar *ptr; 499 xmlChar cur; 500 int val = 0; 501 502 if ((str == NULL) || (*str == NULL)) return(0); 503 ptr = *str; 504 cur = *ptr; 505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 506 ptr += 3; 507 cur = *ptr; 508 while (cur != ';') { /* Non input consuming loop */ 509 if ((cur >= '0') && (cur <= '9')) 510 val = val * 16 + (cur - '0'); 511 else if ((cur >= 'a') && (cur <= 'f')) 512 val = val * 16 + (cur - 'a') + 10; 513 else if ((cur >= 'A') && (cur <= 'F')) 514 val = val * 16 + (cur - 'A') + 10; 515 else { 516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 518 ctxt->sax->error(ctxt->userData, 519 "xmlParseStringCharRef: invalid hexadecimal value\n"); 520 ctxt->wellFormed = 0; 521 ctxt->disableSAX = 1; 522 val = 0; 523 break; 524 } 525 ptr++; 526 cur = *ptr; 527 } 528 if (cur == ';') 529 ptr++; 530 } else if ((cur == '&') && (ptr[1] == '#')){ 531 ptr += 2; 532 cur = *ptr; 533 while (cur != ';') { /* Non input consuming loops */ 534 if ((cur >= '0') && (cur <= '9')) 535 val = val * 10 + (cur - '0'); 536 else { 537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 539 ctxt->sax->error(ctxt->userData, 540 "xmlParseStringCharRef: invalid decimal value\n"); 541 ctxt->wellFormed = 0; 542 ctxt->disableSAX = 1; 543 val = 0; 544 break; 545 } 546 ptr++; 547 cur = *ptr; 548 } 549 if (cur == ';') 550 ptr++; 551 } else { 552 ctxt->errNo = XML_ERR_INVALID_CHARREF; 553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 554 ctxt->sax->error(ctxt->userData, 555 "xmlParseCharRef: invalid value\n"); 556 ctxt->wellFormed = 0; 557 ctxt->disableSAX = 1; 558 return(0); 559 } 560 *str = ptr; 561 562 /* 563 * [ WFC: Legal Character ] 564 * 
Characters referred to using character references must match the 565 * production for Char. 566 */ 567 if (IS_CHAR(val)) { 568 return(val); 569 } else { 570 ctxt->errNo = XML_ERR_INVALID_CHAR; 571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 572 ctxt->sax->error(ctxt->userData, 573 "CharRef: invalid xmlChar value %d\n", val); 574 ctxt->wellFormed = 0; 575 ctxt->disableSAX = 1; 576 } 577 return(0); 578} 579 580/** 581 * xmlParserHandlePEReference: 582 * @ctxt: the parser context 583 * 584 * [69] PEReference ::= '%' Name ';' 585 * 586 * [ WFC: No Recursion ] 587 * A parsed entity must not contain a recursive 588 * reference to itself, either directly or indirectly. 589 * 590 * [ WFC: Entity Declared ] 591 * In a document without any DTD, a document with only an internal DTD 592 * subset which contains no parameter entity references, or a document 593 * with "standalone='yes'", ... ... The declaration of a parameter 594 * entity must precede any reference to it... 595 * 596 * [ VC: Entity Declared ] 597 * In a document with an external subset or external parameter entities 598 * with "standalone='no'", ... ... The declaration of a parameter entity 599 * must precede any reference to it... 600 * 601 * [ WFC: In DTD ] 602 * Parameter-entity references may only appear in the DTD. 603 * NOTE: misleading but this is handled. 604 * 605 * A PEReference may have been detected in the current input stream 606 * the handling is done accordingly to 607 * http://www.w3.org/TR/REC-xml#entproc 608 * i.e. 
609 * - Included in literal in entity values 610 * - Included as Paraemeter Entity reference within DTDs 611 */ 612void 613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 614 xmlChar *name; 615 xmlEntityPtr entity = NULL; 616 xmlParserInputPtr input; 617 618 if (ctxt->token != 0) { 619 return; 620 } 621 if (RAW != '%') return; 622 switch(ctxt->instate) { 623 case XML_PARSER_CDATA_SECTION: 624 return; 625 case XML_PARSER_COMMENT: 626 return; 627 case XML_PARSER_START_TAG: 628 return; 629 case XML_PARSER_END_TAG: 630 return; 631 case XML_PARSER_EOF: 632 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 635 ctxt->wellFormed = 0; 636 ctxt->disableSAX = 1; 637 return; 638 case XML_PARSER_PROLOG: 639 case XML_PARSER_START: 640 case XML_PARSER_MISC: 641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 644 ctxt->wellFormed = 0; 645 ctxt->disableSAX = 1; 646 return; 647 case XML_PARSER_ENTITY_DECL: 648 case XML_PARSER_CONTENT: 649 case XML_PARSER_ATTRIBUTE_VALUE: 650 case XML_PARSER_PI: 651 case XML_PARSER_SYSTEM_LITERAL: 652 /* we just ignore it there */ 653 return; 654 case XML_PARSER_EPILOG: 655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 658 ctxt->wellFormed = 0; 659 ctxt->disableSAX = 1; 660 return; 661 case XML_PARSER_ENTITY_VALUE: 662 /* 663 * NOTE: in the case of entity values, we don't do the 664 * substitution here since we need the literal 665 * entity value to be able to save the internal 666 * subset of the document. 
667 * This will be handled by xmlStringDecodeEntities 668 */ 669 return; 670 case XML_PARSER_DTD: 671 /* 672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 673 * In the internal DTD subset, parameter-entity references 674 * can occur only where markup declarations can occur, not 675 * within markup declarations. 676 * In that case this is handled in xmlParseMarkupDecl 677 */ 678 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 679 return; 680 break; 681 case XML_PARSER_IGNORE: 682 return; 683 } 684 685 NEXT; 686 name = xmlParseName(ctxt); 687 if (xmlParserDebugEntities) 688 xmlGenericError(xmlGenericErrorContext, 689 "PE Reference: %s\n", name); 690 if (name == NULL) { 691 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 694 ctxt->wellFormed = 0; 695 ctxt->disableSAX = 1; 696 } else { 697 if (RAW == ';') { 698 NEXT; 699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 701 if (entity == NULL) { 702 703 /* 704 * [ WFC: Entity Declared ] 705 * In a document without any DTD, a document with only an 706 * internal DTD subset which contains no parameter entity 707 * references, or a document with "standalone='yes'", ... 708 * ... The declaration of a parameter entity must precede 709 * any reference to it... 710 */ 711 if ((ctxt->standalone == 1) || 712 ((ctxt->hasExternalSubset == 0) && 713 (ctxt->hasPErefs == 0))) { 714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 715 ctxt->sax->error(ctxt->userData, 716 "PEReference: %%%s; not found\n", name); 717 ctxt->wellFormed = 0; 718 ctxt->disableSAX = 1; 719 } else { 720 /* 721 * [ VC: Entity Declared ] 722 * In a document with an external subset or external 723 * parameter entities with "standalone='no'", ... 724 * ... The declaration of a parameter entity must precede 725 * any reference to it... 
726 */ 727 if ((!ctxt->disableSAX) && 728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) { 729 ctxt->vctxt.error(ctxt->vctxt.userData, 730 "PEReference: %%%s; not found\n", name); 731 } else if ((!ctxt->disableSAX) && 732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 733 ctxt->sax->warning(ctxt->userData, 734 "PEReference: %%%s; not found\n", name); 735 ctxt->valid = 0; 736 } 737 } else { 738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 740 /* 741 * handle the extra spaces added before and after 742 * c.f. http://www.w3.org/TR/REC-xml#as-PE 743 * this is done independantly. 744 */ 745 input = xmlNewEntityInputStream(ctxt, entity); 746 xmlPushInput(ctxt, input); 747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 748 (RAW == '<') && (NXT(1) == '?') && 749 (NXT(2) == 'x') && (NXT(3) == 'm') && 750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 751 xmlParseTextDecl(ctxt); 752 } 753 if (ctxt->token == 0) 754 ctxt->token = ' '; 755 } else { 756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 757 ctxt->sax->error(ctxt->userData, 758 "xmlHandlePEReference: %s is not a parameter entity\n", 759 name); 760 ctxt->wellFormed = 0; 761 ctxt->disableSAX = 1; 762 } 763 } 764 } else { 765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 767 ctxt->sax->error(ctxt->userData, 768 "xmlHandlePEReference: expecting ';'\n"); 769 ctxt->wellFormed = 0; 770 ctxt->disableSAX = 1; 771 } 772 xmlFree(name); 773 } 774} 775 776/* 777 * Macro used to grow the current buffer. 
778 */ 779#define growBuffer(buffer) { \ 780 buffer##_size *= 2; \ 781 buffer = (xmlChar *) \ 782 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 783 if (buffer == NULL) { \ 784 perror("realloc failed"); \ 785 return(NULL); \ 786 } \ 787} 788 789/** 790 * xmlStringDecodeEntities: 791 * @ctxt: the parser context 792 * @str: the input string 793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 794 * @end: an end marker xmlChar, 0 if none 795 * @end2: an end marker xmlChar, 0 if none 796 * @end3: an end marker xmlChar, 0 if none 797 * 798 * Takes a entity string content and process to do the adequate subtitutions. 799 * 800 * [67] Reference ::= EntityRef | CharRef 801 * 802 * [69] PEReference ::= '%' Name ';' 803 * 804 * Returns A newly allocated string with the substitution done. The caller 805 * must deallocate it ! 806 */ 807xmlChar * 808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 809 xmlChar end, xmlChar end2, xmlChar end3) { 810 xmlChar *buffer = NULL; 811 int buffer_size = 0; 812 813 xmlChar *current = NULL; 814 xmlEntityPtr ent; 815 int c,l; 816 int nbchars = 0; 817 818 if (str == NULL) 819 return(NULL); 820 821 if (ctxt->depth > 40) { 822 ctxt->errNo = XML_ERR_ENTITY_LOOP; 823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 824 ctxt->sax->error(ctxt->userData, 825 "Detected entity reference loop\n"); 826 ctxt->wellFormed = 0; 827 ctxt->disableSAX = 1; 828 return(NULL); 829 } 830 831 /* 832 * allocate a translation buffer. 833 */ 834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 836 if (buffer == NULL) { 837 perror("xmlDecodeEntities: malloc failed"); 838 return(NULL); 839 } 840 841 /* 842 * Ok loop until we reach one of the ending char or a size limit. 843 * we are operating on already parsed values. 
844 */ 845 c = CUR_SCHAR(str, l); 846 while ((c != 0) && (c != end) && /* non input consuming loop */ 847 (c != end2) && (c != end3)) { 848 849 if (c == 0) break; 850 if ((c == '&') && (str[1] == '#')) { 851 int val = xmlParseStringCharRef(ctxt, &str); 852 if (val != 0) { 853 COPY_BUF(0,buffer,nbchars,val); 854 } 855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 856 if (xmlParserDebugEntities) 857 xmlGenericError(xmlGenericErrorContext, 858 "String decoding Entity Reference: %.30s\n", 859 str); 860 ent = xmlParseStringEntityRef(ctxt, &str); 861 if ((ent != NULL) && 862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 863 if (ent->content != NULL) { 864 COPY_BUF(0,buffer,nbchars,ent->content[0]); 865 } else { 866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 867 ctxt->sax->error(ctxt->userData, 868 "internal error entity has no content\n"); 869 } 870 } else if ((ent != NULL) && (ent->content != NULL)) { 871 xmlChar *rep; 872 873 ctxt->depth++; 874 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 875 0, 0, 0); 876 ctxt->depth--; 877 if (rep != NULL) { 878 current = rep; 879 while (*current != 0) { /* non input consuming loop */ 880 buffer[nbchars++] = *current++; 881 if (nbchars > 882 buffer_size - XML_PARSER_BUFFER_SIZE) { 883 growBuffer(buffer); 884 } 885 } 886 xmlFree(rep); 887 } 888 } else if (ent != NULL) { 889 int i = xmlStrlen(ent->name); 890 const xmlChar *cur = ent->name; 891 892 buffer[nbchars++] = '&'; 893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 894 growBuffer(buffer); 895 } 896 for (;i > 0;i--) 897 buffer[nbchars++] = *cur++; 898 buffer[nbchars++] = ';'; 899 } 900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 901 if (xmlParserDebugEntities) 902 xmlGenericError(xmlGenericErrorContext, 903 "String decoding PE Reference: %.30s\n", str); 904 ent = xmlParseStringPEReference(ctxt, &str); 905 if (ent != NULL) { 906 xmlChar *rep; 907 908 ctxt->depth++; 909 rep = xmlStringDecodeEntities(ctxt, ent->content, 
what, 910 0, 0, 0); 911 ctxt->depth--; 912 if (rep != NULL) { 913 current = rep; 914 while (*current != 0) { /* non input consuming loop */ 915 buffer[nbchars++] = *current++; 916 if (nbchars > 917 buffer_size - XML_PARSER_BUFFER_SIZE) { 918 growBuffer(buffer); 919 } 920 } 921 xmlFree(rep); 922 } 923 } 924 } else { 925 COPY_BUF(l,buffer,nbchars,c); 926 str += l; 927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 928 growBuffer(buffer); 929 } 930 } 931 c = CUR_SCHAR(str, l); 932 } 933 buffer[nbchars++] = 0; 934 return(buffer); 935} 936 937 938/************************************************************************ 939 * * 940 * Commodity functions to handle xmlChars * 941 * * 942 ************************************************************************/ 943 944/** 945 * xmlStrndup: 946 * @cur: the input xmlChar * 947 * @len: the len of @cur 948 * 949 * a strndup for array of xmlChar's 950 * 951 * Returns a new xmlChar * or NULL 952 */ 953xmlChar * 954xmlStrndup(const xmlChar *cur, int len) { 955 xmlChar *ret; 956 957 if ((cur == NULL) || (len < 0)) return(NULL); 958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 959 if (ret == NULL) { 960 xmlGenericError(xmlGenericErrorContext, 961 "malloc of %ld byte failed\n", 962 (len + 1) * (long)sizeof(xmlChar)); 963 return(NULL); 964 } 965 memcpy(ret, cur, len * sizeof(xmlChar)); 966 ret[len] = 0; 967 return(ret); 968} 969 970/** 971 * xmlStrdup: 972 * @cur: the input xmlChar * 973 * 974 * a strdup for array of xmlChar's. Since they are supposed to be 975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 976 * a termination mark of '0'. 
977 * 978 * Returns a new xmlChar * or NULL 979 */ 980xmlChar * 981xmlStrdup(const xmlChar *cur) { 982 const xmlChar *p = cur; 983 984 if (cur == NULL) return(NULL); 985 while (*p != 0) p++; /* non input consuming */ 986 return(xmlStrndup(cur, p - cur)); 987} 988 989/** 990 * xmlCharStrndup: 991 * @cur: the input char * 992 * @len: the len of @cur 993 * 994 * a strndup for char's to xmlChar's 995 * 996 * Returns a new xmlChar * or NULL 997 */ 998 999xmlChar * 1000xmlCharStrndup(const char *cur, int len) { 1001 int i; 1002 xmlChar *ret; 1003 1004 if ((cur == NULL) || (len < 0)) return(NULL); 1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); 1006 if (ret == NULL) { 1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", 1008 (len + 1) * (long)sizeof(xmlChar)); 1009 return(NULL); 1010 } 1011 for (i = 0;i < len;i++) 1012 ret[i] = (xmlChar) cur[i]; 1013 ret[len] = 0; 1014 return(ret); 1015} 1016 1017/** 1018 * xmlCharStrdup: 1019 * @cur: the input char * 1020 * @len: the len of @cur 1021 * 1022 * a strdup for char's to xmlChar's 1023 * 1024 * Returns a new xmlChar * or NULL 1025 */ 1026 1027xmlChar * 1028xmlCharStrdup(const char *cur) { 1029 const char *p = cur; 1030 1031 if (cur == NULL) return(NULL); 1032 while (*p != '\0') p++; /* non input consuming */ 1033 return(xmlCharStrndup(cur, p - cur)); 1034} 1035 1036/** 1037 * xmlStrcmp: 1038 * @str1: the first xmlChar * 1039 * @str2: the second xmlChar * 1040 * 1041 * a strcmp for xmlChar's 1042 * 1043 * Returns the integer result of the comparison 1044 */ 1045 1046int 1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1048 register int tmp; 1049 1050 if (str1 == str2) return(0); 1051 if (str1 == NULL) return(-1); 1052 if (str2 == NULL) return(1); 1053 do { 1054 tmp = *str1++ - *str2; 1055 if (tmp != 0) return(tmp); 1056 } while (*str2++ != 0); 1057 return 0; 1058} 1059 1060/** 1061 * xmlStrEqual: 1062 * @str1: the first xmlChar * 1063 * @str2: the second xmlChar * 1064 * 1065 * 
Check if both string are equal of have same content 1066 * Should be a bit more readable and faster than xmlStrEqual() 1067 * 1068 * Returns 1 if they are equal, 0 if they are different 1069 */ 1070 1071int 1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 1073 if (str1 == str2) return(1); 1074 if (str1 == NULL) return(0); 1075 if (str2 == NULL) return(0); 1076 do { 1077 if (*str1++ != *str2) return(0); 1078 } while (*str2++); 1079 return(1); 1080} 1081 1082/** 1083 * xmlStrncmp: 1084 * @str1: the first xmlChar * 1085 * @str2: the second xmlChar * 1086 * @len: the max comparison length 1087 * 1088 * a strncmp for xmlChar's 1089 * 1090 * Returns the integer result of the comparison 1091 */ 1092 1093int 1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1095 register int tmp; 1096 1097 if (len <= 0) return(0); 1098 if (str1 == str2) return(0); 1099 if (str1 == NULL) return(-1); 1100 if (str2 == NULL) return(1); 1101 do { 1102 tmp = *str1++ - *str2; 1103 if (tmp != 0 || --len == 0) return(tmp); 1104 } while (*str2++ != 0); 1105 return 0; 1106} 1107 1108static xmlChar casemap[256] = { 1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 1141}; 1142 1143/** 1144 * xmlStrcasecmp: 1145 * @str1: the first xmlChar * 1146 * @str2: the second xmlChar * 1147 * 1148 * a strcasecmp for xmlChar's 1149 * 1150 * Returns the integer result of the comparison 1151 */ 1152 1153int 1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 1155 register int tmp; 1156 1157 if (str1 == str2) return(0); 1158 if (str1 == NULL) return(-1); 1159 if (str2 == NULL) return(1); 1160 do { 1161 tmp = casemap[*str1++] - casemap[*str2]; 1162 if (tmp != 0) return(tmp); 1163 } while (*str2++ != 0); 1164 return 0; 1165} 1166 1167/** 1168 * xmlStrncasecmp: 1169 * @str1: the first xmlChar * 1170 * @str2: the second xmlChar * 1171 * @len: the max comparison length 1172 * 1173 * a strncasecmp for xmlChar's 1174 * 1175 * Returns the integer result of the comparison 1176 */ 1177 1178int 1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 1180 register int tmp; 1181 1182 if (len <= 0) return(0); 1183 if (str1 == str2) return(0); 1184 if (str1 == NULL) return(-1); 1185 if (str2 == NULL) return(1); 1186 do { 1187 tmp = casemap[*str1++] - casemap[*str2]; 1188 if (tmp != 0 || --len == 0) return(tmp); 1189 } while (*str2++ != 0); 1190 return 0; 1191} 1192 1193/** 1194 * xmlStrchr: 1195 * @str: the xmlChar * array 1196 * @val: the xmlChar to search 1197 * 1198 * a strchr for xmlChar's 1199 * 1200 * Returns the xmlChar * for the first 
occurence or NULL. 1201 */ 1202 1203const xmlChar * 1204xmlStrchr(const xmlChar *str, xmlChar val) { 1205 if (str == NULL) return(NULL); 1206 while (*str != 0) { /* non input consuming */ 1207 if (*str == val) return((xmlChar *) str); 1208 str++; 1209 } 1210 return(NULL); 1211} 1212 1213/** 1214 * xmlStrstr: 1215 * @str: the xmlChar * array (haystack) 1216 * @val: the xmlChar to search (needle) 1217 * 1218 * a strstr for xmlChar's 1219 * 1220 * Returns the xmlChar * for the first occurence or NULL. 1221 */ 1222 1223const xmlChar * 1224xmlStrstr(const xmlChar *str, xmlChar *val) { 1225 int n; 1226 1227 if (str == NULL) return(NULL); 1228 if (val == NULL) return(NULL); 1229 n = xmlStrlen(val); 1230 1231 if (n == 0) return(str); 1232 while (*str != 0) { /* non input consuming */ 1233 if (*str == *val) { 1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1235 } 1236 str++; 1237 } 1238 return(NULL); 1239} 1240 1241/** 1242 * xmlStrcasestr: 1243 * @str: the xmlChar * array (haystack) 1244 * @val: the xmlChar to search (needle) 1245 * 1246 * a case-ignoring strstr for xmlChar's 1247 * 1248 * Returns the xmlChar * for the first occurence or NULL. 1249 */ 1250 1251const xmlChar * 1252xmlStrcasestr(const xmlChar *str, xmlChar *val) { 1253 int n; 1254 1255 if (str == NULL) return(NULL); 1256 if (val == NULL) return(NULL); 1257 n = xmlStrlen(val); 1258 1259 if (n == 0) return(str); 1260 while (*str != 0) { /* non input consuming */ 1261 if (casemap[*str] == casemap[*val]) 1262 if (!xmlStrncasecmp(str, val, n)) return(str); 1263 str++; 1264 } 1265 return(NULL); 1266} 1267 1268/** 1269 * xmlStrsub: 1270 * @str: the xmlChar * array (haystack) 1271 * @start: the index of the first char (zero based) 1272 * @len: the length of the substring 1273 * 1274 * Extract a substring of a given string 1275 * 1276 * Returns the xmlChar * for the first occurence or NULL. 
1277 */ 1278 1279xmlChar * 1280xmlStrsub(const xmlChar *str, int start, int len) { 1281 int i; 1282 1283 if (str == NULL) return(NULL); 1284 if (start < 0) return(NULL); 1285 if (len < 0) return(NULL); 1286 1287 for (i = 0;i < start;i++) { 1288 if (*str == 0) return(NULL); 1289 str++; 1290 } 1291 if (*str == 0) return(NULL); 1292 return(xmlStrndup(str, len)); 1293} 1294 1295/** 1296 * xmlStrlen: 1297 * @str: the xmlChar * array 1298 * 1299 * length of a xmlChar's string 1300 * 1301 * Returns the number of xmlChar contained in the ARRAY. 1302 */ 1303 1304int 1305xmlStrlen(const xmlChar *str) { 1306 int len = 0; 1307 1308 if (str == NULL) return(0); 1309 while (*str != 0) { /* non input consuming */ 1310 str++; 1311 len++; 1312 } 1313 return(len); 1314} 1315 1316/** 1317 * xmlStrncat: 1318 * @cur: the original xmlChar * array 1319 * @add: the xmlChar * array added 1320 * @len: the length of @add 1321 * 1322 * a strncat for array of xmlChar's, it will extend cur with the len 1323 * first bytes of @add. 1324 * 1325 * Returns a new xmlChar *, the original @cur is reallocated if needed 1326 * and should not be freed 1327 */ 1328 1329xmlChar * 1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 1331 int size; 1332 xmlChar *ret; 1333 1334 if ((add == NULL) || (len == 0)) 1335 return(cur); 1336 if (cur == NULL) 1337 return(xmlStrndup(add, len)); 1338 1339 size = xmlStrlen(cur); 1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 1341 if (ret == NULL) { 1342 xmlGenericError(xmlGenericErrorContext, 1343 "xmlStrncat: realloc of %ld byte failed\n", 1344 (size + len + 1) * (long)sizeof(xmlChar)); 1345 return(cur); 1346 } 1347 memcpy(&ret[size], add, len * sizeof(xmlChar)); 1348 ret[size + len] = 0; 1349 return(ret); 1350} 1351 1352/** 1353 * xmlStrcat: 1354 * @cur: the original xmlChar * array 1355 * @add: the xmlChar * array added 1356 * 1357 * a strcat for array of xmlChar's. 
Since they are supposed to be 1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 1359 * a termination mark of '0'. 1360 * 1361 * Returns a new xmlChar * containing the concatenated string. 1362 */ 1363xmlChar * 1364xmlStrcat(xmlChar *cur, const xmlChar *add) { 1365 const xmlChar *p = add; 1366 1367 if (add == NULL) return(cur); 1368 if (cur == NULL) 1369 return(xmlStrdup(add)); 1370 1371 while (*p != 0) p++; /* non input consuming */ 1372 return(xmlStrncat(cur, add, p - add)); 1373} 1374 1375/************************************************************************ 1376 * * 1377 * Commodity functions, cleanup needed ? * 1378 * * 1379 ************************************************************************/ 1380 1381/** 1382 * areBlanks: 1383 * @ctxt: an XML parser context 1384 * @str: a xmlChar * 1385 * @len: the size of @str 1386 * 1387 * Is this a sequence of blank chars that one can ignore ? 1388 * 1389 * Returns 1 if ignorable 0 otherwise. 1390 */ 1391 1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 1393 int i, ret; 1394 xmlNodePtr lastChild; 1395 1396 if (ctxt->keepBlanks) 1397 return(0); 1398 1399 /* 1400 * Check for xml:space value. 
1401 */ 1402 if (*(ctxt->space) == 1) 1403 return(0); 1404 1405 /* 1406 * Check that the string is made of blanks 1407 */ 1408 for (i = 0;i < len;i++) 1409 if (!(IS_BLANK(str[i]))) return(0); 1410 1411 /* 1412 * Look if the element is mixed content in the Dtd if available 1413 */ 1414 if (ctxt->myDoc != NULL) { 1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 1416 if (ret == 0) return(1); 1417 if (ret == 1) return(0); 1418 } 1419 1420 /* 1421 * Otherwise, heuristic :-\ 1422 */ 1423 if (RAW != '<') return(0); 1424 if (ctxt->node == NULL) return(0); 1425 if ((ctxt->node->children == NULL) && 1426 (RAW == '<') && (NXT(1) == '/')) return(0); 1427 1428 lastChild = xmlGetLastChild(ctxt->node); 1429 if (lastChild == NULL) { 1430 if (ctxt->node->content != NULL) return(0); 1431 } else if (xmlNodeIsText(lastChild)) 1432 return(0); 1433 else if ((ctxt->node->children != NULL) && 1434 (xmlNodeIsText(ctxt->node->children))) 1435 return(0); 1436 return(1); 1437} 1438 1439/* 1440 * Forward definition for recusive behaviour. 1441 */ 1442void xmlParsePEReference(xmlParserCtxtPtr ctxt); 1443void xmlParseReference(xmlParserCtxtPtr ctxt); 1444 1445/************************************************************************ 1446 * * 1447 * Extra stuff for namespace support * 1448 * Relates to http://www.w3.org/TR/WD-xml-names * 1449 * * 1450 ************************************************************************/ 1451 1452/** 1453 * xmlSplitQName: 1454 * @ctxt: an XML parser context 1455 * @name: an XML parser context 1456 * @prefix: a xmlChar ** 1457 * 1458 * parse an UTF8 encoded XML qualified name string 1459 * 1460 * [NS 5] QName ::= (Prefix ':')? LocalPart 1461 * 1462 * [NS 6] Prefix ::= NCName 1463 * 1464 * [NS 7] LocalPart ::= NCName 1465 * 1466 * Returns the local part, and prefix is updated 1467 * to get the Prefix if any. 
1468 */ 1469 1470xmlChar * 1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 1472 xmlChar buf[XML_MAX_NAMELEN + 5]; 1473 xmlChar *buffer = NULL; 1474 int len = 0; 1475 int max = XML_MAX_NAMELEN; 1476 xmlChar *ret = NULL; 1477 const xmlChar *cur = name; 1478 int c; 1479 1480 *prefix = NULL; 1481 1482#ifndef XML_XML_NAMESPACE 1483 /* xml: prefix is not really a namespace */ 1484 if ((cur[0] == 'x') && (cur[1] == 'm') && 1485 (cur[2] == 'l') && (cur[3] == ':')) 1486 return(xmlStrdup(name)); 1487#endif 1488 1489 /* nasty but valid */ 1490 if (cur[0] == ':') 1491 return(xmlStrdup(name)); 1492 1493 c = *cur++; 1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 1495 buf[len++] = c; 1496 c = *cur++; 1497 } 1498 if (len >= max) { 1499 /* 1500 * Okay someone managed to make a huge name, so he's ready to pay 1501 * for the processing speed. 1502 */ 1503 max = len * 2; 1504 1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1506 if (buffer == NULL) { 1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1508 ctxt->sax->error(ctxt->userData, 1509 "xmlSplitQName: out of memory\n"); 1510 return(NULL); 1511 } 1512 memcpy(buffer, buf, len); 1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 1514 if (len + 10 > max) { 1515 max *= 2; 1516 buffer = (xmlChar *) xmlRealloc(buffer, 1517 max * sizeof(xmlChar)); 1518 if (buffer == NULL) { 1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1520 ctxt->sax->error(ctxt->userData, 1521 "xmlSplitQName: out of memory\n"); 1522 return(NULL); 1523 } 1524 } 1525 buffer[len++] = c; 1526 c = *cur++; 1527 } 1528 buffer[len] = 0; 1529 } 1530 1531 if (buffer == NULL) 1532 ret = xmlStrndup(buf, len); 1533 else { 1534 ret = buffer; 1535 buffer = NULL; 1536 max = XML_MAX_NAMELEN; 1537 } 1538 1539 1540 if (c == ':') { 1541 c = *cur++; 1542 if (c == 0) return(ret); 1543 *prefix = ret; 1544 len = 0; 1545 1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml 
*/ 1547 buf[len++] = c; 1548 c = *cur++; 1549 } 1550 if (len >= max) { 1551 /* 1552 * Okay someone managed to make a huge name, so he's ready to pay 1553 * for the processing speed. 1554 */ 1555 max = len * 2; 1556 1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1558 if (buffer == NULL) { 1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1560 ctxt->sax->error(ctxt->userData, 1561 "xmlSplitQName: out of memory\n"); 1562 return(NULL); 1563 } 1564 memcpy(buffer, buf, len); 1565 while (c != 0) { /* tested bigname2.xml */ 1566 if (len + 10 > max) { 1567 max *= 2; 1568 buffer = (xmlChar *) xmlRealloc(buffer, 1569 max * sizeof(xmlChar)); 1570 if (buffer == NULL) { 1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1572 ctxt->sax->error(ctxt->userData, 1573 "xmlSplitQName: out of memory\n"); 1574 return(NULL); 1575 } 1576 } 1577 buffer[len++] = c; 1578 c = *cur++; 1579 } 1580 buffer[len] = 0; 1581 } 1582 1583 if (buffer == NULL) 1584 ret = xmlStrndup(buf, len); 1585 else { 1586 ret = buffer; 1587 } 1588 } 1589 1590 return(ret); 1591} 1592 1593/************************************************************************ 1594 * * 1595 * The parser itself * 1596 * Relates to http://www.w3.org/TR/REC-xml * 1597 * * 1598 ************************************************************************/ 1599 1600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt); 1601/** 1602 * xmlParseName: 1603 * @ctxt: an XML parser context 1604 * 1605 * parse an XML name. 1606 * 1607 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 1608 * CombiningChar | Extender 1609 * 1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1611 * 1612 * [6] Names ::= Name (S Name)* 1613 * 1614 * Returns the Name parsed or NULL 1615 */ 1616 1617xmlChar * 1618xmlParseName(xmlParserCtxtPtr ctxt) { 1619 const xmlChar *in; 1620 xmlChar *ret; 1621 int count = 0; 1622 1623 GROW; 1624 1625 /* 1626 * Accelerator for simple ASCII names 1627 */ 1628 in = ctxt->input->cur; 1629 if (((*in >= 0x61) && (*in <= 0x7A)) || 1630 ((*in >= 0x41) && (*in <= 0x5A)) || 1631 (*in == '_') || (*in == ':')) { 1632 in++; 1633 while (((*in >= 0x61) && (*in <= 0x7A)) || 1634 ((*in >= 0x41) && (*in <= 0x5A)) || 1635 ((*in >= 0x30) && (*in <= 0x39)) || 1636 (*in == '_') || (*in == ':')) 1637 in++; 1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) { 1639 count = in - ctxt->input->cur; 1640 ret = xmlStrndup(ctxt->input->cur, count); 1641 ctxt->input->cur = in; 1642 return(ret); 1643 } 1644 } 1645 return(xmlParseNameComplex(ctxt)); 1646} 1647 1648xmlChar * 1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 1650 xmlChar buf[XML_MAX_NAMELEN + 5]; 1651 int len = 0, l; 1652 int c; 1653 int count = 0; 1654 1655 /* 1656 * Handler for more complex cases 1657 */ 1658 GROW; 1659 c = CUR_CHAR(l); 1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 1661 (!IS_LETTER(c) && (c != '_') && 1662 (c != ':'))) { 1663 return(NULL); 1664 } 1665 1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 1668 (c == '.') || (c == '-') || 1669 (c == '_') || (c == ':') || 1670 (IS_COMBINING(c)) || 1671 (IS_EXTENDER(c)))) { 1672 if (count++ > 100) { 1673 count = 0; 1674 GROW; 1675 } 1676 COPY_BUF(l,buf,len,c); 1677 NEXTL(l); 1678 c = CUR_CHAR(l); 1679 if (len >= XML_MAX_NAMELEN) { 1680 /* 1681 * Okay someone managed to make a huge name, so he's ready to pay 1682 * for the processing speed. 
1683 */ 1684 xmlChar *buffer; 1685 int max = len * 2; 1686 1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1688 if (buffer == NULL) { 1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1690 ctxt->sax->error(ctxt->userData, 1691 "xmlParseNameComplex: out of memory\n"); 1692 return(NULL); 1693 } 1694 memcpy(buffer, buf, len); 1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ 1696 (c == '.') || (c == '-') || 1697 (c == '_') || (c == ':') || 1698 (IS_COMBINING(c)) || 1699 (IS_EXTENDER(c))) { 1700 if (count++ > 100) { 1701 count = 0; 1702 GROW; 1703 } 1704 if (len + 10 > max) { 1705 max *= 2; 1706 buffer = (xmlChar *) xmlRealloc(buffer, 1707 max * sizeof(xmlChar)); 1708 if (buffer == NULL) { 1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1710 ctxt->sax->error(ctxt->userData, 1711 "xmlParseNameComplex: out of memory\n"); 1712 return(NULL); 1713 } 1714 } 1715 COPY_BUF(l,buffer,len,c); 1716 NEXTL(l); 1717 c = CUR_CHAR(l); 1718 } 1719 buffer[len] = 0; 1720 return(buffer); 1721 } 1722 } 1723 return(xmlStrndup(buf, len)); 1724} 1725 1726/** 1727 * xmlParseStringName: 1728 * @ctxt: an XML parser context 1729 * @str: a pointer to the string pointer (IN/OUT) 1730 * 1731 * parse an XML name. 1732 * 1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1734 * CombiningChar | Extender 1735 * 1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 1737 * 1738 * [6] Names ::= Name (S Name)* 1739 * 1740 * Returns the Name parsed or NULL. The str pointer 1741 * is updated to the current location in the string. 
1742 */ 1743 1744xmlChar * 1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 1746 xmlChar buf[XML_MAX_NAMELEN + 5]; 1747 const xmlChar *cur = *str; 1748 int len = 0, l; 1749 int c; 1750 1751 c = CUR_SCHAR(cur, l); 1752 if (!IS_LETTER(c) && (c != '_') && 1753 (c != ':')) { 1754 return(NULL); 1755 } 1756 1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1758 (c == '.') || (c == '-') || 1759 (c == '_') || (c == ':') || 1760 (IS_COMBINING(c)) || 1761 (IS_EXTENDER(c))) { 1762 COPY_BUF(l,buf,len,c); 1763 cur += l; 1764 c = CUR_SCHAR(cur, l); 1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 1766 /* 1767 * Okay someone managed to make a huge name, so he's ready to pay 1768 * for the processing speed. 1769 */ 1770 xmlChar *buffer; 1771 int max = len * 2; 1772 1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1774 if (buffer == NULL) { 1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1776 ctxt->sax->error(ctxt->userData, 1777 "xmlParseStringName: out of memory\n"); 1778 return(NULL); 1779 } 1780 memcpy(buffer, buf, len); 1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 1782 (c == '.') || (c == '-') || 1783 (c == '_') || (c == ':') || 1784 (IS_COMBINING(c)) || 1785 (IS_EXTENDER(c))) { 1786 if (len + 10 > max) { 1787 max *= 2; 1788 buffer = (xmlChar *) xmlRealloc(buffer, 1789 max * sizeof(xmlChar)); 1790 if (buffer == NULL) { 1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1792 ctxt->sax->error(ctxt->userData, 1793 "xmlParseStringName: out of memory\n"); 1794 return(NULL); 1795 } 1796 } 1797 COPY_BUF(l,buffer,len,c); 1798 cur += l; 1799 c = CUR_SCHAR(cur, l); 1800 } 1801 buffer[len] = 0; 1802 *str = cur; 1803 return(buffer); 1804 } 1805 } 1806 *str = cur; 1807 return(xmlStrndup(buf, len)); 1808} 1809 1810/** 1811 * xmlParseNmtoken: 1812 * @ctxt: an XML parser context 1813 * 1814 * parse an XML Nmtoken. 
1815 * 1816 * [7] Nmtoken ::= (NameChar)+ 1817 * 1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 1819 * 1820 * Returns the Nmtoken parsed or NULL 1821 */ 1822 1823xmlChar * 1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 1825 xmlChar buf[XML_MAX_NAMELEN + 5]; 1826 int len = 0, l; 1827 int c; 1828 int count = 0; 1829 1830 GROW; 1831 c = CUR_CHAR(l); 1832 1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1834 (c == '.') || (c == '-') || 1835 (c == '_') || (c == ':') || 1836 (IS_COMBINING(c)) || 1837 (IS_EXTENDER(c))) { 1838 if (count++ > 100) { 1839 count = 0; 1840 GROW; 1841 } 1842 COPY_BUF(l,buf,len,c); 1843 NEXTL(l); 1844 c = CUR_CHAR(l); 1845 if (len >= XML_MAX_NAMELEN) { 1846 /* 1847 * Okay someone managed to make a huge token, so he's ready to pay 1848 * for the processing speed. 1849 */ 1850 xmlChar *buffer; 1851 int max = len * 2; 1852 1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); 1854 if (buffer == NULL) { 1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1856 ctxt->sax->error(ctxt->userData, 1857 "xmlParseNmtoken: out of memory\n"); 1858 return(NULL); 1859 } 1860 memcpy(buffer, buf, len); 1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 1862 (c == '.') || (c == '-') || 1863 (c == '_') || (c == ':') || 1864 (IS_COMBINING(c)) || 1865 (IS_EXTENDER(c))) { 1866 if (count++ > 100) { 1867 count = 0; 1868 GROW; 1869 } 1870 if (len + 10 > max) { 1871 max *= 2; 1872 buffer = (xmlChar *) xmlRealloc(buffer, 1873 max * sizeof(xmlChar)); 1874 if (buffer == NULL) { 1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1876 ctxt->sax->error(ctxt->userData, 1877 "xmlParseNameComplex: out of memory\n"); 1878 return(NULL); 1879 } 1880 } 1881 COPY_BUF(l,buffer,len,c); 1882 NEXTL(l); 1883 c = CUR_CHAR(l); 1884 } 1885 buffer[len] = 0; 1886 return(buffer); 1887 } 1888 } 1889 if (len == 0) 1890 return(NULL); 1891 return(xmlStrndup(buf, len)); 1892} 1893 1894/** 1895 * xmlParseEntityValue: 1896 * @ctxt: an XML 
parser context 1897 * @orig: if non-NULL store a copy of the original entity value 1898 * 1899 * parse a value for ENTITY declarations 1900 * 1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 1902 * "'" ([^%&'] | PEReference | Reference)* "'" 1903 * 1904 * Returns the EntityValue parsed with reference substitued or NULL 1905 */ 1906 1907xmlChar * 1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 1909 xmlChar *buf = NULL; 1910 int len = 0; 1911 int size = XML_PARSER_BUFFER_SIZE; 1912 int c, l; 1913 xmlChar stop; 1914 xmlChar *ret = NULL; 1915 const xmlChar *cur = NULL; 1916 xmlParserInputPtr input; 1917 1918 if (RAW == '"') stop = '"'; 1919 else if (RAW == '\'') stop = '\''; 1920 else { 1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 1924 ctxt->wellFormed = 0; 1925 ctxt->disableSAX = 1; 1926 return(NULL); 1927 } 1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 1929 if (buf == NULL) { 1930 xmlGenericError(xmlGenericErrorContext, 1931 "malloc of %d byte failed\n", size); 1932 return(NULL); 1933 } 1934 1935 /* 1936 * The content of the entity definition is copied in a buffer. 1937 */ 1938 1939 ctxt->instate = XML_PARSER_ENTITY_VALUE; 1940 input = ctxt->input; 1941 GROW; 1942 NEXT; 1943 c = CUR_CHAR(l); 1944 /* 1945 * NOTE: 4.4.5 Included in Literal 1946 * When a parameter entity reference appears in a literal entity 1947 * value, ... a single or double quote character in the replacement 1948 * text is always treated as a normal data character and will not 1949 * terminate the literal. 
1950 * In practice it means we stop the loop only when back at parsing 1951 * the initial entity and the quote is found 1952 */ 1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 1954 (ctxt->input != input))) { 1955 if (len + 5 >= size) { 1956 size *= 2; 1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 1958 if (buf == NULL) { 1959 xmlGenericError(xmlGenericErrorContext, 1960 "realloc of %d byte failed\n", size); 1961 return(NULL); 1962 } 1963 } 1964 COPY_BUF(l,buf,len,c); 1965 NEXTL(l); 1966 /* 1967 * Pop-up of finished entities. 1968 */ 1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 1970 xmlPopInput(ctxt); 1971 1972 GROW; 1973 c = CUR_CHAR(l); 1974 if (c == 0) { 1975 GROW; 1976 c = CUR_CHAR(l); 1977 } 1978 } 1979 buf[len] = 0; 1980 1981 /* 1982 * Raise problem w.r.t. '&' and '%' being used in non-entities 1983 * reference constructs. Note Charref will be handled in 1984 * xmlStringDecodeEntities() 1985 */ 1986 cur = buf; 1987 while (*cur != 0) { /* non input consuming */ 1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 1989 xmlChar *name; 1990 xmlChar tmp = *cur; 1991 1992 cur++; 1993 name = xmlParseStringName(ctxt, &cur); 1994 if ((name == NULL) || (*cur != ';')) { 1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; 1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1997 ctxt->sax->error(ctxt->userData, 1998 "EntityValue: '%c' forbidden except for entities references\n", 1999 tmp); 2000 ctxt->wellFormed = 0; 2001 ctxt->disableSAX = 1; 2002 } 2003 if ((ctxt->inSubset == 1) && (tmp == '%')) { 2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; 2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2006 ctxt->sax->error(ctxt->userData, 2007 "EntityValue: PEReferences forbidden in internal subset\n", 2008 tmp); 2009 ctxt->wellFormed = 0; 2010 ctxt->disableSAX = 1; 2011 } 2012 if (name != NULL) 2013 xmlFree(name); 2014 } 2015 cur++; 2016 } 2017 2018 /* 2019 * Then PEReference entities are substituted. 
2020 */ 2021 if (c != stop) { 2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2025 ctxt->wellFormed = 0; 2026 ctxt->disableSAX = 1; 2027 xmlFree(buf); 2028 } else { 2029 NEXT; 2030 /* 2031 * NOTE: 4.4.7 Bypassed 2032 * When a general entity reference appears in the EntityValue in 2033 * an entity declaration, it is bypassed and left as is. 2034 * so XML_SUBSTITUTE_REF is not set here. 2035 */ 2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2037 0, 0, 0); 2038 if (orig != NULL) 2039 *orig = buf; 2040 else 2041 xmlFree(buf); 2042 } 2043 2044 return(ret); 2045} 2046 2047/** 2048 * xmlParseAttValue: 2049 * @ctxt: an XML parser context 2050 * 2051 * parse a value for an attribute 2052 * Note: the parser won't do substitution of entities here, this 2053 * will be handled later in xmlStringGetNodeList 2054 * 2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2056 * "'" ([^<&'] | Reference)* "'" 2057 * 2058 * 3.3.3 Attribute-Value Normalization: 2059 * Before the value of an attribute is passed to the application or 2060 * checked for validity, the XML processor must normalize it as follows: 2061 * - a character reference is processed by appending the referenced 2062 * character to the attribute value 2063 * - an entity reference is processed by recursively processing the 2064 * replacement text of the entity 2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2066 * appending #x20 to the normalized value, except that only a single 2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2068 * parsed entity or the literal entity value of an internal parsed entity 2069 * - other characters are processed by appending them to the normalized value 2070 * If the declared value is not CDATA, then the XML processor must further 2071 * process the normalized attribute value by discarding any 
leading and 2072 * trailing space (#x20) characters, and by replacing sequences of space 2073 * (#x20) characters by a single space (#x20) character. 2074 * All attributes for which no declaration has been read should be treated 2075 * by a non-validating parser as if declared CDATA. 2076 * 2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2078 */ 2079 2080xmlChar * 2081xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2082 xmlChar limit = 0; 2083 xmlChar *buf = NULL; 2084 int len = 0; 2085 int buf_size = 0; 2086 int c, l; 2087 xmlChar *current = NULL; 2088 xmlEntityPtr ent; 2089 2090 2091 SHRINK; 2092 if (NXT(0) == '"') { 2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2094 limit = '"'; 2095 NEXT; 2096 } else if (NXT(0) == '\'') { 2097 limit = '\''; 2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2099 NEXT; 2100 } else { 2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2104 ctxt->wellFormed = 0; 2105 ctxt->disableSAX = 1; 2106 return(NULL); 2107 } 2108 2109 /* 2110 * allocate a translation buffer. 2111 */ 2112 buf_size = XML_PARSER_BUFFER_SIZE; 2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); 2114 if (buf == NULL) { 2115 perror("xmlParseAttValue: malloc failed"); 2116 return(NULL); 2117 } 2118 2119 /* 2120 * Ok loop until we reach one of the ending char or a size limit. 
2121 */ 2122 c = CUR_CHAR(l); 2123 while (((NXT(0) != limit) && /* checked */ 2124 (c != '<')) || (ctxt->token != 0)) { 2125 if (c == 0) break; 2126 if (ctxt->token == '&') { 2127 /* 2128 * The reparsing will be done in xmlStringGetNodeList() 2129 * called by the attribute() function in SAX.c 2130 */ 2131 static xmlChar buffer[6] = "&"; 2132 2133 if (len > buf_size - 10) { 2134 growBuffer(buf); 2135 } 2136 current = &buffer[0]; 2137 while (*current != 0) { /* non input consuming */ 2138 buf[len++] = *current++; 2139 } 2140 ctxt->token = 0; 2141 } else if (c == '&') { 2142 if (NXT(1) == '#') { 2143 int val = xmlParseCharRef(ctxt); 2144 if (val == '&') { 2145 /* 2146 * The reparsing will be done in xmlStringGetNodeList() 2147 * called by the attribute() function in SAX.c 2148 */ 2149 static xmlChar buffer[6] = "&"; 2150 2151 if (len > buf_size - 10) { 2152 growBuffer(buf); 2153 } 2154 current = &buffer[0]; 2155 while (*current != 0) { /* non input consuming */ 2156 buf[len++] = *current++; 2157 } 2158 } else { 2159 len += xmlCopyChar(0, &buf[len], val); 2160 } 2161 } else { 2162 ent = xmlParseEntityRef(ctxt); 2163 if ((ent != NULL) && 2164 (ctxt->replaceEntities != 0)) { 2165 xmlChar *rep; 2166 2167 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2168 rep = xmlStringDecodeEntities(ctxt, ent->content, 2169 XML_SUBSTITUTE_REF, 0, 0, 0); 2170 if (rep != NULL) { 2171 current = rep; 2172 while (*current != 0) { /* non input consuming */ 2173 buf[len++] = *current++; 2174 if (len > buf_size - 10) { 2175 growBuffer(buf); 2176 } 2177 } 2178 xmlFree(rep); 2179 } 2180 } else { 2181 if (ent->content != NULL) 2182 buf[len++] = ent->content[0]; 2183 } 2184 } else if (ent != NULL) { 2185 int i = xmlStrlen(ent->name); 2186 const xmlChar *cur = ent->name; 2187 2188 /* 2189 * This may look absurd but is needed to detect 2190 * entities problems 2191 */ 2192 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2193 (ent->content != NULL)) { 2194 xmlChar *rep; 2195 rep = 
xmlStringDecodeEntities(ctxt, ent->content, 2196 XML_SUBSTITUTE_REF, 0, 0, 0); 2197 if (rep != NULL) 2198 xmlFree(rep); 2199 } 2200 2201 /* 2202 * Just output the reference 2203 */ 2204 buf[len++] = '&'; 2205 if (len > buf_size - i - 10) { 2206 growBuffer(buf); 2207 } 2208 for (;i > 0;i--) 2209 buf[len++] = *cur++; 2210 buf[len++] = ';'; 2211 } 2212 } 2213 } else { 2214 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2215 COPY_BUF(l,buf,len,0x20); 2216 if (len > buf_size - 10) { 2217 growBuffer(buf); 2218 } 2219 } else { 2220 COPY_BUF(l,buf,len,c); 2221 if (len > buf_size - 10) { 2222 growBuffer(buf); 2223 } 2224 } 2225 NEXTL(l); 2226 } 2227 GROW; 2228 c = CUR_CHAR(l); 2229 } 2230 buf[len++] = 0; 2231 if (RAW == '<') { 2232 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2234 ctxt->sax->error(ctxt->userData, 2235 "Unescaped '<' not allowed in attributes values\n"); 2236 ctxt->wellFormed = 0; 2237 ctxt->disableSAX = 1; 2238 } else if (RAW != limit) { 2239 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2241 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2242 ctxt->wellFormed = 0; 2243 ctxt->disableSAX = 1; 2244 } else 2245 NEXT; 2246 return(buf); 2247} 2248 2249/** 2250 * xmlParseSystemLiteral: 2251 * @ctxt: an XML parser context 2252 * 2253 * parse an XML Literal 2254 * 2255 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2256 * 2257 * Returns the SystemLiteral parsed or NULL 2258 */ 2259 2260xmlChar * 2261xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2262 xmlChar *buf = NULL; 2263 int len = 0; 2264 int size = XML_PARSER_BUFFER_SIZE; 2265 int cur, l; 2266 xmlChar stop; 2267 int state = ctxt->instate; 2268 int count = 0; 2269 2270 SHRINK; 2271 if (RAW == '"') { 2272 NEXT; 2273 stop = '"'; 2274 } else if (RAW == '\'') { 2275 NEXT; 2276 stop = '\''; 2277 } else { 2278 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2279 if 
((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2280 ctxt->sax->error(ctxt->userData, 2281 "SystemLiteral \" or ' expected\n"); 2282 ctxt->wellFormed = 0; 2283 ctxt->disableSAX = 1; 2284 return(NULL); 2285 } 2286 2287 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2288 if (buf == NULL) { 2289 xmlGenericError(xmlGenericErrorContext, 2290 "malloc of %d byte failed\n", size); 2291 return(NULL); 2292 } 2293 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2294 cur = CUR_CHAR(l); 2295 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2296 if (len + 5 >= size) { 2297 size *= 2; 2298 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2299 if (buf == NULL) { 2300 xmlGenericError(xmlGenericErrorContext, 2301 "realloc of %d byte failed\n", size); 2302 ctxt->instate = (xmlParserInputState) state; 2303 return(NULL); 2304 } 2305 } 2306 count++; 2307 if (count > 50) { 2308 GROW; 2309 count = 0; 2310 } 2311 COPY_BUF(l,buf,len,cur); 2312 NEXTL(l); 2313 cur = CUR_CHAR(l); 2314 if (cur == 0) { 2315 GROW; 2316 SHRINK; 2317 cur = CUR_CHAR(l); 2318 } 2319 } 2320 buf[len] = 0; 2321 ctxt->instate = (xmlParserInputState) state; 2322 if (!IS_CHAR(cur)) { 2323 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2325 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 2326 ctxt->wellFormed = 0; 2327 ctxt->disableSAX = 1; 2328 } else { 2329 NEXT; 2330 } 2331 return(buf); 2332} 2333 2334/** 2335 * xmlParsePubidLiteral: 2336 * @ctxt: an XML parser context 2337 * 2338 * parse an XML public literal 2339 * 2340 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2341 * 2342 * Returns the PubidLiteral parsed or NULL. 
2343 */ 2344 2345xmlChar * 2346xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 2347 xmlChar *buf = NULL; 2348 int len = 0; 2349 int size = XML_PARSER_BUFFER_SIZE; 2350 xmlChar cur; 2351 xmlChar stop; 2352 int count = 0; 2353 2354 SHRINK; 2355 if (RAW == '"') { 2356 NEXT; 2357 stop = '"'; 2358 } else if (RAW == '\'') { 2359 NEXT; 2360 stop = '\''; 2361 } else { 2362 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 2363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2364 ctxt->sax->error(ctxt->userData, 2365 "SystemLiteral \" or ' expected\n"); 2366 ctxt->wellFormed = 0; 2367 ctxt->disableSAX = 1; 2368 return(NULL); 2369 } 2370 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2371 if (buf == NULL) { 2372 xmlGenericError(xmlGenericErrorContext, 2373 "malloc of %d byte failed\n", size); 2374 return(NULL); 2375 } 2376 cur = CUR; 2377 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ 2378 if (len + 1 >= size) { 2379 size *= 2; 2380 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2381 if (buf == NULL) { 2382 xmlGenericError(xmlGenericErrorContext, 2383 "realloc of %d byte failed\n", size); 2384 return(NULL); 2385 } 2386 } 2387 buf[len++] = cur; 2388 count++; 2389 if (count > 50) { 2390 GROW; 2391 count = 0; 2392 } 2393 NEXT; 2394 cur = CUR; 2395 if (cur == 0) { 2396 GROW; 2397 SHRINK; 2398 cur = CUR; 2399 } 2400 } 2401 buf[len] = 0; 2402 if (cur != stop) { 2403 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 2404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2405 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 2406 ctxt->wellFormed = 0; 2407 ctxt->disableSAX = 1; 2408 } else { 2409 NEXT; 2410 } 2411 return(buf); 2412} 2413 2414void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 2415/** 2416 * xmlParseCharData: 2417 * @ctxt: an XML parser context 2418 * @cdata: int indicating whether we are within a CDATA section 2419 * 2420 * parse a CharData section. 
2421 * if we are within a CDATA section ']]>' marks an end of section. 2422 * 2423 * The right angle bracket (>) may be represented using the string ">", 2424 * and must, for compatibility, be escaped using ">" or a character 2425 * reference when it appears in the string "]]>" in content, when that 2426 * string is not marking the end of a CDATA section. 2427 * 2428 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2429 */ 2430 2431void 2432xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 2433 const xmlChar *in; 2434 int nbchar = 0; 2435 2436 SHRINK; 2437 GROW; 2438 /* 2439 * Accelerated common case where input don't need to be 2440 * modified before passing it to the handler. 2441 */ 2442 if ((ctxt->token == 0) && (!cdata)) { 2443 in = ctxt->input->cur; 2444 do { 2445 while (((*in >= 0x20) && (*in != '<') && 2446 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 2447 in++; 2448 if (*in == 0xA) { 2449 ctxt->input->line++; 2450 continue; /* while */ 2451 } 2452 nbchar = in - ctxt->input->cur; 2453 if (nbchar > 0) { 2454 if (IS_BLANK(*ctxt->input->cur) && 2455 areBlanks(ctxt, ctxt->input->cur, nbchar)) { 2456 if (ctxt->sax->ignorableWhitespace != NULL) 2457 ctxt->sax->ignorableWhitespace(ctxt->userData, 2458 ctxt->input->cur, nbchar); 2459 } else { 2460 if (ctxt->sax->characters != NULL) 2461 ctxt->sax->characters(ctxt->userData, 2462 ctxt->input->cur, nbchar); 2463 } 2464 } 2465 ctxt->input->cur = in; 2466 if (*in == 0xD) { 2467 in++; 2468 if (*in == 0xA) { 2469 ctxt->input->cur = in; 2470 in++; 2471 ctxt->input->line++; 2472 continue; /* while */ 2473 } 2474 in--; 2475 } 2476 if (*in == '<') { 2477 return; 2478 } 2479 if (*in == '&') { 2480 return; 2481 } 2482 SHRINK; 2483 GROW; 2484 in = ctxt->input->cur; 2485 } while ((*in >= 0x20) && (*in <= 0x7F)); 2486 nbchar = 0; 2487 } 2488 xmlParseCharDataComplex(ctxt, cdata); 2489} 2490 2491void 2492xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 2493 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 2494 int 
nbchar = 0; 2495 int cur, l; 2496 int count = 0; 2497 2498 SHRINK; 2499 GROW; 2500 cur = CUR_CHAR(l); 2501 while (((cur != '<') || (ctxt->token == '<')) && /* checked */ 2502 ((cur != '&') || (ctxt->token == '&')) && 2503 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 2504 if ((cur == ']') && (NXT(1) == ']') && 2505 (NXT(2) == '>')) { 2506 if (cdata) break; 2507 else { 2508 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 2509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2510 ctxt->sax->error(ctxt->userData, 2511 "Sequence ']]>' not allowed in content\n"); 2512 /* Should this be relaxed ??? I see a "must here */ 2513 ctxt->wellFormed = 0; 2514 ctxt->disableSAX = 1; 2515 } 2516 } 2517 COPY_BUF(l,buf,nbchar,cur); 2518 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 2519 /* 2520 * Ok the segment is to be consumed as chars. 2521 */ 2522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2523 if (areBlanks(ctxt, buf, nbchar)) { 2524 if (ctxt->sax->ignorableWhitespace != NULL) 2525 ctxt->sax->ignorableWhitespace(ctxt->userData, 2526 buf, nbchar); 2527 } else { 2528 if (ctxt->sax->characters != NULL) 2529 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2530 } 2531 } 2532 nbchar = 0; 2533 } 2534 count++; 2535 if (count > 50) { 2536 GROW; 2537 count = 0; 2538 } 2539 NEXTL(l); 2540 cur = CUR_CHAR(l); 2541 } 2542 if (nbchar != 0) { 2543 /* 2544 * Ok the segment is to be consumed as chars. 
2545 */ 2546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2547 if (areBlanks(ctxt, buf, nbchar)) { 2548 if (ctxt->sax->ignorableWhitespace != NULL) 2549 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 2550 } else { 2551 if (ctxt->sax->characters != NULL) 2552 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2553 } 2554 } 2555 } 2556} 2557 2558/** 2559 * xmlParseExternalID: 2560 * @ctxt: an XML parser context 2561 * @publicID: a xmlChar** receiving PubidLiteral 2562 * @strict: indicate whether we should restrict parsing to only 2563 * production [75], see NOTE below 2564 * 2565 * Parse an External ID or a Public ID 2566 * 2567 * NOTE: Productions [75] and [83] interract badly since [75] can generate 2568 * 'PUBLIC' S PubidLiteral S SystemLiteral 2569 * 2570 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 2571 * | 'PUBLIC' S PubidLiteral S SystemLiteral 2572 * 2573 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 2574 * 2575 * Returns the function returns SystemLiteral and in the second 2576 * case publicID receives PubidLiteral, is strict is off 2577 * it is possible to return NULL and have publicID set. 
2578 */ 2579 2580xmlChar * 2581xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 2582 xmlChar *URI = NULL; 2583 2584 SHRINK; 2585 if ((RAW == 'S') && (NXT(1) == 'Y') && 2586 (NXT(2) == 'S') && (NXT(3) == 'T') && 2587 (NXT(4) == 'E') && (NXT(5) == 'M')) { 2588 SKIP(6); 2589 if (!IS_BLANK(CUR)) { 2590 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2592 ctxt->sax->error(ctxt->userData, 2593 "Space required after 'SYSTEM'\n"); 2594 ctxt->wellFormed = 0; 2595 ctxt->disableSAX = 1; 2596 } 2597 SKIP_BLANKS; 2598 URI = xmlParseSystemLiteral(ctxt); 2599 if (URI == NULL) { 2600 ctxt->errNo = XML_ERR_URI_REQUIRED; 2601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2602 ctxt->sax->error(ctxt->userData, 2603 "xmlParseExternalID: SYSTEM, no URI\n"); 2604 ctxt->wellFormed = 0; 2605 ctxt->disableSAX = 1; 2606 } 2607 } else if ((RAW == 'P') && (NXT(1) == 'U') && 2608 (NXT(2) == 'B') && (NXT(3) == 'L') && 2609 (NXT(4) == 'I') && (NXT(5) == 'C')) { 2610 SKIP(6); 2611 if (!IS_BLANK(CUR)) { 2612 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2614 ctxt->sax->error(ctxt->userData, 2615 "Space required after 'PUBLIC'\n"); 2616 ctxt->wellFormed = 0; 2617 ctxt->disableSAX = 1; 2618 } 2619 SKIP_BLANKS; 2620 *publicID = xmlParsePubidLiteral(ctxt); 2621 if (*publicID == NULL) { 2622 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2624 ctxt->sax->error(ctxt->userData, 2625 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 2626 ctxt->wellFormed = 0; 2627 ctxt->disableSAX = 1; 2628 } 2629 if (strict) { 2630 /* 2631 * We don't handle [83] so "S SystemLiteral" is required. 
2632 */ 2633 if (!IS_BLANK(CUR)) { 2634 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2636 ctxt->sax->error(ctxt->userData, 2637 "Space required after the Public Identifier\n"); 2638 ctxt->wellFormed = 0; 2639 ctxt->disableSAX = 1; 2640 } 2641 } else { 2642 /* 2643 * We handle [83] so we return immediately, if 2644 * "S SystemLiteral" is not detected. From a purely parsing 2645 * point of view that's a nice mess. 2646 */ 2647 const xmlChar *ptr; 2648 GROW; 2649 2650 ptr = CUR_PTR; 2651 if (!IS_BLANK(*ptr)) return(NULL); 2652 2653 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 2654 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 2655 } 2656 SKIP_BLANKS; 2657 URI = xmlParseSystemLiteral(ctxt); 2658 if (URI == NULL) { 2659 ctxt->errNo = XML_ERR_URI_REQUIRED; 2660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2661 ctxt->sax->error(ctxt->userData, 2662 "xmlParseExternalID: PUBLIC, no URI\n"); 2663 ctxt->wellFormed = 0; 2664 ctxt->disableSAX = 1; 2665 } 2666 } 2667 return(URI); 2668} 2669 2670/** 2671 * xmlParseComment: 2672 * @ctxt: an XML parser context 2673 * 2674 * Skip an XML (SGML) comment <!-- .... --> 2675 * The spec says that "For compatibility, the string "--" (double-hyphen) 2676 * must not occur within comments. " 2677 * 2678 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 2679 */ 2680void 2681xmlParseComment(xmlParserCtxtPtr ctxt) { 2682 xmlChar *buf = NULL; 2683 int len; 2684 int size = XML_PARSER_BUFFER_SIZE; 2685 int q, ql; 2686 int r, rl; 2687 int cur, l; 2688 xmlParserInputState state; 2689 xmlParserInputPtr input = ctxt->input; 2690 int count = 0; 2691 2692 /* 2693 * Check that there is a comment right here. 
2694 */ 2695 if ((RAW != '<') || (NXT(1) != '!') || 2696 (NXT(2) != '-') || (NXT(3) != '-')) return; 2697 2698 state = ctxt->instate; 2699 ctxt->instate = XML_PARSER_COMMENT; 2700 SHRINK; 2701 SKIP(4); 2702 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2703 if (buf == NULL) { 2704 xmlGenericError(xmlGenericErrorContext, 2705 "malloc of %d byte failed\n", size); 2706 ctxt->instate = state; 2707 return; 2708 } 2709 q = CUR_CHAR(ql); 2710 NEXTL(ql); 2711 r = CUR_CHAR(rl); 2712 NEXTL(rl); 2713 cur = CUR_CHAR(l); 2714 len = 0; 2715 while (IS_CHAR(cur) && /* checked */ 2716 ((cur != '>') || 2717 (r != '-') || (q != '-'))) { 2718 if ((r == '-') && (q == '-') && (len > 1)) { 2719 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 2720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2721 ctxt->sax->error(ctxt->userData, 2722 "Comment must not contain '--' (double-hyphen)`\n"); 2723 ctxt->wellFormed = 0; 2724 ctxt->disableSAX = 1; 2725 } 2726 if (len + 5 >= size) { 2727 size *= 2; 2728 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2729 if (buf == NULL) { 2730 xmlGenericError(xmlGenericErrorContext, 2731 "realloc of %d byte failed\n", size); 2732 ctxt->instate = state; 2733 return; 2734 } 2735 } 2736 COPY_BUF(ql,buf,len,q); 2737 q = r; 2738 ql = rl; 2739 r = cur; 2740 rl = l; 2741 2742 count++; 2743 if (count > 50) { 2744 GROW; 2745 count = 0; 2746 } 2747 NEXTL(l); 2748 cur = CUR_CHAR(l); 2749 if (cur == 0) { 2750 SHRINK; 2751 GROW; 2752 cur = CUR_CHAR(l); 2753 } 2754 } 2755 buf[len] = 0; 2756 if (!IS_CHAR(cur)) { 2757 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 2758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2759 ctxt->sax->error(ctxt->userData, 2760 "Comment not terminated \n<!--%.50s\n", buf); 2761 ctxt->wellFormed = 0; 2762 ctxt->disableSAX = 1; 2763 xmlFree(buf); 2764 } else { 2765 if (input != ctxt->input) { 2766 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2768 
ctxt->sax->error(ctxt->userData, 2769"Comment doesn't start and stop in the same entity\n"); 2770 ctxt->wellFormed = 0; 2771 ctxt->disableSAX = 1; 2772 } 2773 NEXT; 2774 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 2775 (!ctxt->disableSAX)) 2776 ctxt->sax->comment(ctxt->userData, buf); 2777 xmlFree(buf); 2778 } 2779 ctxt->instate = state; 2780} 2781 2782/** 2783 * xmlParsePITarget: 2784 * @ctxt: an XML parser context 2785 * 2786 * parse the name of a PI 2787 * 2788 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 2789 * 2790 * Returns the PITarget name or NULL 2791 */ 2792 2793xmlChar * 2794xmlParsePITarget(xmlParserCtxtPtr ctxt) { 2795 xmlChar *name; 2796 2797 name = xmlParseName(ctxt); 2798 if ((name != NULL) && 2799 ((name[0] == 'x') || (name[0] == 'X')) && 2800 ((name[1] == 'm') || (name[1] == 'M')) && 2801 ((name[2] == 'l') || (name[2] == 'L'))) { 2802 int i; 2803 if ((name[0] == 'x') && (name[1] == 'm') && 2804 (name[2] == 'l') && (name[3] == 0)) { 2805 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2807 ctxt->sax->error(ctxt->userData, 2808 "XML declaration allowed only at the start of the document\n"); 2809 ctxt->wellFormed = 0; 2810 ctxt->disableSAX = 1; 2811 return(name); 2812 } else if (name[3] == 0) { 2813 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2815 ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); 2816 ctxt->wellFormed = 0; 2817 ctxt->disableSAX = 1; 2818 return(name); 2819 } 2820 for (i = 0;;i++) { 2821 if (xmlW3CPIs[i] == NULL) break; 2822 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 2823 return(name); 2824 } 2825 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 2826 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 2827 ctxt->sax->warning(ctxt->userData, 2828 "xmlParsePItarget: invalid name prefix 'xml'\n"); 2829 } 2830 } 2831 return(name); 2832} 2833 2834/** 2835 * xmlParsePI: 2836 * @ctxt: 
an XML parser context 2837 * 2838 * parse an XML Processing Instruction. 2839 * 2840 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 2841 * 2842 * The processing is transfered to SAX once parsed. 2843 */ 2844 2845void 2846xmlParsePI(xmlParserCtxtPtr ctxt) { 2847 xmlChar *buf = NULL; 2848 int len = 0; 2849 int size = XML_PARSER_BUFFER_SIZE; 2850 int cur, l; 2851 xmlChar *target; 2852 xmlParserInputState state; 2853 int count = 0; 2854 2855 if ((RAW == '<') && (NXT(1) == '?')) { 2856 xmlParserInputPtr input = ctxt->input; 2857 state = ctxt->instate; 2858 ctxt->instate = XML_PARSER_PI; 2859 /* 2860 * this is a Processing Instruction. 2861 */ 2862 SKIP(2); 2863 SHRINK; 2864 2865 /* 2866 * Parse the target name and check for special support like 2867 * namespace. 2868 */ 2869 target = xmlParsePITarget(ctxt); 2870 if (target != NULL) { 2871 if ((RAW == '?') && (NXT(1) == '>')) { 2872 if (input != ctxt->input) { 2873 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2875 ctxt->sax->error(ctxt->userData, 2876 "PI declaration doesn't start and stop in the same entity\n"); 2877 ctxt->wellFormed = 0; 2878 ctxt->disableSAX = 1; 2879 } 2880 SKIP(2); 2881 2882 /* 2883 * SAX: PI detected. 
2884 */ 2885 if ((ctxt->sax) && (!ctxt->disableSAX) && 2886 (ctxt->sax->processingInstruction != NULL)) 2887 ctxt->sax->processingInstruction(ctxt->userData, 2888 target, NULL); 2889 ctxt->instate = state; 2890 xmlFree(target); 2891 return; 2892 } 2893 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2894 if (buf == NULL) { 2895 xmlGenericError(xmlGenericErrorContext, 2896 "malloc of %d byte failed\n", size); 2897 ctxt->instate = state; 2898 return; 2899 } 2900 cur = CUR; 2901 if (!IS_BLANK(cur)) { 2902 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 2903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2904 ctxt->sax->error(ctxt->userData, 2905 "xmlParsePI: PI %s space expected\n", target); 2906 ctxt->wellFormed = 0; 2907 ctxt->disableSAX = 1; 2908 } 2909 SKIP_BLANKS; 2910 cur = CUR_CHAR(l); 2911 while (IS_CHAR(cur) && /* checked */ 2912 ((cur != '?') || (NXT(1) != '>'))) { 2913 if (len + 5 >= size) { 2914 size *= 2; 2915 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2916 if (buf == NULL) { 2917 xmlGenericError(xmlGenericErrorContext, 2918 "realloc of %d byte failed\n", size); 2919 ctxt->instate = state; 2920 return; 2921 } 2922 } 2923 count++; 2924 if (count > 50) { 2925 GROW; 2926 count = 0; 2927 } 2928 COPY_BUF(l,buf,len,cur); 2929 NEXTL(l); 2930 cur = CUR_CHAR(l); 2931 if (cur == 0) { 2932 SHRINK; 2933 GROW; 2934 cur = CUR_CHAR(l); 2935 } 2936 } 2937 buf[len] = 0; 2938 if (cur != '?') { 2939 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 2940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2941 ctxt->sax->error(ctxt->userData, 2942 "xmlParsePI: PI %s never end ...\n", target); 2943 ctxt->wellFormed = 0; 2944 ctxt->disableSAX = 1; 2945 } else { 2946 if (input != ctxt->input) { 2947 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2949 ctxt->sax->error(ctxt->userData, 2950 "PI declaration doesn't start and stop in the same entity\n"); 2951 ctxt->wellFormed = 0; 2952 ctxt->disableSAX = 1; 2953 } 2954 
SKIP(2); 2955 2956 /* 2957 * SAX: PI detected. 2958 */ 2959 if ((ctxt->sax) && (!ctxt->disableSAX) && 2960 (ctxt->sax->processingInstruction != NULL)) 2961 ctxt->sax->processingInstruction(ctxt->userData, 2962 target, buf); 2963 } 2964 xmlFree(buf); 2965 xmlFree(target); 2966 } else { 2967 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2969 ctxt->sax->error(ctxt->userData, 2970 "xmlParsePI : no target name\n"); 2971 ctxt->wellFormed = 0; 2972 ctxt->disableSAX = 1; 2973 } 2974 ctxt->instate = state; 2975 } 2976} 2977 2978/** 2979 * xmlParseNotationDecl: 2980 * @ctxt: an XML parser context 2981 * 2982 * parse a notation declaration 2983 * 2984 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 2985 * 2986 * Hence there is actually 3 choices: 2987 * 'PUBLIC' S PubidLiteral 2988 * 'PUBLIC' S PubidLiteral S SystemLiteral 2989 * and 'SYSTEM' S SystemLiteral 2990 * 2991 * See the NOTE on xmlParseExternalID(). 2992 */ 2993 2994void 2995xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 2996 xmlChar *name; 2997 xmlChar *Pubid; 2998 xmlChar *Systemid; 2999 3000 if ((RAW == '<') && (NXT(1) == '!') && 3001 (NXT(2) == 'N') && (NXT(3) == 'O') && 3002 (NXT(4) == 'T') && (NXT(5) == 'A') && 3003 (NXT(6) == 'T') && (NXT(7) == 'I') && 3004 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3005 xmlParserInputPtr input = ctxt->input; 3006 SHRINK; 3007 SKIP(10); 3008 if (!IS_BLANK(CUR)) { 3009 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3011 ctxt->sax->error(ctxt->userData, 3012 "Space required after '<!NOTATION'\n"); 3013 ctxt->wellFormed = 0; 3014 ctxt->disableSAX = 1; 3015 return; 3016 } 3017 SKIP_BLANKS; 3018 3019 name = xmlParseNameComplex(ctxt); 3020 if (name == NULL) { 3021 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3023 ctxt->sax->error(ctxt->userData, 3024 "NOTATION: Name expected here\n"); 3025 
ctxt->wellFormed = 0; 3026 ctxt->disableSAX = 1; 3027 return; 3028 } 3029 if (!IS_BLANK(CUR)) { 3030 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3032 ctxt->sax->error(ctxt->userData, 3033 "Space required after the NOTATION name'\n"); 3034 ctxt->wellFormed = 0; 3035 ctxt->disableSAX = 1; 3036 return; 3037 } 3038 SKIP_BLANKS; 3039 3040 /* 3041 * Parse the IDs. 3042 */ 3043 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3044 SKIP_BLANKS; 3045 3046 if (RAW == '>') { 3047 if (input != ctxt->input) { 3048 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3050 ctxt->sax->error(ctxt->userData, 3051"Notation declaration doesn't start and stop in the same entity\n"); 3052 ctxt->wellFormed = 0; 3053 ctxt->disableSAX = 1; 3054 } 3055 NEXT; 3056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3057 (ctxt->sax->notationDecl != NULL)) 3058 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3059 } else { 3060 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3062 ctxt->sax->error(ctxt->userData, 3063 "'>' required to close NOTATION declaration\n"); 3064 ctxt->wellFormed = 0; 3065 ctxt->disableSAX = 1; 3066 } 3067 xmlFree(name); 3068 if (Systemid != NULL) xmlFree(Systemid); 3069 if (Pubid != NULL) xmlFree(Pubid); 3070 } 3071} 3072 3073/** 3074 * xmlParseEntityDecl: 3075 * @ctxt: an XML parser context 3076 * 3077 * parse <!ENTITY declarations 3078 * 3079 * [70] EntityDecl ::= GEDecl | PEDecl 3080 * 3081 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3082 * 3083 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3084 * 3085 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3086 * 3087 * [74] PEDef ::= EntityValue | ExternalID 3088 * 3089 * [76] NDataDecl ::= S 'NDATA' S Name 3090 * 3091 * [ VC: Notation Declared ] 3092 * The Name must match the declared name of a notation. 
3093 */ 3094 3095void 3096xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3097 xmlChar *name = NULL; 3098 xmlChar *value = NULL; 3099 xmlChar *URI = NULL, *literal = NULL; 3100 xmlChar *ndata = NULL; 3101 int isParameter = 0; 3102 xmlChar *orig = NULL; 3103 3104 GROW; 3105 if ((RAW == '<') && (NXT(1) == '!') && 3106 (NXT(2) == 'E') && (NXT(3) == 'N') && 3107 (NXT(4) == 'T') && (NXT(5) == 'I') && 3108 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3109 xmlParserInputPtr input = ctxt->input; 3110 ctxt->instate = XML_PARSER_ENTITY_DECL; 3111 SHRINK; 3112 SKIP(8); 3113 if (!IS_BLANK(CUR)) { 3114 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3116 ctxt->sax->error(ctxt->userData, 3117 "Space required after '<!ENTITY'\n"); 3118 ctxt->wellFormed = 0; 3119 ctxt->disableSAX = 1; 3120 } 3121 SKIP_BLANKS; 3122 3123 if (RAW == '%') { 3124 NEXT; 3125 if (!IS_BLANK(CUR)) { 3126 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3128 ctxt->sax->error(ctxt->userData, 3129 "Space required after '%'\n"); 3130 ctxt->wellFormed = 0; 3131 ctxt->disableSAX = 1; 3132 } 3133 SKIP_BLANKS; 3134 isParameter = 1; 3135 } 3136 3137 name = xmlParseNameComplex(ctxt); 3138 if (name == NULL) { 3139 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3141 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3142 ctxt->wellFormed = 0; 3143 ctxt->disableSAX = 1; 3144 return; 3145 } 3146 if (!IS_BLANK(CUR)) { 3147 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3149 ctxt->sax->error(ctxt->userData, 3150 "Space required after the entity name\n"); 3151 ctxt->wellFormed = 0; 3152 ctxt->disableSAX = 1; 3153 } 3154 SKIP_BLANKS; 3155 3156 /* 3157 * handle the various case of definitions... 
3158 */ 3159 if (isParameter) { 3160 if ((RAW == '"') || (RAW == '\'')) { 3161 value = xmlParseEntityValue(ctxt, &orig); 3162 if (value) { 3163 if ((ctxt->sax != NULL) && 3164 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3165 ctxt->sax->entityDecl(ctxt->userData, name, 3166 XML_INTERNAL_PARAMETER_ENTITY, 3167 NULL, NULL, value); 3168 } 3169 } else { 3170 URI = xmlParseExternalID(ctxt, &literal, 1); 3171 if ((URI == NULL) && (literal == NULL)) { 3172 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3174 ctxt->sax->error(ctxt->userData, 3175 "Entity value required\n"); 3176 ctxt->wellFormed = 0; 3177 ctxt->disableSAX = 1; 3178 } 3179 if (URI) { 3180 xmlURIPtr uri; 3181 3182 uri = xmlParseURI((const char *) URI); 3183 if (uri == NULL) { 3184 ctxt->errNo = XML_ERR_INVALID_URI; 3185 if ((ctxt->sax != NULL) && 3186 (!ctxt->disableSAX) && 3187 (ctxt->sax->error != NULL)) 3188 ctxt->sax->error(ctxt->userData, 3189 "Invalid URI: %s\n", URI); 3190 ctxt->wellFormed = 0; 3191 } else { 3192 if (uri->fragment != NULL) { 3193 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3194 if ((ctxt->sax != NULL) && 3195 (!ctxt->disableSAX) && 3196 (ctxt->sax->error != NULL)) 3197 ctxt->sax->error(ctxt->userData, 3198 "Fragment not allowed: %s\n", URI); 3199 ctxt->wellFormed = 0; 3200 } else { 3201 if ((ctxt->sax != NULL) && 3202 (!ctxt->disableSAX) && 3203 (ctxt->sax->entityDecl != NULL)) 3204 ctxt->sax->entityDecl(ctxt->userData, name, 3205 XML_EXTERNAL_PARAMETER_ENTITY, 3206 literal, URI, NULL); 3207 } 3208 xmlFreeURI(uri); 3209 } 3210 } 3211 } 3212 } else { 3213 if ((RAW == '"') || (RAW == '\'')) { 3214 value = xmlParseEntityValue(ctxt, &orig); 3215 if ((ctxt->sax != NULL) && 3216 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3217 ctxt->sax->entityDecl(ctxt->userData, name, 3218 XML_INTERNAL_GENERAL_ENTITY, 3219 NULL, NULL, value); 3220 } else { 3221 URI = xmlParseExternalID(ctxt, &literal, 1); 3222 if ((URI == NULL) && (literal 
== NULL)) { 3223 ctxt->errNo = XML_ERR_VALUE_REQUIRED; 3224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3225 ctxt->sax->error(ctxt->userData, 3226 "Entity value required\n"); 3227 ctxt->wellFormed = 0; 3228 ctxt->disableSAX = 1; 3229 } 3230 if (URI) { 3231 xmlURIPtr uri; 3232 3233 uri = xmlParseURI((const char *)URI); 3234 if (uri == NULL) { 3235 ctxt->errNo = XML_ERR_INVALID_URI; 3236 if ((ctxt->sax != NULL) && 3237 (!ctxt->disableSAX) && 3238 (ctxt->sax->error != NULL)) 3239 ctxt->sax->error(ctxt->userData, 3240 "Invalid URI: %s\n", URI); 3241 ctxt->wellFormed = 0; 3242 } else { 3243 if (uri->fragment != NULL) { 3244 ctxt->errNo = XML_ERR_URI_FRAGMENT; 3245 if ((ctxt->sax != NULL) && 3246 (!ctxt->disableSAX) && 3247 (ctxt->sax->error != NULL)) 3248 ctxt->sax->error(ctxt->userData, 3249 "Fragment not allowed: %s\n", URI); 3250 ctxt->wellFormed = 0; 3251 } 3252 xmlFreeURI(uri); 3253 } 3254 } 3255 if ((RAW != '>') && (!IS_BLANK(CUR))) { 3256 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3258 ctxt->sax->error(ctxt->userData, 3259 "Space required before 'NDATA'\n"); 3260 ctxt->wellFormed = 0; 3261 ctxt->disableSAX = 1; 3262 } 3263 SKIP_BLANKS; 3264 if ((RAW == 'N') && (NXT(1) == 'D') && 3265 (NXT(2) == 'A') && (NXT(3) == 'T') && 3266 (NXT(4) == 'A')) { 3267 SKIP(5); 3268 if (!IS_BLANK(CUR)) { 3269 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3271 ctxt->sax->error(ctxt->userData, 3272 "Space required after 'NDATA'\n"); 3273 ctxt->wellFormed = 0; 3274 ctxt->disableSAX = 1; 3275 } 3276 SKIP_BLANKS; 3277 ndata = xmlParseNameComplex(ctxt); 3278 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3279 (ctxt->sax->unparsedEntityDecl != NULL)) 3280 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3281 literal, URI, ndata); 3282 } else { 3283 if ((ctxt->sax != NULL) && 3284 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3285 
ctxt->sax->entityDecl(ctxt->userData, name, 3286 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3287 literal, URI, NULL); 3288 } 3289 } 3290 } 3291 SKIP_BLANKS; 3292 if (RAW != '>') { 3293 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3295 ctxt->sax->error(ctxt->userData, 3296 "xmlParseEntityDecl: entity %s not terminated\n", name); 3297 ctxt->wellFormed = 0; 3298 ctxt->disableSAX = 1; 3299 } else { 3300 if (input != ctxt->input) { 3301 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3303 ctxt->sax->error(ctxt->userData, 3304"Entity declaration doesn't start and stop in the same entity\n"); 3305 ctxt->wellFormed = 0; 3306 ctxt->disableSAX = 1; 3307 } 3308 NEXT; 3309 } 3310 if (orig != NULL) { 3311 /* 3312 * Ugly mechanism to save the raw entity value. 3313 */ 3314 xmlEntityPtr cur = NULL; 3315 3316 if (isParameter) { 3317 if ((ctxt->sax != NULL) && 3318 (ctxt->sax->getParameterEntity != NULL)) 3319 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3320 } else { 3321 if ((ctxt->sax != NULL) && 3322 (ctxt->sax->getEntity != NULL)) 3323 cur = ctxt->sax->getEntity(ctxt->userData, name); 3324 } 3325 if (cur != NULL) { 3326 if (cur->orig != NULL) 3327 xmlFree(orig); 3328 else 3329 cur->orig = orig; 3330 } else 3331 xmlFree(orig); 3332 } 3333 if (name != NULL) xmlFree(name); 3334 if (value != NULL) xmlFree(value); 3335 if (URI != NULL) xmlFree(URI); 3336 if (literal != NULL) xmlFree(literal); 3337 if (ndata != NULL) xmlFree(ndata); 3338 } 3339} 3340 3341/** 3342 * xmlParseDefaultDecl: 3343 * @ctxt: an XML parser context 3344 * @value: Receive a possible fixed default value for the attribute 3345 * 3346 * Parse an attribute default declaration 3347 * 3348 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) 3349 * 3350 * [ VC: Required Attribute ] 3351 * if the default declaration is the keyword #REQUIRED, then the 3352 * attribute must be specified for all elements of the type in the 3353 * attribute-list declaration. 3354 * 3355 * [ VC: Attribute Default Legal ] 3356 * The declared default value must meet the lexical constraints of 3357 * the declared attribute type c.f. xmlValidateAttributeDecl() 3358 * 3359 * [ VC: Fixed Attribute Default ] 3360 * if an attribute has a default value declared with the #FIXED 3361 * keyword, instances of that attribute must match the default value. 3362 * 3363 * [ WFC: No < in Attribute Values ] 3364 * handled in xmlParseAttValue() 3365 * 3366 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3367 * or XML_ATTRIBUTE_FIXED. 3368 */ 3369 3370int 3371xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3372 int val; 3373 xmlChar *ret; 3374 3375 *value = NULL; 3376 if ((RAW == '#') && (NXT(1) == 'R') && 3377 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3378 (NXT(4) == 'U') && (NXT(5) == 'I') && 3379 (NXT(6) == 'R') && (NXT(7) == 'E') && 3380 (NXT(8) == 'D')) { 3381 SKIP(9); 3382 return(XML_ATTRIBUTE_REQUIRED); 3383 } 3384 if ((RAW == '#') && (NXT(1) == 'I') && 3385 (NXT(2) == 'M') && (NXT(3) == 'P') && 3386 (NXT(4) == 'L') && (NXT(5) == 'I') && 3387 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3388 SKIP(8); 3389 return(XML_ATTRIBUTE_IMPLIED); 3390 } 3391 val = XML_ATTRIBUTE_NONE; 3392 if ((RAW == '#') && (NXT(1) == 'F') && 3393 (NXT(2) == 'I') && (NXT(3) == 'X') && 3394 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3395 SKIP(6); 3396 val = XML_ATTRIBUTE_FIXED; 3397 if (!IS_BLANK(CUR)) { 3398 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3400 ctxt->sax->error(ctxt->userData, 3401 "Space required after '#FIXED'\n"); 3402 ctxt->wellFormed = 0; 3403 ctxt->disableSAX = 1; 3404 } 3405 SKIP_BLANKS; 3406 } 3407 ret = xmlParseAttValue(ctxt); 3408 ctxt->instate = 
XML_PARSER_DTD; 3409 if (ret == NULL) { 3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3411 ctxt->sax->error(ctxt->userData, 3412 "Attribute default value declaration error\n"); 3413 ctxt->wellFormed = 0; 3414 ctxt->disableSAX = 1; 3415 } else 3416 *value = ret; 3417 return(val); 3418} 3419 3420/** 3421 * xmlParseNotationType: 3422 * @ctxt: an XML parser context 3423 * 3424 * parse an Notation attribute type. 3425 * 3426 * Note: the leading 'NOTATION' S part has already being parsed... 3427 * 3428 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3429 * 3430 * [ VC: Notation Attributes ] 3431 * Values of this type must match one of the notation names included 3432 * in the declaration; all notation names in the declaration must be declared. 3433 * 3434 * Returns: the notation attribute tree built while parsing 3435 */ 3436 3437xmlEnumerationPtr 3438xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3439 xmlChar *name; 3440 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3441 3442 if (RAW != '(') { 3443 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3445 ctxt->sax->error(ctxt->userData, 3446 "'(' required to start 'NOTATION'\n"); 3447 ctxt->wellFormed = 0; 3448 ctxt->disableSAX = 1; 3449 return(NULL); 3450 } 3451 SHRINK; 3452 do { 3453 NEXT; 3454 SKIP_BLANKS; 3455 name = xmlParseNameComplex(ctxt); 3456 if (name == NULL) { 3457 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3459 ctxt->sax->error(ctxt->userData, 3460 "Name expected in NOTATION declaration\n"); 3461 ctxt->wellFormed = 0; 3462 ctxt->disableSAX = 1; 3463 return(ret); 3464 } 3465 cur = xmlCreateEnumeration(name); 3466 xmlFree(name); 3467 if (cur == NULL) return(ret); 3468 if (last == NULL) ret = last = cur; 3469 else { 3470 last->next = cur; 3471 last = cur; 3472 } 3473 SKIP_BLANKS; 3474 } while (RAW == '|'); 3475 if (RAW != ')') { 3476 ctxt->errNo = 
XML_ERR_NOTATION_NOT_FINISHED; 3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3478 ctxt->sax->error(ctxt->userData, 3479 "')' required to finish NOTATION declaration\n"); 3480 ctxt->wellFormed = 0; 3481 ctxt->disableSAX = 1; 3482 if ((last != NULL) && (last != ret)) 3483 xmlFreeEnumeration(last); 3484 return(ret); 3485 } 3486 NEXT; 3487 return(ret); 3488} 3489 3490/** 3491 * xmlParseEnumerationType: 3492 * @ctxt: an XML parser context 3493 * 3494 * parse an Enumeration attribute type. 3495 * 3496 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3497 * 3498 * [ VC: Enumeration ] 3499 * Values of this type must match one of the Nmtoken tokens in 3500 * the declaration 3501 * 3502 * Returns: the enumeration attribute tree built while parsing 3503 */ 3504 3505xmlEnumerationPtr 3506xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3507 xmlChar *name; 3508 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3509 3510 if (RAW != '(') { 3511 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3513 ctxt->sax->error(ctxt->userData, 3514 "'(' required to start ATTLIST enumeration\n"); 3515 ctxt->wellFormed = 0; 3516 ctxt->disableSAX = 1; 3517 return(NULL); 3518 } 3519 SHRINK; 3520 do { 3521 NEXT; 3522 SKIP_BLANKS; 3523 name = xmlParseNmtoken(ctxt); 3524 if (name == NULL) { 3525 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3527 ctxt->sax->error(ctxt->userData, 3528 "NmToken expected in ATTLIST enumeration\n"); 3529 ctxt->wellFormed = 0; 3530 ctxt->disableSAX = 1; 3531 return(ret); 3532 } 3533 cur = xmlCreateEnumeration(name); 3534 xmlFree(name); 3535 if (cur == NULL) return(ret); 3536 if (last == NULL) ret = last = cur; 3537 else { 3538 last->next = cur; 3539 last = cur; 3540 } 3541 SKIP_BLANKS; 3542 } while (RAW == '|'); 3543 if (RAW != ')') { 3544 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3545 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 3546 ctxt->sax->error(ctxt->userData, 3547 "')' required to finish ATTLIST enumeration\n"); 3548 ctxt->wellFormed = 0; 3549 ctxt->disableSAX = 1; 3550 return(ret); 3551 } 3552 NEXT; 3553 return(ret); 3554} 3555 3556/** 3557 * xmlParseEnumeratedType: 3558 * @ctxt: an XML parser context 3559 * @tree: the enumeration tree built while parsing 3560 * 3561 * parse an Enumerated attribute type. 3562 * 3563 * [57] EnumeratedType ::= NotationType | Enumeration 3564 * 3565 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3566 * 3567 * 3568 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3569 */ 3570 3571int 3572xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3573 if ((RAW == 'N') && (NXT(1) == 'O') && 3574 (NXT(2) == 'T') && (NXT(3) == 'A') && 3575 (NXT(4) == 'T') && (NXT(5) == 'I') && 3576 (NXT(6) == 'O') && (NXT(7) == 'N')) { 3577 SKIP(8); 3578 if (!IS_BLANK(CUR)) { 3579 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3581 ctxt->sax->error(ctxt->userData, 3582 "Space required after 'NOTATION'\n"); 3583 ctxt->wellFormed = 0; 3584 ctxt->disableSAX = 1; 3585 return(0); 3586 } 3587 SKIP_BLANKS; 3588 *tree = xmlParseNotationType(ctxt); 3589 if (*tree == NULL) return(0); 3590 return(XML_ATTRIBUTE_NOTATION); 3591 } 3592 *tree = xmlParseEnumerationType(ctxt); 3593 if (*tree == NULL) return(0); 3594 return(XML_ATTRIBUTE_ENUMERATION); 3595} 3596 3597/** 3598 * xmlParseAttributeType: 3599 * @ctxt: an XML parser context 3600 * @tree: the enumeration tree built while parsing 3601 * 3602 * parse the Attribute list def for an element 3603 * 3604 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 3605 * 3606 * [55] StringType ::= 'CDATA' 3607 * 3608 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 3609 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 3610 * 3611 * Validity constraints for attribute values syntax are checked in 3612 * 
xmlValidateAttributeValue() 3613 * 3614 * [ VC: ID ] 3615 * Values of type ID must match the Name production. A name must not 3616 * appear more than once in an XML document as a value of this type; 3617 * i.e., ID values must uniquely identify the elements which bear them. 3618 * 3619 * [ VC: One ID per Element Type ] 3620 * No element type may have more than one ID attribute specified. 3621 * 3622 * [ VC: ID Attribute Default ] 3623 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 3624 * 3625 * [ VC: IDREF ] 3626 * Values of type IDREF must match the Name production, and values 3627 * of type IDREFS must match Names; each IDREF Name must match the value 3628 * of an ID attribute on some element in the XML document; i.e. IDREF 3629 * values must match the value of some ID attribute. 3630 * 3631 * [ VC: Entity Name ] 3632 * Values of type ENTITY must match the Name production, values 3633 * of type ENTITIES must match Names; each Entity Name must match the 3634 * name of an unparsed entity declared in the DTD. 3635 * 3636 * [ VC: Name Token ] 3637 * Values of type NMTOKEN must match the Nmtoken production; values 3638 * of type NMTOKENS must match Nmtokens. 
3639 * 3640 * Returns the attribute type 3641 */ 3642int 3643xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3644 SHRINK; 3645 if ((RAW == 'C') && (NXT(1) == 'D') && 3646 (NXT(2) == 'A') && (NXT(3) == 'T') && 3647 (NXT(4) == 'A')) { 3648 SKIP(5); 3649 return(XML_ATTRIBUTE_CDATA); 3650 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3651 (NXT(2) == 'R') && (NXT(3) == 'E') && 3652 (NXT(4) == 'F') && (NXT(5) == 'S')) { 3653 SKIP(6); 3654 return(XML_ATTRIBUTE_IDREFS); 3655 } else if ((RAW == 'I') && (NXT(1) == 'D') && 3656 (NXT(2) == 'R') && (NXT(3) == 'E') && 3657 (NXT(4) == 'F')) { 3658 SKIP(5); 3659 return(XML_ATTRIBUTE_IDREF); 3660 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 3661 SKIP(2); 3662 return(XML_ATTRIBUTE_ID); 3663 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3664 (NXT(2) == 'T') && (NXT(3) == 'I') && 3665 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 3666 SKIP(6); 3667 return(XML_ATTRIBUTE_ENTITY); 3668 } else if ((RAW == 'E') && (NXT(1) == 'N') && 3669 (NXT(2) == 'T') && (NXT(3) == 'I') && 3670 (NXT(4) == 'T') && (NXT(5) == 'I') && 3671 (NXT(6) == 'E') && (NXT(7) == 'S')) { 3672 SKIP(8); 3673 return(XML_ATTRIBUTE_ENTITIES); 3674 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3675 (NXT(2) == 'T') && (NXT(3) == 'O') && 3676 (NXT(4) == 'K') && (NXT(5) == 'E') && 3677 (NXT(6) == 'N') && (NXT(7) == 'S')) { 3678 SKIP(8); 3679 return(XML_ATTRIBUTE_NMTOKENS); 3680 } else if ((RAW == 'N') && (NXT(1) == 'M') && 3681 (NXT(2) == 'T') && (NXT(3) == 'O') && 3682 (NXT(4) == 'K') && (NXT(5) == 'E') && 3683 (NXT(6) == 'N')) { 3684 SKIP(7); 3685 return(XML_ATTRIBUTE_NMTOKEN); 3686 } 3687 return(xmlParseEnumeratedType(ctxt, tree)); 3688} 3689 3690/** 3691 * xmlParseAttributeListDecl: 3692 * @ctxt: an XML parser context 3693 * 3694 * : parse the Attribute list def for an element 3695 * 3696 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 3697 * 3698 * [53] AttDef ::= S Name S AttType S DefaultDecl 3699 * 3700 */ 3701void 3702xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 3703 xmlChar *elemName; 3704 xmlChar *attrName; 3705 xmlEnumerationPtr tree; 3706 3707 if ((RAW == '<') && (NXT(1) == '!') && 3708 (NXT(2) == 'A') && (NXT(3) == 'T') && 3709 (NXT(4) == 'T') && (NXT(5) == 'L') && 3710 (NXT(6) == 'I') && (NXT(7) == 'S') && 3711 (NXT(8) == 'T')) { 3712 xmlParserInputPtr input = ctxt->input; 3713 3714 SKIP(9); 3715 if (!IS_BLANK(CUR)) { 3716 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3718 ctxt->sax->error(ctxt->userData, 3719 "Space required after '<!ATTLIST'\n"); 3720 ctxt->wellFormed = 0; 3721 ctxt->disableSAX = 1; 3722 } 3723 SKIP_BLANKS; 3724 elemName = xmlParseNameComplex(ctxt); 3725 if (elemName == NULL) { 3726 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3728 ctxt->sax->error(ctxt->userData, 3729 "ATTLIST: no name for Element\n"); 3730 ctxt->wellFormed = 0; 3731 ctxt->disableSAX = 1; 3732 return; 3733 } 3734 SKIP_BLANKS; 3735 GROW; 3736 while (RAW != '>') { 3737 const xmlChar *check = CUR_PTR; 3738 int type; 3739 int def; 3740 xmlChar *defaultValue = NULL; 3741 3742 GROW; 3743 tree = NULL; 3744 attrName = xmlParseNameComplex(ctxt); 3745 if (attrName == NULL) { 3746 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3748 ctxt->sax->error(ctxt->userData, 3749 "ATTLIST: no name for Attribute\n"); 3750 ctxt->wellFormed = 0; 3751 ctxt->disableSAX = 1; 3752 break; 3753 } 3754 GROW; 3755 if (!IS_BLANK(CUR)) { 3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3758 ctxt->sax->error(ctxt->userData, 3759 "Space required after the attribute name\n"); 3760 ctxt->wellFormed = 0; 3761 ctxt->disableSAX = 1; 3762 if (attrName != NULL) 3763 xmlFree(attrName); 3764 if (defaultValue != NULL) 3765 
xmlFree(defaultValue); 3766 break; 3767 } 3768 SKIP_BLANKS; 3769 3770 type = xmlParseAttributeType(ctxt, &tree); 3771 if (type <= 0) { 3772 if (attrName != NULL) 3773 xmlFree(attrName); 3774 if (defaultValue != NULL) 3775 xmlFree(defaultValue); 3776 break; 3777 } 3778 3779 GROW; 3780 if (!IS_BLANK(CUR)) { 3781 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3783 ctxt->sax->error(ctxt->userData, 3784 "Space required after the attribute type\n"); 3785 ctxt->wellFormed = 0; 3786 ctxt->disableSAX = 1; 3787 if (attrName != NULL) 3788 xmlFree(attrName); 3789 if (defaultValue != NULL) 3790 xmlFree(defaultValue); 3791 if (tree != NULL) 3792 xmlFreeEnumeration(tree); 3793 break; 3794 } 3795 SKIP_BLANKS; 3796 3797 def = xmlParseDefaultDecl(ctxt, &defaultValue); 3798 if (def <= 0) { 3799 if (attrName != NULL) 3800 xmlFree(attrName); 3801 if (defaultValue != NULL) 3802 xmlFree(defaultValue); 3803 if (tree != NULL) 3804 xmlFreeEnumeration(tree); 3805 break; 3806 } 3807 3808 GROW; 3809 if (RAW != '>') { 3810 if (!IS_BLANK(CUR)) { 3811 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3813 ctxt->sax->error(ctxt->userData, 3814 "Space required after the attribute default value\n"); 3815 ctxt->wellFormed = 0; 3816 ctxt->disableSAX = 1; 3817 if (attrName != NULL) 3818 xmlFree(attrName); 3819 if (defaultValue != NULL) 3820 xmlFree(defaultValue); 3821 if (tree != NULL) 3822 xmlFreeEnumeration(tree); 3823 break; 3824 } 3825 SKIP_BLANKS; 3826 } 3827 if (check == CUR_PTR) { 3828 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 3829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3830 ctxt->sax->error(ctxt->userData, 3831 "xmlParseAttributeListDecl: detected internal error\n"); 3832 if (attrName != NULL) 3833 xmlFree(attrName); 3834 if (defaultValue != NULL) 3835 xmlFree(defaultValue); 3836 if (tree != NULL) 3837 xmlFreeEnumeration(tree); 3838 break; 3839 } 3840 if ((ctxt->sax != NULL) && 
(!ctxt->disableSAX) && 3841 (ctxt->sax->attributeDecl != NULL)) 3842 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 3843 type, def, defaultValue, tree); 3844 if (attrName != NULL) 3845 xmlFree(attrName); 3846 if (defaultValue != NULL) 3847 xmlFree(defaultValue); 3848 GROW; 3849 } 3850 if (RAW == '>') { 3851 if (input != ctxt->input) { 3852 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 3853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3854 ctxt->sax->error(ctxt->userData, 3855"Attribute list declaration doesn't start and stop in the same entity\n"); 3856 ctxt->wellFormed = 0; 3857 ctxt->disableSAX = 1; 3858 } 3859 NEXT; 3860 } 3861 3862 xmlFree(elemName); 3863 } 3864} 3865 3866/** 3867 * xmlParseElementMixedContentDecl: 3868 * @ctxt: an XML parser context 3869 * 3870 * parse the declaration for a Mixed Element content 3871 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3872 * 3873 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 3874 * '(' S? '#PCDATA' S? ')' 3875 * 3876 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 3877 * 3878 * [ VC: No Duplicate Types ] 3879 * The same name must not appear more than once in a single 3880 * mixed-content declaration. 
3881 * 3882 * returns: the list of the xmlElementContentPtr describing the element choices 3883 */ 3884xmlElementContentPtr 3885xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 3886 xmlElementContentPtr ret = NULL, cur = NULL, n; 3887 xmlChar *elem = NULL; 3888 3889 GROW; 3890 if ((RAW == '#') && (NXT(1) == 'P') && 3891 (NXT(2) == 'C') && (NXT(3) == 'D') && 3892 (NXT(4) == 'A') && (NXT(5) == 'T') && 3893 (NXT(6) == 'A')) { 3894 SKIP(7); 3895 SKIP_BLANKS; 3896 SHRINK; 3897 if (RAW == ')') { 3898 ctxt->entity = ctxt->input; 3899 NEXT; 3900 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3901 if (RAW == '*') { 3902 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3903 NEXT; 3904 } 3905 return(ret); 3906 } 3907 if ((RAW == '(') || (RAW == '|')) { 3908 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 3909 if (ret == NULL) return(NULL); 3910 } 3911 while (RAW == '|') { 3912 NEXT; 3913 if (elem == NULL) { 3914 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3915 if (ret == NULL) return(NULL); 3916 ret->c1 = cur; 3917 cur = ret; 3918 } else { 3919 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 3920 if (n == NULL) return(NULL); 3921 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 3922 cur->c2 = n; 3923 cur = n; 3924 xmlFree(elem); 3925 } 3926 SKIP_BLANKS; 3927 elem = xmlParseNameComplex(ctxt); 3928 if (elem == NULL) { 3929 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3931 ctxt->sax->error(ctxt->userData, 3932 "xmlParseElementMixedContentDecl : Name expected\n"); 3933 ctxt->wellFormed = 0; 3934 ctxt->disableSAX = 1; 3935 xmlFreeElementContent(cur); 3936 return(NULL); 3937 } 3938 SKIP_BLANKS; 3939 GROW; 3940 } 3941 if ((RAW == ')') && (NXT(1) == '*')) { 3942 if (elem != NULL) { 3943 cur->c2 = xmlNewElementContent(elem, 3944 XML_ELEMENT_CONTENT_ELEMENT); 3945 xmlFree(elem); 3946 } 3947 ret->ocur = XML_ELEMENT_CONTENT_MULT; 3948 ctxt->entity = ctxt->input; 3949 
SKIP(2); 3950 } else { 3951 if (elem != NULL) xmlFree(elem); 3952 xmlFreeElementContent(ret); 3953 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 3954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3955 ctxt->sax->error(ctxt->userData, 3956 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 3957 ctxt->wellFormed = 0; 3958 ctxt->disableSAX = 1; 3959 return(NULL); 3960 } 3961 3962 } else { 3963 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 3964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3965 ctxt->sax->error(ctxt->userData, 3966 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 3967 ctxt->wellFormed = 0; 3968 ctxt->disableSAX = 1; 3969 } 3970 return(ret); 3971} 3972 3973/** 3974 * xmlParseElementChildrenContentDecl: 3975 * @ctxt: an XML parser context 3976 * 3977 * parse the declaration for a Mixed Element content 3978 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 3979 * 3980 * 3981 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 3982 * 3983 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 3984 * 3985 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 3986 * 3987 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 3988 * 3989 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 3990 * TODO Parameter-entity replacement text must be properly nested 3991 * with parenthetized groups. That is to say, if either of the 3992 * opening or closing parentheses in a choice, seq, or Mixed 3993 * construct is contained in the replacement text for a parameter 3994 * entity, both must be contained in the same replacement text. For 3995 * interoperability, if a parameter-entity reference appears in a 3996 * choice, seq, or Mixed construct, its replacement text should not 3997 * be empty, and neither the first nor last non-blank character of 3998 * the replacement text should be a connector (| or ,). 3999 * 4000 * returns: the tree of xmlElementContentPtr describing the element 4001 * hierarchy. 
4002 */ 4003xmlElementContentPtr 4004#ifdef VMS 4005xmlParseElementChildrenContentD 4006#else 4007xmlParseElementChildrenContentDecl 4008#endif 4009(xmlParserCtxtPtr ctxt) { 4010 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4011 xmlChar *elem; 4012 xmlChar type = 0; 4013 4014 SKIP_BLANKS; 4015 GROW; 4016 if (RAW == '(') { 4017 /* Recurse on first child */ 4018 NEXT; 4019 SKIP_BLANKS; 4020 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4021 SKIP_BLANKS; 4022 GROW; 4023 } else { 4024 elem = xmlParseNameComplex(ctxt); 4025 if (elem == NULL) { 4026 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4028 ctxt->sax->error(ctxt->userData, 4029 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4030 ctxt->wellFormed = 0; 4031 ctxt->disableSAX = 1; 4032 return(NULL); 4033 } 4034 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4035 GROW; 4036 if (RAW == '?') { 4037 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4038 NEXT; 4039 } else if (RAW == '*') { 4040 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4041 NEXT; 4042 } else if (RAW == '+') { 4043 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4044 NEXT; 4045 } else { 4046 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4047 } 4048 xmlFree(elem); 4049 GROW; 4050 } 4051 SKIP_BLANKS; 4052 SHRINK; 4053 while (RAW != ')') { 4054 /* 4055 * Each loop we parse one separator and one element. 
4056 */ 4057 if (RAW == ',') { 4058 if (type == 0) type = CUR; 4059 4060 /* 4061 * Detect "Name | Name , Name" error 4062 */ 4063 else if (type != CUR) { 4064 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4066 ctxt->sax->error(ctxt->userData, 4067 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4068 type); 4069 ctxt->wellFormed = 0; 4070 ctxt->disableSAX = 1; 4071 if ((op != NULL) && (op != ret)) 4072 xmlFreeElementContent(op); 4073 if ((last != NULL) && (last != ret) && 4074 (last != ret->c1) && (last != ret->c2)) 4075 xmlFreeElementContent(last); 4076 if (ret != NULL) 4077 xmlFreeElementContent(ret); 4078 return(NULL); 4079 } 4080 NEXT; 4081 4082 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4083 if (op == NULL) { 4084 xmlFreeElementContent(ret); 4085 return(NULL); 4086 } 4087 if (last == NULL) { 4088 op->c1 = ret; 4089 ret = cur = op; 4090 } else { 4091 cur->c2 = op; 4092 op->c1 = last; 4093 cur =op; 4094 last = NULL; 4095 } 4096 } else if (RAW == '|') { 4097 if (type == 0) type = CUR; 4098 4099 /* 4100 * Detect "Name , Name | Name" error 4101 */ 4102 else if (type != CUR) { 4103 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4105 ctxt->sax->error(ctxt->userData, 4106 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4107 type); 4108 ctxt->wellFormed = 0; 4109 ctxt->disableSAX = 1; 4110 if ((op != NULL) && (op != ret) && (op != last)) 4111 xmlFreeElementContent(op); 4112 if ((last != NULL) && (last != ret) && 4113 (last != ret->c1) && (last != ret->c2)) 4114 xmlFreeElementContent(last); 4115 if (ret != NULL) 4116 xmlFreeElementContent(ret); 4117 return(NULL); 4118 } 4119 NEXT; 4120 4121 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4122 if (op == NULL) { 4123 if ((op != NULL) && (op != ret)) 4124 xmlFreeElementContent(op); 4125 if ((last != NULL) && (last != ret) && 4126 (last != ret->c1) && (last != ret->c2)) 4127 
xmlFreeElementContent(last); 4128 if (ret != NULL) 4129 xmlFreeElementContent(ret); 4130 return(NULL); 4131 } 4132 if (last == NULL) { 4133 op->c1 = ret; 4134 ret = cur = op; 4135 } else { 4136 cur->c2 = op; 4137 op->c1 = last; 4138 cur =op; 4139 last = NULL; 4140 } 4141 } else { 4142 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4144 ctxt->sax->error(ctxt->userData, 4145 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4146 ctxt->wellFormed = 0; 4147 ctxt->disableSAX = 1; 4148 if ((op != NULL) && (op != ret)) 4149 xmlFreeElementContent(op); 4150 if ((last != NULL) && (last != ret) && 4151 (last != ret->c1) && (last != ret->c2)) 4152 xmlFreeElementContent(last); 4153 if (ret != NULL) 4154 xmlFreeElementContent(ret); 4155 return(NULL); 4156 } 4157 GROW; 4158 SKIP_BLANKS; 4159 GROW; 4160 if (RAW == '(') { 4161 /* Recurse on second child */ 4162 NEXT; 4163 SKIP_BLANKS; 4164 last = xmlParseElementChildrenContentDecl(ctxt); 4165 SKIP_BLANKS; 4166 } else { 4167 elem = xmlParseNameComplex(ctxt); 4168 if (elem == NULL) { 4169 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4171 ctxt->sax->error(ctxt->userData, 4172 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4173 ctxt->wellFormed = 0; 4174 ctxt->disableSAX = 1; 4175 if ((op != NULL) && (op != ret)) 4176 xmlFreeElementContent(op); 4177 if ((last != NULL) && (last != ret) && 4178 (last != ret->c1) && (last != ret->c2)) 4179 xmlFreeElementContent(last); 4180 if (ret != NULL) 4181 xmlFreeElementContent(ret); 4182 return(NULL); 4183 } 4184 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4185 xmlFree(elem); 4186 if (RAW == '?') { 4187 last->ocur = XML_ELEMENT_CONTENT_OPT; 4188 NEXT; 4189 } else if (RAW == '*') { 4190 last->ocur = XML_ELEMENT_CONTENT_MULT; 4191 NEXT; 4192 } else if (RAW == '+') { 4193 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4194 NEXT; 4195 } 
else { 4196 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4197 } 4198 } 4199 SKIP_BLANKS; 4200 GROW; 4201 } 4202 if ((cur != NULL) && (last != NULL)) { 4203 cur->c2 = last; 4204 } 4205 ctxt->entity = ctxt->input; 4206 NEXT; 4207 if (RAW == '?') { 4208 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4209 NEXT; 4210 } else if (RAW == '*') { 4211 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4212 NEXT; 4213 } else if (RAW == '+') { 4214 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4215 NEXT; 4216 } 4217 return(ret); 4218} 4219 4220/** 4221 * xmlParseElementContentDecl: 4222 * @ctxt: an XML parser context 4223 * @name: the name of the element being defined. 4224 * @result: the Element Content pointer will be stored here if any 4225 * 4226 * parse the declaration for an Element content either Mixed or Children, 4227 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4228 * 4229 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4230 * 4231 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4232 */ 4233 4234int 4235xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4236 xmlElementContentPtr *result) { 4237 4238 xmlElementContentPtr tree = NULL; 4239 xmlParserInputPtr input = ctxt->input; 4240 int res; 4241 4242 *result = NULL; 4243 4244 if (RAW != '(') { 4245 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4247 ctxt->sax->error(ctxt->userData, 4248 "xmlParseElementContentDecl : '(' expected\n"); 4249 ctxt->wellFormed = 0; 4250 ctxt->disableSAX = 1; 4251 return(-1); 4252 } 4253 NEXT; 4254 GROW; 4255 SKIP_BLANKS; 4256 if ((RAW == '#') && (NXT(1) == 'P') && 4257 (NXT(2) == 'C') && (NXT(3) == 'D') && 4258 (NXT(4) == 'A') && (NXT(5) == 'T') && 4259 (NXT(6) == 'A')) { 4260 tree = xmlParseElementMixedContentDecl(ctxt); 4261 res = XML_ELEMENT_TYPE_MIXED; 4262 } else { 4263 tree = xmlParseElementChildrenContentDecl(ctxt); 4264 res = XML_ELEMENT_TYPE_ELEMENT; 4265 } 4266 if ((ctxt->entity != NULL) && (input 
!= ctxt->entity)) { 4267 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4269 ctxt->sax->error(ctxt->userData, 4270"Element content declaration doesn't start and stop in the same entity\n"); 4271 ctxt->wellFormed = 0; 4272 ctxt->disableSAX = 1; 4273 } 4274 SKIP_BLANKS; 4275 *result = tree; 4276 return(res); 4277} 4278 4279/** 4280 * xmlParseElementDecl: 4281 * @ctxt: an XML parser context 4282 * 4283 * parse an Element declaration. 4284 * 4285 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 4286 * 4287 * [ VC: Unique Element Type Declaration ] 4288 * No element type may be declared more than once 4289 * 4290 * Returns the type of the element, or -1 in case of error 4291 */ 4292int 4293xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4294 xmlChar *name; 4295 int ret = -1; 4296 xmlElementContentPtr content = NULL; 4297 4298 GROW; 4299 if ((RAW == '<') && (NXT(1) == '!') && 4300 (NXT(2) == 'E') && (NXT(3) == 'L') && 4301 (NXT(4) == 'E') && (NXT(5) == 'M') && 4302 (NXT(6) == 'E') && (NXT(7) == 'N') && 4303 (NXT(8) == 'T')) { 4304 xmlParserInputPtr input = ctxt->input; 4305 4306 SKIP(9); 4307 if (!IS_BLANK(CUR)) { 4308 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4310 ctxt->sax->error(ctxt->userData, 4311 "Space required after 'ELEMENT'\n"); 4312 ctxt->wellFormed = 0; 4313 ctxt->disableSAX = 1; 4314 } 4315 SKIP_BLANKS; 4316 name = xmlParseNameComplex(ctxt); 4317 if (name == NULL) { 4318 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4320 ctxt->sax->error(ctxt->userData, 4321 "xmlParseElementDecl: no name for Element\n"); 4322 ctxt->wellFormed = 0; 4323 ctxt->disableSAX = 1; 4324 return(-1); 4325 } 4326 while ((RAW == 0) && (ctxt->inputNr > 1)) 4327 xmlPopInput(ctxt); 4328 if (!IS_BLANK(CUR)) { 4329 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4331 
ctxt->sax->error(ctxt->userData, 4332 "Space required after the element name\n"); 4333 ctxt->wellFormed = 0; 4334 ctxt->disableSAX = 1; 4335 } 4336 SKIP_BLANKS; 4337 if ((RAW == 'E') && (NXT(1) == 'M') && 4338 (NXT(2) == 'P') && (NXT(3) == 'T') && 4339 (NXT(4) == 'Y')) { 4340 SKIP(5); 4341 /* 4342 * Element must always be empty. 4343 */ 4344 ret = XML_ELEMENT_TYPE_EMPTY; 4345 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4346 (NXT(2) == 'Y')) { 4347 SKIP(3); 4348 /* 4349 * Element is a generic container. 4350 */ 4351 ret = XML_ELEMENT_TYPE_ANY; 4352 } else if (RAW == '(') { 4353 ret = xmlParseElementContentDecl(ctxt, name, &content); 4354 } else { 4355 /* 4356 * [ WFC: PEs in Internal Subset ] error handling. 4357 */ 4358 if ((RAW == '%') && (ctxt->external == 0) && 4359 (ctxt->inputNr == 1)) { 4360 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4362 ctxt->sax->error(ctxt->userData, 4363 "PEReference: forbidden within markup decl in internal subset\n"); 4364 } else { 4365 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4367 ctxt->sax->error(ctxt->userData, 4368 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4369 } 4370 ctxt->wellFormed = 0; 4371 ctxt->disableSAX = 1; 4372 if (name != NULL) xmlFree(name); 4373 return(-1); 4374 } 4375 4376 SKIP_BLANKS; 4377 /* 4378 * Pop-up of finished entities. 
4379 */ 4380 while ((RAW == 0) && (ctxt->inputNr > 1)) 4381 xmlPopInput(ctxt); 4382 SKIP_BLANKS; 4383 4384 if (RAW != '>') { 4385 ctxt->errNo = XML_ERR_GT_REQUIRED; 4386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4387 ctxt->sax->error(ctxt->userData, 4388 "xmlParseElementDecl: expected '>' at the end\n"); 4389 ctxt->wellFormed = 0; 4390 ctxt->disableSAX = 1; 4391 } else { 4392 if (input != ctxt->input) { 4393 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; 4394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4395 ctxt->sax->error(ctxt->userData, 4396"Element declaration doesn't start and stop in the same entity\n"); 4397 ctxt->wellFormed = 0; 4398 ctxt->disableSAX = 1; 4399 } 4400 4401 NEXT; 4402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4403 (ctxt->sax->elementDecl != NULL)) 4404 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4405 content); 4406 } 4407 if (content != NULL) { 4408 xmlFreeElementContent(content); 4409 } 4410 if (name != NULL) { 4411 xmlFree(name); 4412 } 4413 } 4414 return(ret); 4415} 4416 4417/** 4418 * xmlParseMarkupDecl: 4419 * @ctxt: an XML parser context 4420 * 4421 * parse Markup declarations 4422 * 4423 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4424 * NotationDecl | PI | Comment 4425 * 4426 * [ VC: Proper Declaration/PE Nesting ] 4427 * Parameter-entity replacement text must be properly nested with 4428 * markup declarations. That is to say, if either the first character 4429 * or the last character of a markup declaration (markupdecl above) is 4430 * contained in the replacement text for a parameter-entity reference, 4431 * both must be contained in the same replacement text. 4432 * 4433 * [ WFC: PEs in Internal Subset ] 4434 * In the internal DTD subset, parameter-entity references can occur 4435 * only where markup declarations can occur, not within markup declarations. 4436 * (This does not apply to references that occur in external parameter 4437 * entities or to the external subset.) 
4438 */ 4439void 4440xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4441 GROW; 4442 xmlParseElementDecl(ctxt); 4443 xmlParseAttributeListDecl(ctxt); 4444 xmlParseEntityDecl(ctxt); 4445 xmlParseNotationDecl(ctxt); 4446 xmlParsePI(ctxt); 4447 xmlParseComment(ctxt); 4448 /* 4449 * This is only for internal subset. On external entities, 4450 * the replacement is done before parsing stage 4451 */ 4452 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4453 xmlParsePEReference(ctxt); 4454 ctxt->instate = XML_PARSER_DTD; 4455} 4456 4457/** 4458 * xmlParseTextDecl: 4459 * @ctxt: an XML parser context 4460 * 4461 * parse an XML declaration header for external entities 4462 * 4463 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4464 * 4465 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 4466 */ 4467 4468void 4469xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4470 xmlChar *version; 4471 4472 /* 4473 * We know that '<?xml' is here. 4474 */ 4475 if ((RAW == '<') && (NXT(1) == '?') && 4476 (NXT(2) == 'x') && (NXT(3) == 'm') && 4477 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 4478 SKIP(5); 4479 } else { 4480 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; 4481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4482 ctxt->sax->error(ctxt->userData, 4483 "Text declaration '<?xml' required\n"); 4484 ctxt->wellFormed = 0; 4485 ctxt->disableSAX = 1; 4486 4487 return; 4488 } 4489 4490 if (!IS_BLANK(CUR)) { 4491 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4493 ctxt->sax->error(ctxt->userData, 4494 "Space needed after '<?xml'\n"); 4495 ctxt->wellFormed = 0; 4496 ctxt->disableSAX = 1; 4497 } 4498 SKIP_BLANKS; 4499 4500 /* 4501 * We may have the VersionInfo here. 
4502 */ 4503 version = xmlParseVersionInfo(ctxt); 4504 if (version == NULL) 4505 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4506 ctxt->input->version = version; 4507 4508 /* 4509 * We must have the encoding declaration 4510 */ 4511 if (!IS_BLANK(CUR)) { 4512 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4514 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4515 ctxt->wellFormed = 0; 4516 ctxt->disableSAX = 1; 4517 } 4518 xmlParseEncodingDecl(ctxt); 4519 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4520 /* 4521 * The XML REC instructs us to stop parsing right here 4522 */ 4523 return; 4524 } 4525 4526 SKIP_BLANKS; 4527 if ((RAW == '?') && (NXT(1) == '>')) { 4528 SKIP(2); 4529 } else if (RAW == '>') { 4530 /* Deprecated old WD ... */ 4531 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4533 ctxt->sax->error(ctxt->userData, 4534 "XML declaration must end-up with '?>'\n"); 4535 ctxt->wellFormed = 0; 4536 ctxt->disableSAX = 1; 4537 NEXT; 4538 } else { 4539 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4541 ctxt->sax->error(ctxt->userData, 4542 "parsing XML declaration: '?>' expected\n"); 4543 ctxt->wellFormed = 0; 4544 ctxt->disableSAX = 1; 4545 MOVETO_ENDTAG(CUR_PTR); 4546 NEXT; 4547 } 4548} 4549 4550/* 4551 * xmlParseConditionalSections 4552 * @ctxt: an XML parser context 4553 * 4554 * [61] conditionalSect ::= includeSect | ignoreSect 4555 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4556 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 4557 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4558 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4559 */ 4560 4561void 4562xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4563 SKIP(3); 4564 SKIP_BLANKS; 4565 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && 4566 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && 4567 (NXT(6) == 'E')) { 4568 SKIP(7); 4569 SKIP_BLANKS; 4570 if (RAW != '[') { 4571 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4573 ctxt->sax->error(ctxt->userData, 4574 "XML conditional section '[' expected\n"); 4575 ctxt->wellFormed = 0; 4576 ctxt->disableSAX = 1; 4577 } else { 4578 NEXT; 4579 } 4580 if (xmlParserDebugEntities) { 4581 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4582 xmlGenericError(xmlGenericErrorContext, 4583 "%s(%d): ", ctxt->input->filename, 4584 ctxt->input->line); 4585 xmlGenericError(xmlGenericErrorContext, 4586 "Entering INCLUDE Conditional Section\n"); 4587 } 4588 4589 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 4590 (NXT(2) != '>'))) { 4591 const xmlChar *check = CUR_PTR; 4592 int cons = ctxt->input->consumed; 4593 int tok = ctxt->token; 4594 4595 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4596 xmlParseConditionalSections(ctxt); 4597 } else if (IS_BLANK(CUR)) { 4598 NEXT; 4599 } else if (RAW == '%') { 4600 xmlParsePEReference(ctxt); 4601 } else 4602 xmlParseMarkupDecl(ctxt); 4603 4604 /* 4605 * Pop-up of finished entities. 
4606 */ 4607 while ((RAW == 0) && (ctxt->inputNr > 1)) 4608 xmlPopInput(ctxt); 4609 4610 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4611 (tok == ctxt->token)) { 4612 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4614 ctxt->sax->error(ctxt->userData, 4615 "Content error in the external subset\n"); 4616 ctxt->wellFormed = 0; 4617 ctxt->disableSAX = 1; 4618 break; 4619 } 4620 } 4621 if (xmlParserDebugEntities) { 4622 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4623 xmlGenericError(xmlGenericErrorContext, 4624 "%s(%d): ", ctxt->input->filename, 4625 ctxt->input->line); 4626 xmlGenericError(xmlGenericErrorContext, 4627 "Leaving INCLUDE Conditional Section\n"); 4628 } 4629 4630 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && 4631 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { 4632 int state; 4633 int instate; 4634 int depth = 0; 4635 4636 SKIP(6); 4637 SKIP_BLANKS; 4638 if (RAW != '[') { 4639 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4641 ctxt->sax->error(ctxt->userData, 4642 "XML conditional section '[' expected\n"); 4643 ctxt->wellFormed = 0; 4644 ctxt->disableSAX = 1; 4645 } else { 4646 NEXT; 4647 } 4648 if (xmlParserDebugEntities) { 4649 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4650 xmlGenericError(xmlGenericErrorContext, 4651 "%s(%d): ", ctxt->input->filename, 4652 ctxt->input->line); 4653 xmlGenericError(xmlGenericErrorContext, 4654 "Entering IGNORE Conditional Section\n"); 4655 } 4656 4657 /* 4658 * Parse up to the end of the conditionnal section 4659 * But disable SAX event generating DTD building in the meantime 4660 */ 4661 state = ctxt->disableSAX; 4662 instate = ctxt->instate; 4663 ctxt->disableSAX = 1; 4664 ctxt->instate = XML_PARSER_IGNORE; 4665 4666 while (depth >= 0) { 4667 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4668 depth++; 4669 SKIP(3); 4670 continue; 
4671 } 4672 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4673 if (--depth >= 0) SKIP(3); 4674 continue; 4675 } 4676 NEXT; 4677 continue; 4678 } 4679 4680 ctxt->disableSAX = state; 4681 ctxt->instate = instate; 4682 4683 if (xmlParserDebugEntities) { 4684 if ((ctxt->input != NULL) && (ctxt->input->filename)) 4685 xmlGenericError(xmlGenericErrorContext, 4686 "%s(%d): ", ctxt->input->filename, 4687 ctxt->input->line); 4688 xmlGenericError(xmlGenericErrorContext, 4689 "Leaving IGNORE Conditional Section\n"); 4690 } 4691 4692 } else { 4693 ctxt->errNo = XML_ERR_CONDSEC_INVALID; 4694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4695 ctxt->sax->error(ctxt->userData, 4696 "XML conditional section INCLUDE or IGNORE keyword expected\n"); 4697 ctxt->wellFormed = 0; 4698 ctxt->disableSAX = 1; 4699 } 4700 4701 if (RAW == 0) 4702 SHRINK; 4703 4704 if (RAW == 0) { 4705 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4707 ctxt->sax->error(ctxt->userData, 4708 "XML conditional section not closed\n"); 4709 ctxt->wellFormed = 0; 4710 ctxt->disableSAX = 1; 4711 } else { 4712 SKIP(3); 4713 } 4714} 4715 4716/** 4717 * xmlParseExternalSubset: 4718 * @ctxt: an XML parser context 4719 * @ExternalID: the external identifier 4720 * @SystemID: the system identifier (or URL) 4721 * 4722 * parse Markup declarations from an external subset 4723 * 4724 * [30] extSubset ::= textDecl? 
extSubsetDecl 4725 * 4726 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4727 */ 4728void 4729xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4730 const xmlChar *SystemID) { 4731 GROW; 4732 if ((RAW == '<') && (NXT(1) == '?') && 4733 (NXT(2) == 'x') && (NXT(3) == 'm') && 4734 (NXT(4) == 'l')) { 4735 xmlParseTextDecl(ctxt); 4736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 4737 /* 4738 * The XML REC instructs us to stop parsing right here 4739 */ 4740 ctxt->instate = XML_PARSER_EOF; 4741 return; 4742 } 4743 } 4744 if (ctxt->myDoc == NULL) { 4745 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4746 } 4747 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4748 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 4749 4750 ctxt->instate = XML_PARSER_DTD; 4751 ctxt->external = 1; 4752 while (((RAW == '<') && (NXT(1) == '?')) || 4753 ((RAW == '<') && (NXT(1) == '!')) || 4754 IS_BLANK(CUR)) { 4755 const xmlChar *check = CUR_PTR; 4756 int cons = ctxt->input->consumed; 4757 int tok = ctxt->token; 4758 4759 GROW; 4760 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4761 xmlParseConditionalSections(ctxt); 4762 } else if (IS_BLANK(CUR)) { 4763 NEXT; 4764 } else if (RAW == '%') { 4765 xmlParsePEReference(ctxt); 4766 } else 4767 xmlParseMarkupDecl(ctxt); 4768 4769 /* 4770 * Pop-up of finished entities. 
4771 */ 4772 while ((RAW == 0) && (ctxt->inputNr > 1)) 4773 xmlPopInput(ctxt); 4774 4775 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && 4776 (tok == ctxt->token)) { 4777 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4779 ctxt->sax->error(ctxt->userData, 4780 "Content error in the external subset\n"); 4781 ctxt->wellFormed = 0; 4782 ctxt->disableSAX = 1; 4783 break; 4784 } 4785 } 4786 4787 if (RAW != 0) { 4788 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4790 ctxt->sax->error(ctxt->userData, 4791 "Extra content at the end of the document\n"); 4792 ctxt->wellFormed = 0; 4793 ctxt->disableSAX = 1; 4794 } 4795 4796} 4797 4798/** 4799 * xmlParseReference: 4800 * @ctxt: an XML parser context 4801 * 4802 * parse and handle entity references in content, depending on the SAX 4803 * interface, this may end-up in a call to character() if this is a 4804 * CharRef, a predefined entity, if there is no reference() callback. 4805 * or if the parser was asked to switch to that mode. 4806 * 4807 * [67] Reference ::= EntityRef | CharRef 4808 */ 4809void 4810xmlParseReference(xmlParserCtxtPtr ctxt) { 4811 xmlEntityPtr ent; 4812 xmlChar *val; 4813 if (RAW != '&') return; 4814 4815 if (NXT(1) == '#') { 4816 int i = 0; 4817 xmlChar out[10]; 4818 int hex = NXT(2); 4819 int val = xmlParseCharRef(ctxt); 4820 4821 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 4822 /* 4823 * So we are using non-UTF-8 buffers 4824 * Check that the char fit on 8bits, if not 4825 * generate a CharRef. 
4826 */ 4827 if (val <= 0xFF) { 4828 out[0] = val; 4829 out[1] = 0; 4830 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4831 (!ctxt->disableSAX)) 4832 ctxt->sax->characters(ctxt->userData, out, 1); 4833 } else { 4834 if ((hex == 'x') || (hex == 'X')) 4835 sprintf((char *)out, "#x%X", val); 4836 else 4837 sprintf((char *)out, "#%d", val); 4838 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4839 (!ctxt->disableSAX)) 4840 ctxt->sax->reference(ctxt->userData, out); 4841 } 4842 } else { 4843 /* 4844 * Just encode the value in UTF-8 4845 */ 4846 COPY_BUF(0 ,out, i, val); 4847 out[i] = 0; 4848 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 4849 (!ctxt->disableSAX)) 4850 ctxt->sax->characters(ctxt->userData, out, i); 4851 } 4852 } else { 4853 ent = xmlParseEntityRef(ctxt); 4854 if (ent == NULL) return; 4855 if ((ent->name != NULL) && 4856 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 4857 xmlNodePtr list = NULL; 4858 int ret; 4859 4860 4861 /* 4862 * The first reference to the entity trigger a parsing phase 4863 * where the ent->children is filled with the result from 4864 * the parsing. 4865 */ 4866 if (ent->children == NULL) { 4867 xmlChar *value; 4868 value = ent->content; 4869 4870 /* 4871 * Check that this entity is well formed 4872 */ 4873 if ((value != NULL) && 4874 (value[1] == 0) && (value[0] == '<') && 4875 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 4876 /* 4877 * DONE: get definite answer on this !!! 4878 * Lots of entity decls are used to declare a single 4879 * char 4880 * <!ENTITY lt "<"> 4881 * Which seems to be valid since 4882 * 2.4: The ampersand character (&) and the left angle 4883 * bracket (<) may appear in their literal form only 4884 * when used ... They are also legal within the literal 4885 * entity value of an internal entity declaration;i 4886 * see "4.3.2 Well-Formed Parsed Entities". 4887 * IMHO 2.4 and 4.3.2 are directly in contradiction. 
4888 * Looking at the OASIS test suite and James Clark 4889 * tests, this is broken. However the XML REC uses 4890 * it. Is the XML REC not well-formed ???? 4891 * This is a hack to avoid this problem 4892 * 4893 * ANSWER: since lt gt amp .. are already defined, 4894 * this is a redefinition and hence the fact that the 4895 * contentis not well balanced is not a Wf error, this 4896 * is lousy but acceptable. 4897 */ 4898 list = xmlNewDocText(ctxt->myDoc, value); 4899 if (list != NULL) { 4900 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4901 (ent->children == NULL)) { 4902 ent->children = list; 4903 ent->last = list; 4904 list->parent = (xmlNodePtr) ent; 4905 } else { 4906 xmlFreeNodeList(list); 4907 } 4908 } else if (list != NULL) { 4909 xmlFreeNodeList(list); 4910 } 4911 } else { 4912 /* 4913 * 4.3.2: An internal general parsed entity is well-formed 4914 * if its replacement text matches the production labeled 4915 * content. 4916 */ 4917 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 4918 ctxt->depth++; 4919 ret = xmlParseBalancedChunkMemory(ctxt->myDoc, 4920 ctxt->sax, NULL, ctxt->depth, 4921 value, &list); 4922 ctxt->depth--; 4923 } else if (ent->etype == 4924 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 4925 ctxt->depth++; 4926 ret = xmlParseExternalEntity(ctxt->myDoc, 4927 ctxt->sax, NULL, ctxt->depth, 4928 ent->URI, ent->ExternalID, &list); 4929 ctxt->depth--; 4930 } else { 4931 ret = -1; 4932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4933 ctxt->sax->error(ctxt->userData, 4934 "Internal: invalid entity type\n"); 4935 } 4936 if (ret == XML_ERR_ENTITY_LOOP) { 4937 ctxt->errNo = XML_ERR_ENTITY_LOOP; 4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4939 ctxt->sax->error(ctxt->userData, 4940 "Detected entity reference loop\n"); 4941 ctxt->wellFormed = 0; 4942 ctxt->disableSAX = 1; 4943 } else if ((ret == 0) && (list != NULL)) { 4944 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 4945 (ent->children == NULL)) { 4946 ent->children = 
list; 4947 while (list != NULL) { 4948 list->parent = (xmlNodePtr) ent; 4949 if (list->next == NULL) 4950 ent->last = list; 4951 list = list->next; 4952 } 4953 } else { 4954 xmlFreeNodeList(list); 4955 } 4956 } else if (ret > 0) { 4957 ctxt->errNo = ret; 4958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4959 ctxt->sax->error(ctxt->userData, 4960 "Entity value required\n"); 4961 ctxt->wellFormed = 0; 4962 ctxt->disableSAX = 1; 4963 } else if (list != NULL) { 4964 xmlFreeNodeList(list); 4965 } 4966 } 4967 } 4968 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4969 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 4970 /* 4971 * Create a node. 4972 */ 4973 ctxt->sax->reference(ctxt->userData, ent->name); 4974 return; 4975 } else if (ctxt->replaceEntities) { 4976 if ((ctxt->node != NULL) && (ent->children != NULL)) { 4977 /* 4978 * Seems we are generating the DOM content, do 4979 * a simple tree copy 4980 */ 4981 xmlNodePtr new; 4982 new = xmlCopyNodeList(ent->children); 4983 4984 xmlAddChildList(ctxt->node, new); 4985 /* 4986 * This is to avoid a nasty side effect, see 4987 * characters() in SAX.c 4988 */ 4989 ctxt->nodemem = 0; 4990 ctxt->nodelen = 0; 4991 return; 4992 } else { 4993 /* 4994 * Probably running in SAX mode 4995 */ 4996 xmlParserInputPtr input; 4997 4998 input = xmlNewEntityInputStream(ctxt, ent); 4999 xmlPushInput(ctxt, input); 5000 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5001 (RAW == '<') && (NXT(1) == '?') && 5002 (NXT(2) == 'x') && (NXT(3) == 'm') && 5003 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5004 xmlParseTextDecl(ctxt); 5005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5006 /* 5007 * The XML REC instructs us to stop parsing right here 5008 */ 5009 ctxt->instate = XML_PARSER_EOF; 5010 return; 5011 } 5012 if (input->standalone == 1) { 5013 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; 5014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5015 ctxt->sax->error(ctxt->userData, 5016 "external 
parsed entities cannot be standalone\n"); 5017 ctxt->wellFormed = 0; 5018 ctxt->disableSAX = 1; 5019 } 5020 } 5021 return; 5022 } 5023 } 5024 } else { 5025 val = ent->content; 5026 if (val == NULL) return; 5027 /* 5028 * inline the entity. 5029 */ 5030 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5031 (!ctxt->disableSAX)) 5032 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5033 } 5034 } 5035} 5036 5037/** 5038 * xmlParseEntityRef: 5039 * @ctxt: an XML parser context 5040 * 5041 * parse ENTITY references declarations 5042 * 5043 * [68] EntityRef ::= '&' Name ';' 5044 * 5045 * [ WFC: Entity Declared ] 5046 * In a document without any DTD, a document with only an internal DTD 5047 * subset which contains no parameter entity references, or a document 5048 * with "standalone='yes'", the Name given in the entity reference 5049 * must match that in an entity declaration, except that well-formed 5050 * documents need not declare any of the following entities: amp, lt, 5051 * gt, apos, quot. The declaration of a parameter entity must precede 5052 * any reference to it. Similarly, the declaration of a general entity 5053 * must precede any reference to it which appears in a default value in an 5054 * attribute-list declaration. Note that if entities are declared in the 5055 * external subset or in external parameter entities, a non-validating 5056 * processor is not obligated to read and process their declarations; 5057 * for such documents, the rule that an entity must be declared is a 5058 * well-formedness constraint only if standalone='yes'. 5059 * 5060 * [ WFC: Parsed Entity ] 5061 * An entity reference must not contain the name of an unparsed entity 5062 * 5063 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5064 */ 5065xmlEntityPtr 5066xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5067 xmlChar *name; 5068 xmlEntityPtr ent = NULL; 5069 5070 GROW; 5071 5072 if (RAW == '&') { 5073 NEXT; 5074 name = xmlParseName(ctxt); 5075 if (name == NULL) { 5076 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5078 ctxt->sax->error(ctxt->userData, 5079 "xmlParseEntityRef: no name\n"); 5080 ctxt->wellFormed = 0; 5081 ctxt->disableSAX = 1; 5082 } else { 5083 if (RAW == ';') { 5084 NEXT; 5085 /* 5086 * Ask first SAX for entity resolution, otherwise try the 5087 * predefined set. 5088 */ 5089 if (ctxt->sax != NULL) { 5090 if (ctxt->sax->getEntity != NULL) 5091 ent = ctxt->sax->getEntity(ctxt->userData, name); 5092 if (ent == NULL) 5093 ent = xmlGetPredefinedEntity(name); 5094 } 5095 /* 5096 * [ WFC: Entity Declared ] 5097 * In a document without any DTD, a document with only an 5098 * internal DTD subset which contains no parameter entity 5099 * references, or a document with "standalone='yes'", the 5100 * Name given in the entity reference must match that in an 5101 * entity declaration, except that well-formed documents 5102 * need not declare any of the following entities: amp, lt, 5103 * gt, apos, quot. 5104 * The declaration of a parameter entity must precede any 5105 * reference to it. 5106 * Similarly, the declaration of a general entity must 5107 * precede any reference to it which appears in a default 5108 * value in an attribute-list declaration. Note that if 5109 * entities are declared in the external subset or in 5110 * external parameter entities, a non-validating processor 5111 * is not obligated to read and process their declarations; 5112 * for such documents, the rule that an entity must be 5113 * declared is a well-formedness constraint only if 5114 * standalone='yes'. 
5115 */ 5116 if (ent == NULL) { 5117 if ((ctxt->standalone == 1) || 5118 ((ctxt->hasExternalSubset == 0) && 5119 (ctxt->hasPErefs == 0))) { 5120 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5122 ctxt->sax->error(ctxt->userData, 5123 "Entity '%s' not defined\n", name); 5124 ctxt->wellFormed = 0; 5125 ctxt->disableSAX = 1; 5126 } else { 5127 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5128 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5129 ctxt->sax->warning(ctxt->userData, 5130 "Entity '%s' not defined\n", name); 5131 } 5132 } 5133 5134 /* 5135 * [ WFC: Parsed Entity ] 5136 * An entity reference must not contain the name of an 5137 * unparsed entity 5138 */ 5139 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5140 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5142 ctxt->sax->error(ctxt->userData, 5143 "Entity reference to unparsed entity %s\n", name); 5144 ctxt->wellFormed = 0; 5145 ctxt->disableSAX = 1; 5146 } 5147 5148 /* 5149 * [ WFC: No External Entity References ] 5150 * Attribute values cannot contain direct or indirect 5151 * entity references to external entities. 5152 */ 5153 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5154 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5155 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5157 ctxt->sax->error(ctxt->userData, 5158 "Attribute references external entity '%s'\n", name); 5159 ctxt->wellFormed = 0; 5160 ctxt->disableSAX = 1; 5161 } 5162 /* 5163 * [ WFC: No < in Attribute Values ] 5164 * The replacement text of any entity referred to directly or 5165 * indirectly in an attribute value (other than "<") must 5166 * not contain a <. 
5167 */ 5168 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5169 (ent != NULL) && 5170 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5171 (ent->content != NULL) && 5172 (xmlStrchr(ent->content, '<'))) { 5173 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5175 ctxt->sax->error(ctxt->userData, 5176 "'<' in entity '%s' is not allowed in attributes values\n", name); 5177 ctxt->wellFormed = 0; 5178 ctxt->disableSAX = 1; 5179 } 5180 5181 /* 5182 * Internal check, no parameter entities here ... 5183 */ 5184 else { 5185 switch (ent->etype) { 5186 case XML_INTERNAL_PARAMETER_ENTITY: 5187 case XML_EXTERNAL_PARAMETER_ENTITY: 5188 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5190 ctxt->sax->error(ctxt->userData, 5191 "Attempt to reference the parameter entity '%s'\n", name); 5192 ctxt->wellFormed = 0; 5193 ctxt->disableSAX = 1; 5194 break; 5195 default: 5196 break; 5197 } 5198 } 5199 5200 /* 5201 * [ WFC: No Recursion ] 5202 * A parsed entity must not contain a recursive reference 5203 * to itself, either directly or indirectly. 5204 * Done somewhere else 5205 */ 5206 5207 } else { 5208 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5210 ctxt->sax->error(ctxt->userData, 5211 "xmlParseEntityRef: expecting ';'\n"); 5212 ctxt->wellFormed = 0; 5213 ctxt->disableSAX = 1; 5214 } 5215 xmlFree(name); 5216 } 5217 } 5218 return(ent); 5219} 5220 5221/** 5222 * xmlParseStringEntityRef: 5223 * @ctxt: an XML parser context 5224 * @str: a pointer to an index in the string 5225 * 5226 * parse ENTITY references declarations, but this version parses it from 5227 * a string value. 
5228 * 5229 * [68] EntityRef ::= '&' Name ';' 5230 * 5231 * [ WFC: Entity Declared ] 5232 * In a document without any DTD, a document with only an internal DTD 5233 * subset which contains no parameter entity references, or a document 5234 * with "standalone='yes'", the Name given in the entity reference 5235 * must match that in an entity declaration, except that well-formed 5236 * documents need not declare any of the following entities: amp, lt, 5237 * gt, apos, quot. The declaration of a parameter entity must precede 5238 * any reference to it. Similarly, the declaration of a general entity 5239 * must precede any reference to it which appears in a default value in an 5240 * attribute-list declaration. Note that if entities are declared in the 5241 * external subset or in external parameter entities, a non-validating 5242 * processor is not obligated to read and process their declarations; 5243 * for such documents, the rule that an entity must be declared is a 5244 * well-formedness constraint only if standalone='yes'. 5245 * 5246 * [ WFC: Parsed Entity ] 5247 * An entity reference must not contain the name of an unparsed entity 5248 * 5249 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5250 * is updated to the current location in the string. 
5251 */ 5252xmlEntityPtr 5253xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5254 xmlChar *name; 5255 const xmlChar *ptr; 5256 xmlChar cur; 5257 xmlEntityPtr ent = NULL; 5258 5259 if ((str == NULL) || (*str == NULL)) 5260 return(NULL); 5261 ptr = *str; 5262 cur = *ptr; 5263 if (cur == '&') { 5264 ptr++; 5265 cur = *ptr; 5266 name = xmlParseStringName(ctxt, &ptr); 5267 if (name == NULL) { 5268 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5270 ctxt->sax->error(ctxt->userData, 5271 "xmlParseEntityRef: no name\n"); 5272 ctxt->wellFormed = 0; 5273 ctxt->disableSAX = 1; 5274 } else { 5275 if (*ptr == ';') { 5276 ptr++; 5277 /* 5278 * Ask first SAX for entity resolution, otherwise try the 5279 * predefined set. 5280 */ 5281 if (ctxt->sax != NULL) { 5282 if (ctxt->sax->getEntity != NULL) 5283 ent = ctxt->sax->getEntity(ctxt->userData, name); 5284 if (ent == NULL) 5285 ent = xmlGetPredefinedEntity(name); 5286 } 5287 /* 5288 * [ WFC: Entity Declared ] 5289 * In a document without any DTD, a document with only an 5290 * internal DTD subset which contains no parameter entity 5291 * references, or a document with "standalone='yes'", the 5292 * Name given in the entity reference must match that in an 5293 * entity declaration, except that well-formed documents 5294 * need not declare any of the following entities: amp, lt, 5295 * gt, apos, quot. 5296 * The declaration of a parameter entity must precede any 5297 * reference to it. 5298 * Similarly, the declaration of a general entity must 5299 * precede any reference to it which appears in a default 5300 * value in an attribute-list declaration. 
Note that if 5301 * entities are declared in the external subset or in 5302 * external parameter entities, a non-validating processor 5303 * is not obligated to read and process their declarations; 5304 * for such documents, the rule that an entity must be 5305 * declared is a well-formedness constraint only if 5306 * standalone='yes'. 5307 */ 5308 if (ent == NULL) { 5309 if ((ctxt->standalone == 1) || 5310 ((ctxt->hasExternalSubset == 0) && 5311 (ctxt->hasPErefs == 0))) { 5312 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5314 ctxt->sax->error(ctxt->userData, 5315 "Entity '%s' not defined\n", name); 5316 ctxt->wellFormed = 0; 5317 ctxt->disableSAX = 1; 5318 } else { 5319 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5321 ctxt->sax->warning(ctxt->userData, 5322 "Entity '%s' not defined\n", name); 5323 } 5324 } 5325 5326 /* 5327 * [ WFC: Parsed Entity ] 5328 * An entity reference must not contain the name of an 5329 * unparsed entity 5330 */ 5331 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5332 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5334 ctxt->sax->error(ctxt->userData, 5335 "Entity reference to unparsed entity %s\n", name); 5336 ctxt->wellFormed = 0; 5337 ctxt->disableSAX = 1; 5338 } 5339 5340 /* 5341 * [ WFC: No External Entity References ] 5342 * Attribute values cannot contain direct or indirect 5343 * entity references to external entities. 
5344 */ 5345 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5346 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5347 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5349 ctxt->sax->error(ctxt->userData, 5350 "Attribute references external entity '%s'\n", name); 5351 ctxt->wellFormed = 0; 5352 ctxt->disableSAX = 1; 5353 } 5354 /* 5355 * [ WFC: No < in Attribute Values ] 5356 * The replacement text of any entity referred to directly or 5357 * indirectly in an attribute value (other than "<") must 5358 * not contain a <. 5359 */ 5360 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5361 (ent != NULL) && 5362 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5363 (ent->content != NULL) && 5364 (xmlStrchr(ent->content, '<'))) { 5365 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5367 ctxt->sax->error(ctxt->userData, 5368 "'<' in entity '%s' is not allowed in attributes values\n", name); 5369 ctxt->wellFormed = 0; 5370 ctxt->disableSAX = 1; 5371 } 5372 5373 /* 5374 * Internal check, no parameter entities here ... 5375 */ 5376 else { 5377 switch (ent->etype) { 5378 case XML_INTERNAL_PARAMETER_ENTITY: 5379 case XML_EXTERNAL_PARAMETER_ENTITY: 5380 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5382 ctxt->sax->error(ctxt->userData, 5383 "Attempt to reference the parameter entity '%s'\n", name); 5384 ctxt->wellFormed = 0; 5385 ctxt->disableSAX = 1; 5386 break; 5387 default: 5388 break; 5389 } 5390 } 5391 5392 /* 5393 * [ WFC: No Recursion ] 5394 * A parsed entity must not contain a recursive reference 5395 * to itself, either directly or indirectly. 
5396 * Done somewhwere else 5397 */ 5398 5399 } else { 5400 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5402 ctxt->sax->error(ctxt->userData, 5403 "xmlParseEntityRef: expecting ';'\n"); 5404 ctxt->wellFormed = 0; 5405 ctxt->disableSAX = 1; 5406 } 5407 xmlFree(name); 5408 } 5409 } 5410 *str = ptr; 5411 return(ent); 5412} 5413 5414/** 5415 * xmlParsePEReference: 5416 * @ctxt: an XML parser context 5417 * 5418 * parse PEReference declarations 5419 * The entity content is handled directly by pushing it's content as 5420 * a new input stream. 5421 * 5422 * [69] PEReference ::= '%' Name ';' 5423 * 5424 * [ WFC: No Recursion ] 5425 * A parsed entity must not contain a recursive 5426 * reference to itself, either directly or indirectly. 5427 * 5428 * [ WFC: Entity Declared ] 5429 * In a document without any DTD, a document with only an internal DTD 5430 * subset which contains no parameter entity references, or a document 5431 * with "standalone='yes'", ... ... The declaration of a parameter 5432 * entity must precede any reference to it... 5433 * 5434 * [ VC: Entity Declared ] 5435 * In a document with an external subset or external parameter entities 5436 * with "standalone='no'", ... ... The declaration of a parameter entity 5437 * must precede any reference to it... 5438 * 5439 * [ WFC: In DTD ] 5440 * Parameter-entity references may only appear in the DTD. 5441 * NOTE: misleading but this is handled. 
5442 */ 5443void 5444xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5445 xmlChar *name; 5446 xmlEntityPtr entity = NULL; 5447 xmlParserInputPtr input; 5448 5449 if (RAW == '%') { 5450 NEXT; 5451 name = xmlParseNameComplex(ctxt); 5452 if (name == NULL) { 5453 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5455 ctxt->sax->error(ctxt->userData, 5456 "xmlParsePEReference: no name\n"); 5457 ctxt->wellFormed = 0; 5458 ctxt->disableSAX = 1; 5459 } else { 5460 if (RAW == ';') { 5461 NEXT; 5462 if ((ctxt->sax != NULL) && 5463 (ctxt->sax->getParameterEntity != NULL)) 5464 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5465 name); 5466 if (entity == NULL) { 5467 /* 5468 * [ WFC: Entity Declared ] 5469 * In a document without any DTD, a document with only an 5470 * internal DTD subset which contains no parameter entity 5471 * references, or a document with "standalone='yes'", ... 5472 * ... The declaration of a parameter entity must precede 5473 * any reference to it... 5474 */ 5475 if ((ctxt->standalone == 1) || 5476 ((ctxt->hasExternalSubset == 0) && 5477 (ctxt->hasPErefs == 0))) { 5478 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5479 if ((!ctxt->disableSAX) && 5480 (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5481 ctxt->sax->error(ctxt->userData, 5482 "PEReference: %%%s; not found\n", name); 5483 ctxt->wellFormed = 0; 5484 ctxt->disableSAX = 1; 5485 } else { 5486 /* 5487 * [ VC: Entity Declared ] 5488 * In a document with an external subset or external 5489 * parameter entities with "standalone='no'", ... 5490 * ... The declaration of a parameter entity must precede 5491 * any reference to it... 
5492 */ 5493 if ((!ctxt->disableSAX) && 5494 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5495 ctxt->sax->warning(ctxt->userData, 5496 "PEReference: %%%s; not found\n", name); 5497 ctxt->valid = 0; 5498 } 5499 } else { 5500 /* 5501 * Internal checking in case the entity quest barfed 5502 */ 5503 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5504 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5505 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5506 ctxt->sax->warning(ctxt->userData, 5507 "Internal: %%%s; is not a parameter entity\n", name); 5508 } else { 5509 /* 5510 * TODO !!! 5511 * handle the extra spaces added before and after 5512 * c.f. http://www.w3.org/TR/REC-xml#as-PE 5513 */ 5514 input = xmlNewEntityInputStream(ctxt, entity); 5515 xmlPushInput(ctxt, input); 5516 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 5517 (RAW == '<') && (NXT(1) == '?') && 5518 (NXT(2) == 'x') && (NXT(3) == 'm') && 5519 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 5520 xmlParseTextDecl(ctxt); 5521 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5522 /* 5523 * The XML REC instructs us to stop parsing 5524 * right here 5525 */ 5526 ctxt->instate = XML_PARSER_EOF; 5527 xmlFree(name); 5528 return; 5529 } 5530 } 5531 if (ctxt->token == 0) 5532 ctxt->token = ' '; 5533 } 5534 } 5535 ctxt->hasPErefs = 1; 5536 } else { 5537 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5539 ctxt->sax->error(ctxt->userData, 5540 "xmlParsePEReference: expecting ';'\n"); 5541 ctxt->wellFormed = 0; 5542 ctxt->disableSAX = 1; 5543 } 5544 xmlFree(name); 5545 } 5546 } 5547} 5548 5549/** 5550 * xmlParseStringPEReference: 5551 * @ctxt: an XML parser context 5552 * @str: a pointer to an index in the string 5553 * 5554 * parse PEReference declarations 5555 * 5556 * [69] PEReference ::= '%' Name ';' 5557 * 5558 * [ WFC: No Recursion ] 5559 * A parsed entity must not contain a recursive 5560 * reference to itself, 
either directly or indirectly. 5561 * 5562 * [ WFC: Entity Declared ] 5563 * In a document without any DTD, a document with only an internal DTD 5564 * subset which contains no parameter entity references, or a document 5565 * with "standalone='yes'", ... ... The declaration of a parameter 5566 * entity must precede any reference to it... 5567 * 5568 * [ VC: Entity Declared ] 5569 * In a document with an external subset or external parameter entities 5570 * with "standalone='no'", ... ... The declaration of a parameter entity 5571 * must precede any reference to it... 5572 * 5573 * [ WFC: In DTD ] 5574 * Parameter-entity references may only appear in the DTD. 5575 * NOTE: misleading but this is handled. 5576 * 5577 * Returns the string of the entity content. 5578 * str is updated to the current value of the index 5579 */ 5580xmlEntityPtr 5581xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5582 const xmlChar *ptr; 5583 xmlChar cur; 5584 xmlChar *name; 5585 xmlEntityPtr entity = NULL; 5586 5587 if ((str == NULL) || (*str == NULL)) return(NULL); 5588 ptr = *str; 5589 cur = *ptr; 5590 if (cur == '%') { 5591 ptr++; 5592 cur = *ptr; 5593 name = xmlParseStringName(ctxt, &ptr); 5594 if (name == NULL) { 5595 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5597 ctxt->sax->error(ctxt->userData, 5598 "xmlParseStringPEReference: no name\n"); 5599 ctxt->wellFormed = 0; 5600 ctxt->disableSAX = 1; 5601 } else { 5602 cur = *ptr; 5603 if (cur == ';') { 5604 ptr++; 5605 cur = *ptr; 5606 if ((ctxt->sax != NULL) && 5607 (ctxt->sax->getParameterEntity != NULL)) 5608 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5609 name); 5610 if (entity == NULL) { 5611 /* 5612 * [ WFC: Entity Declared ] 5613 * In a document without any DTD, a document with only an 5614 * internal DTD subset which contains no parameter entity 5615 * references, or a document with "standalone='yes'", ... 5616 * ... 
The declaration of a parameter entity must precede 5617 * any reference to it... 5618 */ 5619 if ((ctxt->standalone == 1) || 5620 ((ctxt->hasExternalSubset == 0) && 5621 (ctxt->hasPErefs == 0))) { 5622 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5624 ctxt->sax->error(ctxt->userData, 5625 "PEReference: %%%s; not found\n", name); 5626 ctxt->wellFormed = 0; 5627 ctxt->disableSAX = 1; 5628 } else { 5629 /* 5630 * [ VC: Entity Declared ] 5631 * In a document with an external subset or external 5632 * parameter entities with "standalone='no'", ... 5633 * ... The declaration of a parameter entity must 5634 * precede any reference to it... 5635 */ 5636 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5637 ctxt->sax->warning(ctxt->userData, 5638 "PEReference: %%%s; not found\n", name); 5639 ctxt->valid = 0; 5640 } 5641 } else { 5642 /* 5643 * Internal checking in case the entity quest barfed 5644 */ 5645 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 5646 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 5647 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5648 ctxt->sax->warning(ctxt->userData, 5649 "Internal: %%%s; is not a parameter entity\n", name); 5650 } 5651 } 5652 ctxt->hasPErefs = 1; 5653 } else { 5654 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5656 ctxt->sax->error(ctxt->userData, 5657 "xmlParseStringPEReference: expecting ';'\n"); 5658 ctxt->wellFormed = 0; 5659 ctxt->disableSAX = 1; 5660 } 5661 xmlFree(name); 5662 } 5663 } 5664 *str = ptr; 5665 return(entity); 5666} 5667 5668/** 5669 * xmlParseDocTypeDecl: 5670 * @ctxt: an XML parser context 5671 * 5672 * parse a DOCTYPE declaration 5673 * 5674 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5675 * ('[' (markupdecl | PEReference | S)* ']' S?)? 
'>' 5676 * 5677 * [ VC: Root Element Type ] 5678 * The Name in the document type declaration must match the element 5679 * type of the root element. 5680 */ 5681 5682void 5683xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5684 xmlChar *name = NULL; 5685 xmlChar *ExternalID = NULL; 5686 xmlChar *URI = NULL; 5687 5688 /* 5689 * We know that '<!DOCTYPE' has been detected. 5690 */ 5691 SKIP(9); 5692 5693 SKIP_BLANKS; 5694 5695 /* 5696 * Parse the DOCTYPE name. 5697 */ 5698 name = xmlParseName(ctxt); 5699 if (name == NULL) { 5700 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5702 ctxt->sax->error(ctxt->userData, 5703 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5704 ctxt->wellFormed = 0; 5705 ctxt->disableSAX = 1; 5706 } 5707 ctxt->intSubName = name; 5708 5709 SKIP_BLANKS; 5710 5711 /* 5712 * Check for SystemID and ExternalID 5713 */ 5714 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 5715 5716 if ((URI != NULL) || (ExternalID != NULL)) { 5717 ctxt->hasExternalSubset = 1; 5718 } 5719 ctxt->extSubURI = URI; 5720 ctxt->extSubSystem = ExternalID; 5721 5722 SKIP_BLANKS; 5723 5724 /* 5725 * Create and update the internal subset. 5726 */ 5727 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 5728 (!ctxt->disableSAX)) 5729 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 5730 5731 /* 5732 * Is there any internal subset declarations ? 5733 * they are handled separately in xmlParseInternalSubset() 5734 */ 5735 if (RAW == '[') 5736 return; 5737 5738 /* 5739 * We should be at the end of the DOCTYPE declaration. 
5740 */ 5741 if (RAW != '>') { 5742 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5744 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5745 ctxt->wellFormed = 0; 5746 ctxt->disableSAX = 1; 5747 } 5748 NEXT; 5749} 5750 5751/** 5752 * xmlParseInternalsubset: 5753 * @ctxt: an XML parser context 5754 * 5755 * parse the internal subset declaration 5756 * 5757 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 5758 */ 5759 5760void 5761xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 5762 /* 5763 * Is there any DTD definition ? 5764 */ 5765 if (RAW == '[') { 5766 ctxt->instate = XML_PARSER_DTD; 5767 NEXT; 5768 /* 5769 * Parse the succession of Markup declarations and 5770 * PEReferences. 5771 * Subsequence (markupdecl | PEReference | S)* 5772 */ 5773 while (RAW != ']') { 5774 const xmlChar *check = CUR_PTR; 5775 int cons = ctxt->input->consumed; 5776 5777 SKIP_BLANKS; 5778 xmlParseMarkupDecl(ctxt); 5779 xmlParsePEReference(ctxt); 5780 5781 /* 5782 * Pop-up of finished entities. 5783 */ 5784 while ((RAW == 0) && (ctxt->inputNr > 1)) 5785 xmlPopInput(ctxt); 5786 5787 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5788 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5790 ctxt->sax->error(ctxt->userData, 5791 "xmlParseInternalSubset: error detected in Markup declaration\n"); 5792 ctxt->wellFormed = 0; 5793 ctxt->disableSAX = 1; 5794 break; 5795 } 5796 } 5797 if (RAW == ']') { 5798 NEXT; 5799 SKIP_BLANKS; 5800 } 5801 } 5802 5803 /* 5804 * We should be at the end of the DOCTYPE declaration. 
5805 */ 5806 if (RAW != '>') { 5807 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5809 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5810 ctxt->wellFormed = 0; 5811 ctxt->disableSAX = 1; 5812 } 5813 NEXT; 5814} 5815 5816/** 5817 * xmlParseAttribute: 5818 * @ctxt: an XML parser context 5819 * @value: a xmlChar ** used to store the value of the attribute 5820 * 5821 * parse an attribute 5822 * 5823 * [41] Attribute ::= Name Eq AttValue 5824 * 5825 * [ WFC: No External Entity References ] 5826 * Attribute values cannot contain direct or indirect entity references 5827 * to external entities. 5828 * 5829 * [ WFC: No < in Attribute Values ] 5830 * The replacement text of any entity referred to directly or indirectly in 5831 * an attribute value (other than "<") must not contain a <. 5832 * 5833 * [ VC: Attribute Value Type ] 5834 * The attribute must have been declared; the value must be of the type 5835 * declared for it. 5836 * 5837 * [25] Eq ::= S? '=' S? 5838 * 5839 * With namespace: 5840 * 5841 * [NS 11] Attribute ::= QName Eq AttValue 5842 * 5843 * Also the case QName == xmlns:??? is handled independently as a namespace 5844 * definition. 5845 * 5846 * Returns the attribute name, and the value in *value. 
5847 */ 5848 5849xmlChar * 5850xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 5851 xmlChar *name, *val; 5852 5853 *value = NULL; 5854 name = xmlParseName(ctxt); 5855 if (name == NULL) { 5856 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5858 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 5859 ctxt->wellFormed = 0; 5860 ctxt->disableSAX = 1; 5861 return(NULL); 5862 } 5863 5864 /* 5865 * read the value 5866 */ 5867 SKIP_BLANKS; 5868 if (RAW == '=') { 5869 NEXT; 5870 SKIP_BLANKS; 5871 val = xmlParseAttValue(ctxt); 5872 ctxt->instate = XML_PARSER_CONTENT; 5873 } else { 5874 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5876 ctxt->sax->error(ctxt->userData, 5877 "Specification mandate value for attribute %s\n", name); 5878 ctxt->wellFormed = 0; 5879 ctxt->disableSAX = 1; 5880 xmlFree(name); 5881 return(NULL); 5882 } 5883 5884 /* 5885 * Check that xml:lang conforms to the specification 5886 * No more registered as an error, just generate a warning now 5887 * since this was deprecated in XML second edition 5888 */ 5889 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 5890 if (!xmlCheckLanguageID(val)) { 5891 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5892 ctxt->sax->warning(ctxt->userData, 5893 "Malformed value for xml:lang : %s\n", val); 5894 } 5895 } 5896 5897 /* 5898 * Check that xml:space conforms to the specification 5899 */ 5900 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 5901 if (xmlStrEqual(val, BAD_CAST "default")) 5902 *(ctxt->space) = 0; 5903 else if (xmlStrEqual(val, BAD_CAST "preserve")) 5904 *(ctxt->space) = 1; 5905 else { 5906 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5908 ctxt->sax->error(ctxt->userData, 5909"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", 5910 val); 5911 
ctxt->wellFormed = 0; 5912 ctxt->disableSAX = 1; 5913 } 5914 } 5915 5916 *value = val; 5917 return(name); 5918} 5919 5920/** 5921 * xmlParseStartTag: 5922 * @ctxt: an XML parser context 5923 * 5924 * parse a start of tag either for rule element or 5925 * EmptyElement. In both case we don't parse the tag closing chars. 5926 * 5927 * [40] STag ::= '<' Name (S Attribute)* S? '>' 5928 * 5929 * [ WFC: Unique Att Spec ] 5930 * No attribute name may appear more than once in the same start-tag or 5931 * empty-element tag. 5932 * 5933 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 5934 * 5935 * [ WFC: Unique Att Spec ] 5936 * No attribute name may appear more than once in the same start-tag or 5937 * empty-element tag. 5938 * 5939 * With namespace: 5940 * 5941 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 5942 * 5943 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 5944 * 5945 * Returns the element name parsed 5946 */ 5947 5948xmlChar * 5949xmlParseStartTag(xmlParserCtxtPtr ctxt) { 5950 xmlChar *name; 5951 xmlChar *attname; 5952 xmlChar *attvalue; 5953 const xmlChar **atts = NULL; 5954 int nbatts = 0; 5955 int maxatts = 0; 5956 int i; 5957 5958 if (RAW != '<') return(NULL); 5959 NEXT1; 5960 5961 name = xmlParseName(ctxt); 5962 if (name == NULL) { 5963 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5965 ctxt->sax->error(ctxt->userData, 5966 "xmlParseStartTag: invalid element name\n"); 5967 ctxt->wellFormed = 0; 5968 ctxt->disableSAX = 1; 5969 return(NULL); 5970 } 5971 5972 /* 5973 * Now parse the attributes, it ends up with the ending 5974 * 5975 * (S Attribute)* S? 
5976 */ 5977 SKIP_BLANKS; 5978 GROW; 5979 5980 while ((RAW != '>') && 5981 ((RAW != '/') || (NXT(1) != '>')) && 5982 (IS_CHAR(RAW))) { 5983 const xmlChar *q = CUR_PTR; 5984 int cons = ctxt->input->consumed; 5985 5986 attname = xmlParseAttribute(ctxt, &attvalue); 5987 if ((attname != NULL) && (attvalue != NULL)) { 5988 /* 5989 * [ WFC: Unique Att Spec ] 5990 * No attribute name may appear more than once in the same 5991 * start-tag or empty-element tag. 5992 */ 5993 for (i = 0; i < nbatts;i += 2) { 5994 if (xmlStrEqual(atts[i], attname)) { 5995 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 5996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5997 ctxt->sax->error(ctxt->userData, 5998 "Attribute %s redefined\n", 5999 attname); 6000 ctxt->wellFormed = 0; 6001 ctxt->disableSAX = 1; 6002 xmlFree(attname); 6003 xmlFree(attvalue); 6004 goto failed; 6005 } 6006 } 6007 6008 /* 6009 * Add the pair to atts 6010 */ 6011 if (atts == NULL) { 6012 maxatts = 10; 6013 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 6014 if (atts == NULL) { 6015 xmlGenericError(xmlGenericErrorContext, 6016 "malloc of %ld byte failed\n", 6017 maxatts * (long)sizeof(xmlChar *)); 6018 return(NULL); 6019 } 6020 } else if (nbatts + 4 > maxatts) { 6021 maxatts *= 2; 6022 atts = (const xmlChar **) xmlRealloc((void *) atts, 6023 maxatts * sizeof(xmlChar *)); 6024 if (atts == NULL) { 6025 xmlGenericError(xmlGenericErrorContext, 6026 "realloc of %ld byte failed\n", 6027 maxatts * (long)sizeof(xmlChar *)); 6028 return(NULL); 6029 } 6030 } 6031 atts[nbatts++] = attname; 6032 atts[nbatts++] = attvalue; 6033 atts[nbatts] = NULL; 6034 atts[nbatts + 1] = NULL; 6035 } else { 6036 if (attname != NULL) 6037 xmlFree(attname); 6038 if (attvalue != NULL) 6039 xmlFree(attvalue); 6040 } 6041 6042failed: 6043 6044 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6045 break; 6046 if (!IS_BLANK(RAW)) { 6047 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6048 if ((ctxt->sax != NULL) && (ctxt->sax->error != 
NULL)) 6049 ctxt->sax->error(ctxt->userData, 6050 "attributes construct error\n"); 6051 ctxt->wellFormed = 0; 6052 ctxt->disableSAX = 1; 6053 } 6054 SKIP_BLANKS; 6055 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 6056 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6058 ctxt->sax->error(ctxt->userData, 6059 "xmlParseStartTag: problem parsing attributes\n"); 6060 ctxt->wellFormed = 0; 6061 ctxt->disableSAX = 1; 6062 break; 6063 } 6064 GROW; 6065 } 6066 6067 /* 6068 * SAX: Start of Element ! 6069 */ 6070 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6071 (!ctxt->disableSAX)) 6072 ctxt->sax->startElement(ctxt->userData, name, atts); 6073 6074 if (atts != NULL) { 6075 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 6076 xmlFree((void *) atts); 6077 } 6078 return(name); 6079} 6080 6081/** 6082 * xmlParseEndTag: 6083 * @ctxt: an XML parser context 6084 * 6085 * parse an end of tag 6086 * 6087 * [42] ETag ::= '</' Name S? '>' 6088 * 6089 * With namespace 6090 * 6091 * [NS 9] ETag ::= '</' QName S? '>' 6092 */ 6093 6094void 6095xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6096 xmlChar *name; 6097 xmlChar *oldname; 6098 6099 GROW; 6100 if ((RAW != '<') || (NXT(1) != '/')) { 6101 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 6102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6103 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 6104 ctxt->wellFormed = 0; 6105 ctxt->disableSAX = 1; 6106 return; 6107 } 6108 SKIP(2); 6109 6110 name = xmlParseName(ctxt); 6111 6112 /* 6113 * We should definitely be at the ending "S? 
'>'" part 6114 */ 6115 GROW; 6116 SKIP_BLANKS; 6117 if ((!IS_CHAR(RAW)) || (RAW != '>')) { 6118 ctxt->errNo = XML_ERR_GT_REQUIRED; 6119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6120 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 6121 ctxt->wellFormed = 0; 6122 ctxt->disableSAX = 1; 6123 } else 6124 NEXT1; 6125 6126 /* 6127 * [ WFC: Element Type Match ] 6128 * The Name in an element's end-tag must match the element type in the 6129 * start-tag. 6130 * 6131 */ 6132 if ((name == NULL) || (ctxt->name == NULL) || 6133 (!xmlStrEqual(name, ctxt->name))) { 6134 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6136 if ((name != NULL) && (ctxt->name != NULL)) { 6137 ctxt->sax->error(ctxt->userData, 6138 "Opening and ending tag mismatch: %s and %s\n", 6139 ctxt->name, name); 6140 } else if (ctxt->name != NULL) { 6141 ctxt->sax->error(ctxt->userData, 6142 "Ending tag eror for: %s\n", ctxt->name); 6143 } else { 6144 ctxt->sax->error(ctxt->userData, 6145 "Ending tag error: internal error ???\n"); 6146 } 6147 6148 } 6149 ctxt->wellFormed = 0; 6150 ctxt->disableSAX = 1; 6151 } 6152 6153 /* 6154 * SAX: End of Tag 6155 */ 6156 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6157 (!ctxt->disableSAX)) 6158 ctxt->sax->endElement(ctxt->userData, name); 6159 6160 if (name != NULL) 6161 xmlFree(name); 6162 oldname = namePop(ctxt); 6163 spacePop(ctxt); 6164 if (oldname != NULL) { 6165#ifdef DEBUG_STACK 6166 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6167#endif 6168 xmlFree(oldname); 6169 } 6170 return; 6171} 6172 6173/** 6174 * xmlParseCDSect: 6175 * @ctxt: an XML parser context 6176 * 6177 * Parse escaped pure raw content. 
6178 * 6179 * [18] CDSect ::= CDStart CData CDEnd 6180 * 6181 * [19] CDStart ::= '<![CDATA[' 6182 * 6183 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6184 * 6185 * [21] CDEnd ::= ']]>' 6186 */ 6187void 6188xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6189 xmlChar *buf = NULL; 6190 int len = 0; 6191 int size = XML_PARSER_BUFFER_SIZE; 6192 int r, rl; 6193 int s, sl; 6194 int cur, l; 6195 int count = 0; 6196 6197 if ((NXT(0) == '<') && (NXT(1) == '!') && 6198 (NXT(2) == '[') && (NXT(3) == 'C') && 6199 (NXT(4) == 'D') && (NXT(5) == 'A') && 6200 (NXT(6) == 'T') && (NXT(7) == 'A') && 6201 (NXT(8) == '[')) { 6202 SKIP(9); 6203 } else 6204 return; 6205 6206 ctxt->instate = XML_PARSER_CDATA_SECTION; 6207 r = CUR_CHAR(rl); 6208 if (!IS_CHAR(r)) { 6209 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6211 ctxt->sax->error(ctxt->userData, 6212 "CData section not finished\n"); 6213 ctxt->wellFormed = 0; 6214 ctxt->disableSAX = 1; 6215 ctxt->instate = XML_PARSER_CONTENT; 6216 return; 6217 } 6218 NEXTL(rl); 6219 s = CUR_CHAR(sl); 6220 if (!IS_CHAR(s)) { 6221 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6223 ctxt->sax->error(ctxt->userData, 6224 "CData section not finished\n"); 6225 ctxt->wellFormed = 0; 6226 ctxt->disableSAX = 1; 6227 ctxt->instate = XML_PARSER_CONTENT; 6228 return; 6229 } 6230 NEXTL(sl); 6231 cur = CUR_CHAR(l); 6232 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6233 if (buf == NULL) { 6234 xmlGenericError(xmlGenericErrorContext, 6235 "malloc of %d byte failed\n", size); 6236 return; 6237 } 6238 while (IS_CHAR(cur) && 6239 ((r != ']') || (s != ']') || (cur != '>'))) { 6240 if (len + 5 >= size) { 6241 size *= 2; 6242 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6243 if (buf == NULL) { 6244 xmlGenericError(xmlGenericErrorContext, 6245 "realloc of %d byte failed\n", size); 6246 return; 6247 } 6248 } 6249 COPY_BUF(rl,buf,len,r); 6250 r = s; 
6251 rl = sl; 6252 s = cur; 6253 sl = l; 6254 count++; 6255 if (count > 50) { 6256 GROW; 6257 count = 0; 6258 } 6259 NEXTL(l); 6260 cur = CUR_CHAR(l); 6261 } 6262 buf[len] = 0; 6263 ctxt->instate = XML_PARSER_CONTENT; 6264 if (cur != '>') { 6265 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6267 ctxt->sax->error(ctxt->userData, 6268 "CData section not finished\n%.50s\n", buf); 6269 ctxt->wellFormed = 0; 6270 ctxt->disableSAX = 1; 6271 xmlFree(buf); 6272 return; 6273 } 6274 NEXTL(l); 6275 6276 /* 6277 * Ok the buffer is to be consumed as cdata. 6278 */ 6279 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 6280 if (ctxt->sax->cdataBlock != NULL) 6281 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6282 } 6283 xmlFree(buf); 6284} 6285 6286/** 6287 * xmlParseContent: 6288 * @ctxt: an XML parser context 6289 * 6290 * Parse a content: 6291 * 6292 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6293 */ 6294 6295void 6296xmlParseContent(xmlParserCtxtPtr ctxt) { 6297 GROW; 6298 while (((RAW != 0) || (ctxt->token != 0)) && 6299 ((RAW != '<') || (NXT(1) != '/'))) { 6300 const xmlChar *test = CUR_PTR; 6301 int cons = ctxt->input->consumed; 6302 xmlChar tok = ctxt->token; 6303 const xmlChar *cur = ctxt->input->cur; 6304 6305 /* 6306 * Handle possible processed charrefs. 6307 */ 6308 if (ctxt->token != 0) { 6309 xmlParseCharData(ctxt, 0); 6310 } 6311 /* 6312 * First case : a Processing Instruction. 
6313 */ 6314 else if ((*cur == '<') && (cur[1] == '?')) { 6315 xmlParsePI(ctxt); 6316 } 6317 6318 /* 6319 * Second case : a CDSection 6320 */ 6321 else if ((*cur == '<') && (NXT(1) == '!') && 6322 (NXT(2) == '[') && (NXT(3) == 'C') && 6323 (NXT(4) == 'D') && (NXT(5) == 'A') && 6324 (NXT(6) == 'T') && (NXT(7) == 'A') && 6325 (NXT(8) == '[')) { 6326 xmlParseCDSect(ctxt); 6327 } 6328 6329 /* 6330 * Third case : a comment 6331 */ 6332 else if ((*cur == '<') && (NXT(1) == '!') && 6333 (NXT(2) == '-') && (NXT(3) == '-')) { 6334 xmlParseComment(ctxt); 6335 ctxt->instate = XML_PARSER_CONTENT; 6336 } 6337 6338 /* 6339 * Fourth case : a sub-element. 6340 */ 6341 else if (*cur == '<') { 6342 xmlParseElement(ctxt); 6343 } 6344 6345 /* 6346 * Fifth case : a reference. If if has not been resolved, 6347 * parsing returns it's Name, create the node 6348 */ 6349 6350 else if (*cur == '&') { 6351 xmlParseReference(ctxt); 6352 } 6353 6354 /* 6355 * Last case, text. Note that References are handled directly. 6356 */ 6357 else { 6358 xmlParseCharData(ctxt, 0); 6359 } 6360 6361 GROW; 6362 /* 6363 * Pop-up of finished entities. 6364 */ 6365 while ((RAW == 0) && (ctxt->inputNr > 1)) 6366 xmlPopInput(ctxt); 6367 SHRINK; 6368 6369 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6370 (tok == ctxt->token)) { 6371 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6373 ctxt->sax->error(ctxt->userData, 6374 "detected an error in element content\n"); 6375 ctxt->wellFormed = 0; 6376 ctxt->disableSAX = 1; 6377 ctxt->instate = XML_PARSER_EOF; 6378 break; 6379 } 6380 } 6381} 6382 6383/** 6384 * xmlParseElement: 6385 * @ctxt: an XML parser context 6386 * 6387 * parse an XML element, this is highly recursive 6388 * 6389 * [39] element ::= EmptyElemTag | STag content ETag 6390 * 6391 * [ WFC: Element Type Match ] 6392 * The Name in an element's end-tag must match the element type in the 6393 * start-tag. 
6394 * 6395 * [ VC: Element Valid ] 6396 * An element is valid if there is a declaration matching elementdecl 6397 * where the Name matches the element type and one of the following holds: 6398 * - The declaration matches EMPTY and the element has no content. 6399 * - The declaration matches children and the sequence of child elements 6400 * belongs to the language generated by the regular expression in the 6401 * content model, with optional white space (characters matching the 6402 * nonterminal S) between each pair of child elements. 6403 * - The declaration matches Mixed and the content consists of character 6404 * data and child elements whose types match names in the content model. 6405 * - The declaration matches ANY, and the types of any child elements have 6406 * been declared. 6407 */ 6408 6409void 6410xmlParseElement(xmlParserCtxtPtr ctxt) { 6411 const xmlChar *openTag = CUR_PTR; 6412 xmlChar *name; 6413 xmlChar *oldname; 6414 xmlParserNodeInfo node_info; 6415 xmlNodePtr ret; 6416 6417 /* Capture start position */ 6418 if (ctxt->record_info) { 6419 node_info.begin_pos = ctxt->input->consumed + 6420 (CUR_PTR - ctxt->input->base); 6421 node_info.begin_line = ctxt->input->line; 6422 } 6423 6424 if (ctxt->spaceNr == 0) 6425 spacePush(ctxt, -1); 6426 else 6427 spacePush(ctxt, *ctxt->space); 6428 6429 name = xmlParseStartTag(ctxt); 6430 if (name == NULL) { 6431 spacePop(ctxt); 6432 return; 6433 } 6434 namePush(ctxt, name); 6435 ret = ctxt->node; 6436 6437 /* 6438 * [ VC: Root Element Type ] 6439 * The Name in the document type declaration must match the element 6440 * type of the root element. 6441 */ 6442 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6443 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 6444 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6445 6446 /* 6447 * Check for an Empty Element. 
6448 */ 6449 if ((RAW == '/') && (NXT(1) == '>')) { 6450 SKIP(2); 6451 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6452 (!ctxt->disableSAX)) 6453 ctxt->sax->endElement(ctxt->userData, name); 6454 oldname = namePop(ctxt); 6455 spacePop(ctxt); 6456 if (oldname != NULL) { 6457#ifdef DEBUG_STACK 6458 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6459#endif 6460 xmlFree(oldname); 6461 } 6462 if ( ret != NULL && ctxt->record_info ) { 6463 node_info.end_pos = ctxt->input->consumed + 6464 (CUR_PTR - ctxt->input->base); 6465 node_info.end_line = ctxt->input->line; 6466 node_info.node = ret; 6467 xmlParserAddNodeInfo(ctxt, &node_info); 6468 } 6469 return; 6470 } 6471 if (RAW == '>') { 6472 NEXT1; 6473 } else { 6474 ctxt->errNo = XML_ERR_GT_REQUIRED; 6475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6476 ctxt->sax->error(ctxt->userData, 6477 "Couldn't find end of Start Tag\n%.30s\n", 6478 openTag); 6479 ctxt->wellFormed = 0; 6480 ctxt->disableSAX = 1; 6481 6482 /* 6483 * end of parsing of this node. 
6484 */ 6485 nodePop(ctxt); 6486 oldname = namePop(ctxt); 6487 spacePop(ctxt); 6488 if (oldname != NULL) { 6489#ifdef DEBUG_STACK 6490 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6491#endif 6492 xmlFree(oldname); 6493 } 6494 6495 /* 6496 * Capture end position and add node 6497 */ 6498 if ( ret != NULL && ctxt->record_info ) { 6499 node_info.end_pos = ctxt->input->consumed + 6500 (CUR_PTR - ctxt->input->base); 6501 node_info.end_line = ctxt->input->line; 6502 node_info.node = ret; 6503 xmlParserAddNodeInfo(ctxt, &node_info); 6504 } 6505 return; 6506 } 6507 6508 /* 6509 * Parse the content of the element: 6510 */ 6511 xmlParseContent(ctxt); 6512 if (!IS_CHAR(RAW)) { 6513 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6515 ctxt->sax->error(ctxt->userData, 6516 "Premature end of data in tag %.30s\n", openTag); 6517 ctxt->wellFormed = 0; 6518 ctxt->disableSAX = 1; 6519 6520 /* 6521 * end of parsing of this node. 6522 */ 6523 nodePop(ctxt); 6524 oldname = namePop(ctxt); 6525 spacePop(ctxt); 6526 if (oldname != NULL) { 6527#ifdef DEBUG_STACK 6528 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 6529#endif 6530 xmlFree(oldname); 6531 } 6532 return; 6533 } 6534 6535 /* 6536 * parse the end of tag: '</' should be here. 6537 */ 6538 xmlParseEndTag(ctxt); 6539 6540 /* 6541 * Capture end position and add node 6542 */ 6543 if ( ret != NULL && ctxt->record_info ) { 6544 node_info.end_pos = ctxt->input->consumed + 6545 (CUR_PTR - ctxt->input->base); 6546 node_info.end_line = ctxt->input->line; 6547 node_info.node = ret; 6548 xmlParserAddNodeInfo(ctxt, &node_info); 6549 } 6550} 6551 6552/** 6553 * xmlParseVersionNum: 6554 * @ctxt: an XML parser context 6555 * 6556 * parse the XML version value. 
6557 * 6558 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6559 * 6560 * Returns the string giving the XML version number, or NULL 6561 */ 6562xmlChar * 6563xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6564 xmlChar *buf = NULL; 6565 int len = 0; 6566 int size = 10; 6567 xmlChar cur; 6568 6569 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6570 if (buf == NULL) { 6571 xmlGenericError(xmlGenericErrorContext, 6572 "malloc of %d byte failed\n", size); 6573 return(NULL); 6574 } 6575 cur = CUR; 6576 while (((cur >= 'a') && (cur <= 'z')) || 6577 ((cur >= 'A') && (cur <= 'Z')) || 6578 ((cur >= '0') && (cur <= '9')) || 6579 (cur == '_') || (cur == '.') || 6580 (cur == ':') || (cur == '-')) { 6581 if (len + 1 >= size) { 6582 size *= 2; 6583 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6584 if (buf == NULL) { 6585 xmlGenericError(xmlGenericErrorContext, 6586 "realloc of %d byte failed\n", size); 6587 return(NULL); 6588 } 6589 } 6590 buf[len++] = cur; 6591 NEXT; 6592 cur=CUR; 6593 } 6594 buf[len] = 0; 6595 return(buf); 6596} 6597 6598/** 6599 * xmlParseVersionInfo: 6600 * @ctxt: an XML parser context 6601 * 6602 * parse the XML version. 6603 * 6604 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6605 * 6606 * [25] Eq ::= S? '=' S? 6607 * 6608 * Returns the version string, e.g. 
"1.0" 6609 */ 6610 6611xmlChar * 6612xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6613 xmlChar *version = NULL; 6614 const xmlChar *q; 6615 6616 if ((RAW == 'v') && (NXT(1) == 'e') && 6617 (NXT(2) == 'r') && (NXT(3) == 's') && 6618 (NXT(4) == 'i') && (NXT(5) == 'o') && 6619 (NXT(6) == 'n')) { 6620 SKIP(7); 6621 SKIP_BLANKS; 6622 if (RAW != '=') { 6623 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6625 ctxt->sax->error(ctxt->userData, 6626 "xmlParseVersionInfo : expected '='\n"); 6627 ctxt->wellFormed = 0; 6628 ctxt->disableSAX = 1; 6629 return(NULL); 6630 } 6631 NEXT; 6632 SKIP_BLANKS; 6633 if (RAW == '"') { 6634 NEXT; 6635 q = CUR_PTR; 6636 version = xmlParseVersionNum(ctxt); 6637 if (RAW != '"') { 6638 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6640 ctxt->sax->error(ctxt->userData, 6641 "String not closed\n%.50s\n", q); 6642 ctxt->wellFormed = 0; 6643 ctxt->disableSAX = 1; 6644 } else 6645 NEXT; 6646 } else if (RAW == '\''){ 6647 NEXT; 6648 q = CUR_PTR; 6649 version = xmlParseVersionNum(ctxt); 6650 if (RAW != '\'') { 6651 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6653 ctxt->sax->error(ctxt->userData, 6654 "String not closed\n%.50s\n", q); 6655 ctxt->wellFormed = 0; 6656 ctxt->disableSAX = 1; 6657 } else 6658 NEXT; 6659 } else { 6660 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6662 ctxt->sax->error(ctxt->userData, 6663 "xmlParseVersionInfo : expected ' or \"\n"); 6664 ctxt->wellFormed = 0; 6665 ctxt->disableSAX = 1; 6666 } 6667 } 6668 return(version); 6669} 6670 6671/** 6672 * xmlParseEncName: 6673 * @ctxt: an XML parser context 6674 * 6675 * parse the XML encoding name 6676 * 6677 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6678 * 6679 * Returns the encoding name value or NULL 6680 */ 6681xmlChar * 
6682xmlParseEncName(xmlParserCtxtPtr ctxt) { 6683 xmlChar *buf = NULL; 6684 int len = 0; 6685 int size = 10; 6686 xmlChar cur; 6687 6688 cur = CUR; 6689 if (((cur >= 'a') && (cur <= 'z')) || 6690 ((cur >= 'A') && (cur <= 'Z'))) { 6691 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6692 if (buf == NULL) { 6693 xmlGenericError(xmlGenericErrorContext, 6694 "malloc of %d byte failed\n", size); 6695 return(NULL); 6696 } 6697 6698 buf[len++] = cur; 6699 NEXT; 6700 cur = CUR; 6701 while (((cur >= 'a') && (cur <= 'z')) || 6702 ((cur >= 'A') && (cur <= 'Z')) || 6703 ((cur >= '0') && (cur <= '9')) || 6704 (cur == '.') || (cur == '_') || 6705 (cur == '-')) { 6706 if (len + 1 >= size) { 6707 size *= 2; 6708 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 6709 if (buf == NULL) { 6710 xmlGenericError(xmlGenericErrorContext, 6711 "realloc of %d byte failed\n", size); 6712 return(NULL); 6713 } 6714 } 6715 buf[len++] = cur; 6716 NEXT; 6717 cur = CUR; 6718 if (cur == 0) { 6719 SHRINK; 6720 GROW; 6721 cur = CUR; 6722 } 6723 } 6724 buf[len] = 0; 6725 } else { 6726 ctxt->errNo = XML_ERR_ENCODING_NAME; 6727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6728 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 6729 ctxt->wellFormed = 0; 6730 ctxt->disableSAX = 1; 6731 } 6732 return(buf); 6733} 6734 6735/** 6736 * xmlParseEncodingDecl: 6737 * @ctxt: an XML parser context 6738 * 6739 * parse the XML encoding declaration 6740 * 6741 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 6742 * 6743 * this setups the conversion filters. 
6744 * 6745 * Returns the encoding value or NULL 6746 */ 6747 6748xmlChar * 6749xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 6750 xmlChar *encoding = NULL; 6751 const xmlChar *q; 6752 6753 SKIP_BLANKS; 6754 if ((RAW == 'e') && (NXT(1) == 'n') && 6755 (NXT(2) == 'c') && (NXT(3) == 'o') && 6756 (NXT(4) == 'd') && (NXT(5) == 'i') && 6757 (NXT(6) == 'n') && (NXT(7) == 'g')) { 6758 SKIP(8); 6759 SKIP_BLANKS; 6760 if (RAW != '=') { 6761 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6763 ctxt->sax->error(ctxt->userData, 6764 "xmlParseEncodingDecl : expected '='\n"); 6765 ctxt->wellFormed = 0; 6766 ctxt->disableSAX = 1; 6767 return(NULL); 6768 } 6769 NEXT; 6770 SKIP_BLANKS; 6771 if (RAW == '"') { 6772 NEXT; 6773 q = CUR_PTR; 6774 encoding = xmlParseEncName(ctxt); 6775 if (RAW != '"') { 6776 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6778 ctxt->sax->error(ctxt->userData, 6779 "String not closed\n%.50s\n", q); 6780 ctxt->wellFormed = 0; 6781 ctxt->disableSAX = 1; 6782 } else 6783 NEXT; 6784 } else if (RAW == '\''){ 6785 NEXT; 6786 q = CUR_PTR; 6787 encoding = xmlParseEncName(ctxt); 6788 if (RAW != '\'') { 6789 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6791 ctxt->sax->error(ctxt->userData, 6792 "String not closed\n%.50s\n", q); 6793 ctxt->wellFormed = 0; 6794 ctxt->disableSAX = 1; 6795 } else 6796 NEXT; 6797 } else if (RAW == '"'){ 6798 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6800 ctxt->sax->error(ctxt->userData, 6801 "xmlParseEncodingDecl : expected ' or \"\n"); 6802 ctxt->wellFormed = 0; 6803 ctxt->disableSAX = 1; 6804 } 6805 if (encoding != NULL) { 6806 xmlCharEncoding enc; 6807 xmlCharEncodingHandlerPtr handler; 6808 6809 if (ctxt->input->encoding != NULL) 6810 xmlFree((xmlChar *) ctxt->input->encoding); 6811 ctxt->input->encoding = encoding; 
6812 6813 enc = xmlParseCharEncoding((const char *) encoding); 6814 /* 6815 * registered set of known encodings 6816 */ 6817 if (enc != XML_CHAR_ENCODING_ERROR) { 6818 xmlSwitchEncoding(ctxt, enc); 6819 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6820 xmlFree(encoding); 6821 return(NULL); 6822 } 6823 } else { 6824 /* 6825 * fallback for unknown encodings 6826 */ 6827 handler = xmlFindCharEncodingHandler((const char *) encoding); 6828 if (handler != NULL) { 6829 xmlSwitchToEncoding(ctxt, handler); 6830 } else { 6831 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6833 ctxt->sax->error(ctxt->userData, 6834 "Unsupported encoding %s\n", encoding); 6835 return(NULL); 6836 } 6837 } 6838 } 6839 } 6840 return(encoding); 6841} 6842 6843/** 6844 * xmlParseSDDecl: 6845 * @ctxt: an XML parser context 6846 * 6847 * parse the XML standalone declaration 6848 * 6849 * [32] SDDecl ::= S 'standalone' Eq 6850 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 6851 * 6852 * [ VC: Standalone Document Declaration ] 6853 * TODO The standalone document declaration must have the value "no" 6854 * if any external markup declarations contain declarations of: 6855 * - attributes with default values, if elements to which these 6856 * attributes apply appear in the document without specifications 6857 * of values for these attributes, or 6858 * - entities (other than amp, lt, gt, apos, quot), if references 6859 * to those entities appear in the document, or 6860 * - attributes with values subject to normalization, where the 6861 * attribute appears in the document with a value which will change 6862 * as a result of normalization, or 6863 * - element types with element content, if white space occurs directly 6864 * within any instance of those types. 
6865 * 6866 * Returns 1 if standalone, 0 otherwise 6867 */ 6868 6869int 6870xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 6871 int standalone = -1; 6872 6873 SKIP_BLANKS; 6874 if ((RAW == 's') && (NXT(1) == 't') && 6875 (NXT(2) == 'a') && (NXT(3) == 'n') && 6876 (NXT(4) == 'd') && (NXT(5) == 'a') && 6877 (NXT(6) == 'l') && (NXT(7) == 'o') && 6878 (NXT(8) == 'n') && (NXT(9) == 'e')) { 6879 SKIP(10); 6880 SKIP_BLANKS; 6881 if (RAW != '=') { 6882 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6884 ctxt->sax->error(ctxt->userData, 6885 "XML standalone declaration : expected '='\n"); 6886 ctxt->wellFormed = 0; 6887 ctxt->disableSAX = 1; 6888 return(standalone); 6889 } 6890 NEXT; 6891 SKIP_BLANKS; 6892 if (RAW == '\''){ 6893 NEXT; 6894 if ((RAW == 'n') && (NXT(1) == 'o')) { 6895 standalone = 0; 6896 SKIP(2); 6897 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6898 (NXT(2) == 's')) { 6899 standalone = 1; 6900 SKIP(3); 6901 } else { 6902 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6904 ctxt->sax->error(ctxt->userData, 6905 "standalone accepts only 'yes' or 'no'\n"); 6906 ctxt->wellFormed = 0; 6907 ctxt->disableSAX = 1; 6908 } 6909 if (RAW != '\'') { 6910 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6912 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6913 ctxt->wellFormed = 0; 6914 ctxt->disableSAX = 1; 6915 } else 6916 NEXT; 6917 } else if (RAW == '"'){ 6918 NEXT; 6919 if ((RAW == 'n') && (NXT(1) == 'o')) { 6920 standalone = 0; 6921 SKIP(2); 6922 } else if ((RAW == 'y') && (NXT(1) == 'e') && 6923 (NXT(2) == 's')) { 6924 standalone = 1; 6925 SKIP(3); 6926 } else { 6927 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6929 ctxt->sax->error(ctxt->userData, 6930 "standalone accepts only 'yes' or 'no'\n"); 6931 ctxt->wellFormed = 0; 6932 ctxt->disableSAX = 1; 6933 
} 6934 if (RAW != '"') { 6935 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6937 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6938 ctxt->wellFormed = 0; 6939 ctxt->disableSAX = 1; 6940 } else 6941 NEXT; 6942 } else { 6943 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6945 ctxt->sax->error(ctxt->userData, 6946 "Standalone value not found\n"); 6947 ctxt->wellFormed = 0; 6948 ctxt->disableSAX = 1; 6949 } 6950 } 6951 return(standalone); 6952} 6953 6954/** 6955 * xmlParseXMLDecl: 6956 * @ctxt: an XML parser context 6957 * 6958 * parse an XML declaration header 6959 * 6960 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 6961 */ 6962 6963void 6964xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 6965 xmlChar *version; 6966 6967 /* 6968 * We know that '<?xml' is here. 6969 */ 6970 SKIP(5); 6971 6972 if (!IS_BLANK(RAW)) { 6973 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6975 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 6976 ctxt->wellFormed = 0; 6977 ctxt->disableSAX = 1; 6978 } 6979 SKIP_BLANKS; 6980 6981 /* 6982 * We should have the VersionInfo here. 
6983 */ 6984 version = xmlParseVersionInfo(ctxt); 6985 if (version == NULL) 6986 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6987 ctxt->version = xmlStrdup(version); 6988 xmlFree(version); 6989 6990 /* 6991 * We may have the encoding declaration 6992 */ 6993 if (!IS_BLANK(RAW)) { 6994 if ((RAW == '?') && (NXT(1) == '>')) { 6995 SKIP(2); 6996 return; 6997 } 6998 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7000 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7001 ctxt->wellFormed = 0; 7002 ctxt->disableSAX = 1; 7003 } 7004 xmlParseEncodingDecl(ctxt); 7005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7006 /* 7007 * The XML REC instructs us to stop parsing right here 7008 */ 7009 return; 7010 } 7011 7012 /* 7013 * We may have the standalone status. 7014 */ 7015 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { 7016 if ((RAW == '?') && (NXT(1) == '>')) { 7017 SKIP(2); 7018 return; 7019 } 7020 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7022 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 7023 ctxt->wellFormed = 0; 7024 ctxt->disableSAX = 1; 7025 } 7026 SKIP_BLANKS; 7027 ctxt->input->standalone = xmlParseSDDecl(ctxt); 7028 7029 SKIP_BLANKS; 7030 if ((RAW == '?') && (NXT(1) == '>')) { 7031 SKIP(2); 7032 } else if (RAW == '>') { 7033 /* Deprecated old WD ... 
*/ 7034 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7036 ctxt->sax->error(ctxt->userData, 7037 "XML declaration must end-up with '?>'\n"); 7038 ctxt->wellFormed = 0; 7039 ctxt->disableSAX = 1; 7040 NEXT; 7041 } else { 7042 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7044 ctxt->sax->error(ctxt->userData, 7045 "parsing XML declaration: '?>' expected\n"); 7046 ctxt->wellFormed = 0; 7047 ctxt->disableSAX = 1; 7048 MOVETO_ENDTAG(CUR_PTR); 7049 NEXT; 7050 } 7051} 7052 7053/** 7054 * xmlParseMisc: 7055 * @ctxt: an XML parser context 7056 * 7057 * parse an XML Misc* optionnal field. 7058 * 7059 * [27] Misc ::= Comment | PI | S 7060 */ 7061 7062void 7063xmlParseMisc(xmlParserCtxtPtr ctxt) { 7064 while (((RAW == '<') && (NXT(1) == '?')) || 7065 ((RAW == '<') && (NXT(1) == '!') && 7066 (NXT(2) == '-') && (NXT(3) == '-')) || 7067 IS_BLANK(CUR)) { 7068 if ((RAW == '<') && (NXT(1) == '?')) { 7069 xmlParsePI(ctxt); 7070 } else if (IS_BLANK(CUR)) { 7071 NEXT; 7072 } else 7073 xmlParseComment(ctxt); 7074 } 7075} 7076 7077/** 7078 * xmlParseDocument: 7079 * @ctxt: an XML parser context 7080 * 7081 * parse an XML document (and build a tree if using the standard SAX 7082 * interface). 7083 * 7084 * [1] document ::= prolog element Misc* 7085 * 7086 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 7087 * 7088 * Returns 0, -1 in case of error. the parser context is augmented 7089 * as a result of the parsing. 7090 */ 7091 7092int 7093xmlParseDocument(xmlParserCtxtPtr ctxt) { 7094 xmlChar start[4]; 7095 xmlCharEncoding enc; 7096 7097 xmlInitParser(); 7098 7099 GROW; 7100 7101 /* 7102 * SAX: beginning of the document processing. 
7103 */ 7104 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7105 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7106 7107 /* 7108 * Get the 4 first bytes and decode the charset 7109 * if enc != XML_CHAR_ENCODING_NONE 7110 * plug some encoding conversion routines. 7111 */ 7112 start[0] = RAW; 7113 start[1] = NXT(1); 7114 start[2] = NXT(2); 7115 start[3] = NXT(3); 7116 enc = xmlDetectCharEncoding(start, 4); 7117 if (enc != XML_CHAR_ENCODING_NONE) { 7118 xmlSwitchEncoding(ctxt, enc); 7119 } 7120 7121 7122 if (CUR == 0) { 7123 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7125 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7126 ctxt->wellFormed = 0; 7127 ctxt->disableSAX = 1; 7128 } 7129 7130 /* 7131 * Check for the XMLDecl in the Prolog. 7132 */ 7133 GROW; 7134 if ((RAW == '<') && (NXT(1) == '?') && 7135 (NXT(2) == 'x') && (NXT(3) == 'm') && 7136 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7137 7138 /* 7139 * Note that we will switch encoding on the fly. 7140 */ 7141 xmlParseXMLDecl(ctxt); 7142 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7143 /* 7144 * The XML REC instructs us to stop parsing right here 7145 */ 7146 return(-1); 7147 } 7148 ctxt->standalone = ctxt->input->standalone; 7149 SKIP_BLANKS; 7150 } else { 7151 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7152 } 7153 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7154 ctxt->sax->startDocument(ctxt->userData); 7155 7156 /* 7157 * The Misc part of the Prolog 7158 */ 7159 GROW; 7160 xmlParseMisc(ctxt); 7161 7162 /* 7163 * Then possibly doc type declaration(s) and more Misc 7164 * (doctypedecl Misc*)? 
7165 */ 7166 GROW; 7167 if ((RAW == '<') && (NXT(1) == '!') && 7168 (NXT(2) == 'D') && (NXT(3) == 'O') && 7169 (NXT(4) == 'C') && (NXT(5) == 'T') && 7170 (NXT(6) == 'Y') && (NXT(7) == 'P') && 7171 (NXT(8) == 'E')) { 7172 7173 ctxt->inSubset = 1; 7174 xmlParseDocTypeDecl(ctxt); 7175 if (RAW == '[') { 7176 ctxt->instate = XML_PARSER_DTD; 7177 xmlParseInternalSubset(ctxt); 7178 } 7179 7180 /* 7181 * Create and update the external subset. 7182 */ 7183 ctxt->inSubset = 2; 7184 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 7185 (!ctxt->disableSAX)) 7186 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 7187 ctxt->extSubSystem, ctxt->extSubURI); 7188 ctxt->inSubset = 0; 7189 7190 7191 ctxt->instate = XML_PARSER_PROLOG; 7192 xmlParseMisc(ctxt); 7193 } 7194 7195 /* 7196 * Time to start parsing the tree itself 7197 */ 7198 GROW; 7199 if (RAW != '<') { 7200 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7202 ctxt->sax->error(ctxt->userData, 7203 "Start tag expected, '<' not found\n"); 7204 ctxt->wellFormed = 0; 7205 ctxt->disableSAX = 1; 7206 ctxt->instate = XML_PARSER_EOF; 7207 } else { 7208 ctxt->instate = XML_PARSER_CONTENT; 7209 xmlParseElement(ctxt); 7210 ctxt->instate = XML_PARSER_EPILOG; 7211 7212 7213 /* 7214 * The Misc part at the end 7215 */ 7216 xmlParseMisc(ctxt); 7217 7218 if (RAW != 0) { 7219 ctxt->errNo = XML_ERR_DOCUMENT_END; 7220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7221 ctxt->sax->error(ctxt->userData, 7222 "Extra content at the end of the document\n"); 7223 ctxt->wellFormed = 0; 7224 ctxt->disableSAX = 1; 7225 } 7226 ctxt->instate = XML_PARSER_EOF; 7227 } 7228 7229 /* 7230 * SAX: end of the document processing. 7231 */ 7232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7233 (!ctxt->disableSAX)) 7234 ctxt->sax->endDocument(ctxt->userData); 7235 7236 if (! 
ctxt->wellFormed) return(-1); 7237 return(0); 7238} 7239 7240/** 7241 * xmlParseExtParsedEnt: 7242 * @ctxt: an XML parser context 7243 * 7244 * parse a genreral parsed entity 7245 * An external general parsed entity is well-formed if it matches the 7246 * production labeled extParsedEnt. 7247 * 7248 * [78] extParsedEnt ::= TextDecl? content 7249 * 7250 * Returns 0, -1 in case of error. the parser context is augmented 7251 * as a result of the parsing. 7252 */ 7253 7254int 7255xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 7256 xmlChar start[4]; 7257 xmlCharEncoding enc; 7258 7259 xmlDefaultSAXHandlerInit(); 7260 7261 GROW; 7262 7263 /* 7264 * SAX: beginning of the document processing. 7265 */ 7266 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7267 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 7268 7269 /* 7270 * Get the 4 first bytes and decode the charset 7271 * if enc != XML_CHAR_ENCODING_NONE 7272 * plug some encoding conversion routines. 7273 */ 7274 start[0] = RAW; 7275 start[1] = NXT(1); 7276 start[2] = NXT(2); 7277 start[3] = NXT(3); 7278 enc = xmlDetectCharEncoding(start, 4); 7279 if (enc != XML_CHAR_ENCODING_NONE) { 7280 xmlSwitchEncoding(ctxt, enc); 7281 } 7282 7283 7284 if (CUR == 0) { 7285 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7287 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7288 ctxt->wellFormed = 0; 7289 ctxt->disableSAX = 1; 7290 } 7291 7292 /* 7293 * Check for the XMLDecl in the Prolog. 7294 */ 7295 GROW; 7296 if ((RAW == '<') && (NXT(1) == '?') && 7297 (NXT(2) == 'x') && (NXT(3) == 'm') && 7298 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 7299 7300 /* 7301 * Note that we will switch encoding on the fly. 
7302 */ 7303 xmlParseXMLDecl(ctxt); 7304 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7305 /* 7306 * The XML REC instructs us to stop parsing right here 7307 */ 7308 return(-1); 7309 } 7310 SKIP_BLANKS; 7311 } else { 7312 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7313 } 7314 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 7315 ctxt->sax->startDocument(ctxt->userData); 7316 7317 /* 7318 * Doing validity checking on chunk doesn't make sense 7319 */ 7320 ctxt->instate = XML_PARSER_CONTENT; 7321 ctxt->validate = 0; 7322 ctxt->loadsubset = 0; 7323 ctxt->depth = 0; 7324 7325 xmlParseContent(ctxt); 7326 7327 if ((RAW == '<') && (NXT(1) == '/')) { 7328 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 7329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7330 ctxt->sax->error(ctxt->userData, 7331 "chunk is not well balanced\n"); 7332 ctxt->wellFormed = 0; 7333 ctxt->disableSAX = 1; 7334 } else if (RAW != 0) { 7335 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7337 ctxt->sax->error(ctxt->userData, 7338 "extra content at the end of well balanced chunk\n"); 7339 ctxt->wellFormed = 0; 7340 ctxt->disableSAX = 1; 7341 } 7342 7343 /* 7344 * SAX: end of the document processing. 7345 */ 7346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7347 (!ctxt->disableSAX)) 7348 ctxt->sax->endDocument(ctxt->userData); 7349 7350 if (! 
ctxt->wellFormed) return(-1); 7351 return(0); 7352} 7353 7354/************************************************************************ 7355 * * 7356 * Progressive parsing interfaces * 7357 * * 7358 ************************************************************************/ 7359 7360/** 7361 * xmlParseLookupSequence: 7362 * @ctxt: an XML parser context 7363 * @first: the first char to lookup 7364 * @next: the next char to lookup or zero 7365 * @third: the next char to lookup or zero 7366 * 7367 * Try to find if a sequence (first, next, third) or just (first next) or 7368 * (first) is available in the input stream. 7369 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 7370 * to avoid rescanning sequences of bytes, it DOES change the state of the 7371 * parser, do not use liberally. 7372 * 7373 * Returns the index to the current parsing point if the full sequence 7374 * is available, -1 otherwise. 7375 */ 7376int 7377xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7378 xmlChar next, xmlChar third) { 7379 int base, len; 7380 xmlParserInputPtr in; 7381 const xmlChar *buf; 7382 7383 in = ctxt->input; 7384 if (in == NULL) return(-1); 7385 base = in->cur - in->base; 7386 if (base < 0) return(-1); 7387 if (ctxt->checkIndex > base) 7388 base = ctxt->checkIndex; 7389 if (in->buf == NULL) { 7390 buf = in->base; 7391 len = in->length; 7392 } else { 7393 buf = in->buf->buffer->content; 7394 len = in->buf->buffer->use; 7395 } 7396 /* take into account the sequence length */ 7397 if (third) len -= 2; 7398 else if (next) len --; 7399 for (;base < len;base++) { 7400 if (buf[base] == first) { 7401 if (third != 0) { 7402 if ((buf[base + 1] != next) || 7403 (buf[base + 2] != third)) continue; 7404 } else if (next != 0) { 7405 if (buf[base + 1] != next) continue; 7406 } 7407 ctxt->checkIndex = 0; 7408#ifdef DEBUG_PUSH 7409 if (next == 0) 7410 xmlGenericError(xmlGenericErrorContext, 7411 "PP: lookup '%c' found at %d\n", 7412 first, base); 7413 else 
if (third == 0) 7414 xmlGenericError(xmlGenericErrorContext, 7415 "PP: lookup '%c%c' found at %d\n", 7416 first, next, base); 7417 else 7418 xmlGenericError(xmlGenericErrorContext, 7419 "PP: lookup '%c%c%c' found at %d\n", 7420 first, next, third, base); 7421#endif 7422 return(base - (in->cur - in->base)); 7423 } 7424 } 7425 ctxt->checkIndex = base; 7426#ifdef DEBUG_PUSH 7427 if (next == 0) 7428 xmlGenericError(xmlGenericErrorContext, 7429 "PP: lookup '%c' failed\n", first); 7430 else if (third == 0) 7431 xmlGenericError(xmlGenericErrorContext, 7432 "PP: lookup '%c%c' failed\n", first, next); 7433 else 7434 xmlGenericError(xmlGenericErrorContext, 7435 "PP: lookup '%c%c%c' failed\n", first, next, third); 7436#endif 7437 return(-1); 7438} 7439 7440/** 7441 * xmlParseTryOrFinish: 7442 * @ctxt: an XML parser context 7443 * @terminate: last chunk indicator 7444 * 7445 * Try to progress on parsing 7446 * 7447 * Returns zero if no parsing was possible 7448 */ 7449int 7450xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7451 int ret = 0; 7452 int avail; 7453 xmlChar cur, next; 7454 7455#ifdef DEBUG_PUSH 7456 switch (ctxt->instate) { 7457 case XML_PARSER_EOF: 7458 xmlGenericError(xmlGenericErrorContext, 7459 "PP: try EOF\n"); break; 7460 case XML_PARSER_START: 7461 xmlGenericError(xmlGenericErrorContext, 7462 "PP: try START\n"); break; 7463 case XML_PARSER_MISC: 7464 xmlGenericError(xmlGenericErrorContext, 7465 "PP: try MISC\n");break; 7466 case XML_PARSER_COMMENT: 7467 xmlGenericError(xmlGenericErrorContext, 7468 "PP: try COMMENT\n");break; 7469 case XML_PARSER_PROLOG: 7470 xmlGenericError(xmlGenericErrorContext, 7471 "PP: try PROLOG\n");break; 7472 case XML_PARSER_START_TAG: 7473 xmlGenericError(xmlGenericErrorContext, 7474 "PP: try START_TAG\n");break; 7475 case XML_PARSER_CONTENT: 7476 xmlGenericError(xmlGenericErrorContext, 7477 "PP: try CONTENT\n");break; 7478 case XML_PARSER_CDATA_SECTION: 7479 xmlGenericError(xmlGenericErrorContext, 7480 "PP: try 
CDATA_SECTION\n");break; 7481 case XML_PARSER_END_TAG: 7482 xmlGenericError(xmlGenericErrorContext, 7483 "PP: try END_TAG\n");break; 7484 case XML_PARSER_ENTITY_DECL: 7485 xmlGenericError(xmlGenericErrorContext, 7486 "PP: try ENTITY_DECL\n");break; 7487 case XML_PARSER_ENTITY_VALUE: 7488 xmlGenericError(xmlGenericErrorContext, 7489 "PP: try ENTITY_VALUE\n");break; 7490 case XML_PARSER_ATTRIBUTE_VALUE: 7491 xmlGenericError(xmlGenericErrorContext, 7492 "PP: try ATTRIBUTE_VALUE\n");break; 7493 case XML_PARSER_DTD: 7494 xmlGenericError(xmlGenericErrorContext, 7495 "PP: try DTD\n");break; 7496 case XML_PARSER_EPILOG: 7497 xmlGenericError(xmlGenericErrorContext, 7498 "PP: try EPILOG\n");break; 7499 case XML_PARSER_PI: 7500 xmlGenericError(xmlGenericErrorContext, 7501 "PP: try PI\n");break; 7502 case XML_PARSER_IGNORE: 7503 xmlGenericError(xmlGenericErrorContext, 7504 "PP: try IGNORE\n");break; 7505 } 7506#endif 7507 7508 while (1) { 7509 /* 7510 * Pop-up of finished entities. 7511 */ 7512 while ((RAW == 0) && (ctxt->inputNr > 1)) 7513 xmlPopInput(ctxt); 7514 7515 if (ctxt->input ==NULL) break; 7516 if (ctxt->input->buf == NULL) 7517 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7518 else 7519 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7520 if (avail < 1) 7521 goto done; 7522 switch (ctxt->instate) { 7523 case XML_PARSER_EOF: 7524 /* 7525 * Document parsing is done ! 7526 */ 7527 goto done; 7528 case XML_PARSER_START: 7529 /* 7530 * Very first chars read from the document flow. 
7531 */ 7532 if (avail < 2) 7533 goto done; 7534 7535 cur = ctxt->input->cur[0]; 7536 next = ctxt->input->cur[1]; 7537 if (cur == 0) { 7538 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7539 ctxt->sax->setDocumentLocator(ctxt->userData, 7540 &xmlDefaultSAXLocator); 7541 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7543 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7544 ctxt->wellFormed = 0; 7545 ctxt->disableSAX = 1; 7546 ctxt->instate = XML_PARSER_EOF; 7547#ifdef DEBUG_PUSH 7548 xmlGenericError(xmlGenericErrorContext, 7549 "PP: entering EOF\n"); 7550#endif 7551 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7552 ctxt->sax->endDocument(ctxt->userData); 7553 goto done; 7554 } 7555 if ((cur == '<') && (next == '?')) { 7556 /* PI or XML decl */ 7557 if (avail < 5) return(ret); 7558 if ((!terminate) && 7559 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7560 return(ret); 7561 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7562 ctxt->sax->setDocumentLocator(ctxt->userData, 7563 &xmlDefaultSAXLocator); 7564 if ((ctxt->input->cur[2] == 'x') && 7565 (ctxt->input->cur[3] == 'm') && 7566 (ctxt->input->cur[4] == 'l') && 7567 (IS_BLANK(ctxt->input->cur[5]))) { 7568 ret += 5; 7569#ifdef DEBUG_PUSH 7570 xmlGenericError(xmlGenericErrorContext, 7571 "PP: Parsing XML Decl\n"); 7572#endif 7573 xmlParseXMLDecl(ctxt); 7574 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7575 /* 7576 * The XML REC instructs us to stop parsing right 7577 * here 7578 */ 7579 ctxt->instate = XML_PARSER_EOF; 7580 return(0); 7581 } 7582 ctxt->standalone = ctxt->input->standalone; 7583 if ((ctxt->encoding == NULL) && 7584 (ctxt->input->encoding != NULL)) 7585 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 7586 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7587 (!ctxt->disableSAX)) 7588 ctxt->sax->startDocument(ctxt->userData); 7589 ctxt->instate = XML_PARSER_MISC; 7590#ifdef DEBUG_PUSH 7591 
xmlGenericError(xmlGenericErrorContext, 7592 "PP: entering MISC\n"); 7593#endif 7594 } else { 7595 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7596 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7597 (!ctxt->disableSAX)) 7598 ctxt->sax->startDocument(ctxt->userData); 7599 ctxt->instate = XML_PARSER_MISC; 7600#ifdef DEBUG_PUSH 7601 xmlGenericError(xmlGenericErrorContext, 7602 "PP: entering MISC\n"); 7603#endif 7604 } 7605 } else { 7606 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7607 ctxt->sax->setDocumentLocator(ctxt->userData, 7608 &xmlDefaultSAXLocator); 7609 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7610 if ((ctxt->sax) && (ctxt->sax->startDocument) && 7611 (!ctxt->disableSAX)) 7612 ctxt->sax->startDocument(ctxt->userData); 7613 ctxt->instate = XML_PARSER_MISC; 7614#ifdef DEBUG_PUSH 7615 xmlGenericError(xmlGenericErrorContext, 7616 "PP: entering MISC\n"); 7617#endif 7618 } 7619 break; 7620 case XML_PARSER_MISC: 7621 SKIP_BLANKS; 7622 if (ctxt->input->buf == NULL) 7623 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7624 else 7625 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7626 if (avail < 2) 7627 goto done; 7628 cur = ctxt->input->cur[0]; 7629 next = ctxt->input->cur[1]; 7630 if ((cur == '<') && (next == '?')) { 7631 if ((!terminate) && 7632 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7633 goto done; 7634#ifdef DEBUG_PUSH 7635 xmlGenericError(xmlGenericErrorContext, 7636 "PP: Parsing PI\n"); 7637#endif 7638 xmlParsePI(ctxt); 7639 } else if ((cur == '<') && (next == '!') && 7640 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7641 if ((!terminate) && 7642 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7643 goto done; 7644#ifdef DEBUG_PUSH 7645 xmlGenericError(xmlGenericErrorContext, 7646 "PP: Parsing Comment\n"); 7647#endif 7648 xmlParseComment(ctxt); 7649 ctxt->instate = XML_PARSER_MISC; 7650 } else if ((cur == '<') && (next == '!') && 7651 
(ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && 7652 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && 7653 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && 7654 (ctxt->input->cur[8] == 'E')) { 7655 if ((!terminate) && 7656 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7657 goto done; 7658#ifdef DEBUG_PUSH 7659 xmlGenericError(xmlGenericErrorContext, 7660 "PP: Parsing internal subset\n"); 7661#endif 7662 ctxt->inSubset = 1; 7663 xmlParseDocTypeDecl(ctxt); 7664 if (RAW == '[') { 7665 ctxt->instate = XML_PARSER_DTD; 7666#ifdef DEBUG_PUSH 7667 xmlGenericError(xmlGenericErrorContext, 7668 "PP: entering DTD\n"); 7669#endif 7670 } else { 7671 /* 7672 * Create and update the external subset. 7673 */ 7674 ctxt->inSubset = 2; 7675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7676 (ctxt->sax->externalSubset != NULL)) 7677 ctxt->sax->externalSubset(ctxt->userData, 7678 ctxt->intSubName, ctxt->extSubSystem, 7679 ctxt->extSubURI); 7680 ctxt->inSubset = 0; 7681 ctxt->instate = XML_PARSER_PROLOG; 7682#ifdef DEBUG_PUSH 7683 xmlGenericError(xmlGenericErrorContext, 7684 "PP: entering PROLOG\n"); 7685#endif 7686 } 7687 } else if ((cur == '<') && (next == '!') && 7688 (avail < 9)) { 7689 goto done; 7690 } else { 7691 ctxt->instate = XML_PARSER_START_TAG; 7692#ifdef DEBUG_PUSH 7693 xmlGenericError(xmlGenericErrorContext, 7694 "PP: entering START_TAG\n"); 7695#endif 7696 } 7697 break; 7698 case XML_PARSER_IGNORE: 7699 xmlGenericError(xmlGenericErrorContext, 7700 "PP: internal error, state == IGNORE"); 7701 ctxt->instate = XML_PARSER_DTD; 7702#ifdef DEBUG_PUSH 7703 xmlGenericError(xmlGenericErrorContext, 7704 "PP: entering DTD\n"); 7705#endif 7706 break; 7707 case XML_PARSER_PROLOG: 7708 SKIP_BLANKS; 7709 if (ctxt->input->buf == NULL) 7710 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7711 else 7712 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7713 if (avail < 2) 7714 goto done; 
7715 cur = ctxt->input->cur[0]; 7716 next = ctxt->input->cur[1]; 7717 if ((cur == '<') && (next == '?')) { 7718 if ((!terminate) && 7719 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7720 goto done; 7721#ifdef DEBUG_PUSH 7722 xmlGenericError(xmlGenericErrorContext, 7723 "PP: Parsing PI\n"); 7724#endif 7725 xmlParsePI(ctxt); 7726 } else if ((cur == '<') && (next == '!') && 7727 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7728 if ((!terminate) && 7729 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7730 goto done; 7731#ifdef DEBUG_PUSH 7732 xmlGenericError(xmlGenericErrorContext, 7733 "PP: Parsing Comment\n"); 7734#endif 7735 xmlParseComment(ctxt); 7736 ctxt->instate = XML_PARSER_PROLOG; 7737 } else if ((cur == '<') && (next == '!') && 7738 (avail < 4)) { 7739 goto done; 7740 } else { 7741 ctxt->instate = XML_PARSER_START_TAG; 7742#ifdef DEBUG_PUSH 7743 xmlGenericError(xmlGenericErrorContext, 7744 "PP: entering START_TAG\n"); 7745#endif 7746 } 7747 break; 7748 case XML_PARSER_EPILOG: 7749 SKIP_BLANKS; 7750 if (ctxt->input->buf == NULL) 7751 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 7752 else 7753 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 7754 if (avail < 2) 7755 goto done; 7756 cur = ctxt->input->cur[0]; 7757 next = ctxt->input->cur[1]; 7758 if ((cur == '<') && (next == '?')) { 7759 if ((!terminate) && 7760 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7761 goto done; 7762#ifdef DEBUG_PUSH 7763 xmlGenericError(xmlGenericErrorContext, 7764 "PP: Parsing PI\n"); 7765#endif 7766 xmlParsePI(ctxt); 7767 ctxt->instate = XML_PARSER_EPILOG; 7768 } else if ((cur == '<') && (next == '!') && 7769 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7770 if ((!terminate) && 7771 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7772 goto done; 7773#ifdef DEBUG_PUSH 7774 xmlGenericError(xmlGenericErrorContext, 7775 "PP: Parsing Comment\n"); 7776#endif 7777 
xmlParseComment(ctxt); 7778 ctxt->instate = XML_PARSER_EPILOG; 7779 } else if ((cur == '<') && (next == '!') && 7780 (avail < 4)) { 7781 goto done; 7782 } else { 7783 ctxt->errNo = XML_ERR_DOCUMENT_END; 7784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7785 ctxt->sax->error(ctxt->userData, 7786 "Extra content at the end of the document\n"); 7787 ctxt->wellFormed = 0; 7788 ctxt->disableSAX = 1; 7789 ctxt->instate = XML_PARSER_EOF; 7790#ifdef DEBUG_PUSH 7791 xmlGenericError(xmlGenericErrorContext, 7792 "PP: entering EOF\n"); 7793#endif 7794 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7795 (!ctxt->disableSAX)) 7796 ctxt->sax->endDocument(ctxt->userData); 7797 goto done; 7798 } 7799 break; 7800 case XML_PARSER_START_TAG: { 7801 xmlChar *name, *oldname; 7802 7803 if ((avail < 2) && (ctxt->inputNr == 1)) 7804 goto done; 7805 cur = ctxt->input->cur[0]; 7806 if (cur != '<') { 7807 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7809 ctxt->sax->error(ctxt->userData, 7810 "Start tag expect, '<' not found\n"); 7811 ctxt->wellFormed = 0; 7812 ctxt->disableSAX = 1; 7813 ctxt->instate = XML_PARSER_EOF; 7814#ifdef DEBUG_PUSH 7815 xmlGenericError(xmlGenericErrorContext, 7816 "PP: entering EOF\n"); 7817#endif 7818 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7819 (!ctxt->disableSAX)) 7820 ctxt->sax->endDocument(ctxt->userData); 7821 goto done; 7822 } 7823 if ((!terminate) && 7824 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7825 goto done; 7826 if (ctxt->spaceNr == 0) 7827 spacePush(ctxt, -1); 7828 else 7829 spacePush(ctxt, *ctxt->space); 7830 name = xmlParseStartTag(ctxt); 7831 if (name == NULL) { 7832 spacePop(ctxt); 7833 ctxt->instate = XML_PARSER_EOF; 7834#ifdef DEBUG_PUSH 7835 xmlGenericError(xmlGenericErrorContext, 7836 "PP: entering EOF\n"); 7837#endif 7838 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 7839 (!ctxt->disableSAX)) 7840 ctxt->sax->endDocument(ctxt->userData); 7841 goto 
done; 7842 } 7843 namePush(ctxt, xmlStrdup(name)); 7844 7845 /* 7846 * [ VC: Root Element Type ] 7847 * The Name in the document type declaration must match 7848 * the element type of the root element. 7849 */ 7850 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7851 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7852 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7853 7854 /* 7855 * Check for an Empty Element. 7856 */ 7857 if ((RAW == '/') && (NXT(1) == '>')) { 7858 SKIP(2); 7859 if ((ctxt->sax != NULL) && 7860 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) 7861 ctxt->sax->endElement(ctxt->userData, name); 7862 xmlFree(name); 7863 oldname = namePop(ctxt); 7864 spacePop(ctxt); 7865 if (oldname != NULL) { 7866#ifdef DEBUG_STACK 7867 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7868#endif 7869 xmlFree(oldname); 7870 } 7871 if (ctxt->name == NULL) { 7872 ctxt->instate = XML_PARSER_EPILOG; 7873#ifdef DEBUG_PUSH 7874 xmlGenericError(xmlGenericErrorContext, 7875 "PP: entering EPILOG\n"); 7876#endif 7877 } else { 7878 ctxt->instate = XML_PARSER_CONTENT; 7879#ifdef DEBUG_PUSH 7880 xmlGenericError(xmlGenericErrorContext, 7881 "PP: entering CONTENT\n"); 7882#endif 7883 } 7884 break; 7885 } 7886 if (RAW == '>') { 7887 NEXT; 7888 } else { 7889 ctxt->errNo = XML_ERR_GT_REQUIRED; 7890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7891 ctxt->sax->error(ctxt->userData, 7892 "Couldn't find end of Start Tag %s\n", 7893 name); 7894 ctxt->wellFormed = 0; 7895 ctxt->disableSAX = 1; 7896 7897 /* 7898 * end of parsing of this node. 
7899 */ 7900 nodePop(ctxt); 7901 oldname = namePop(ctxt); 7902 spacePop(ctxt); 7903 if (oldname != NULL) { 7904#ifdef DEBUG_STACK 7905 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); 7906#endif 7907 xmlFree(oldname); 7908 } 7909 } 7910 xmlFree(name); 7911 ctxt->instate = XML_PARSER_CONTENT; 7912#ifdef DEBUG_PUSH 7913 xmlGenericError(xmlGenericErrorContext, 7914 "PP: entering CONTENT\n"); 7915#endif 7916 break; 7917 } 7918 case XML_PARSER_CONTENT: { 7919 const xmlChar *test; 7920 int cons; 7921 xmlChar tok; 7922 7923 /* 7924 * Handle preparsed entities and charRef 7925 */ 7926 if (ctxt->token != 0) { 7927 xmlChar cur[2] = { 0 , 0 } ; 7928 7929 cur[0] = (xmlChar) ctxt->token; 7930 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 7931 (ctxt->sax->characters != NULL)) 7932 ctxt->sax->characters(ctxt->userData, cur, 1); 7933 ctxt->token = 0; 7934 } 7935 if ((avail < 2) && (ctxt->inputNr == 1)) 7936 goto done; 7937 cur = ctxt->input->cur[0]; 7938 next = ctxt->input->cur[1]; 7939 7940 test = CUR_PTR; 7941 cons = ctxt->input->consumed; 7942 tok = ctxt->token; 7943 if ((cur == '<') && (next == '?')) { 7944 if ((!terminate) && 7945 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7946 goto done; 7947#ifdef DEBUG_PUSH 7948 xmlGenericError(xmlGenericErrorContext, 7949 "PP: Parsing PI\n"); 7950#endif 7951 xmlParsePI(ctxt); 7952 } else if ((cur == '<') && (next == '!') && 7953 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 7954 if ((!terminate) && 7955 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7956 goto done; 7957#ifdef DEBUG_PUSH 7958 xmlGenericError(xmlGenericErrorContext, 7959 "PP: Parsing Comment\n"); 7960#endif 7961 xmlParseComment(ctxt); 7962 ctxt->instate = XML_PARSER_CONTENT; 7963 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 7964 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && 7965 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && 7966 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && 7967 
(ctxt->input->cur[8] == '[')) { 7968 SKIP(9); 7969 ctxt->instate = XML_PARSER_CDATA_SECTION; 7970#ifdef DEBUG_PUSH 7971 xmlGenericError(xmlGenericErrorContext, 7972 "PP: entering CDATA_SECTION\n"); 7973#endif 7974 break; 7975 } else if ((cur == '<') && (next == '!') && 7976 (avail < 9)) { 7977 goto done; 7978 } else if ((cur == '<') && (next == '/')) { 7979 ctxt->instate = XML_PARSER_END_TAG; 7980#ifdef DEBUG_PUSH 7981 xmlGenericError(xmlGenericErrorContext, 7982 "PP: entering END_TAG\n"); 7983#endif 7984 break; 7985 } else if (cur == '<') { 7986 ctxt->instate = XML_PARSER_START_TAG; 7987#ifdef DEBUG_PUSH 7988 xmlGenericError(xmlGenericErrorContext, 7989 "PP: entering START_TAG\n"); 7990#endif 7991 break; 7992 } else if (cur == '&') { 7993 if ((!terminate) && 7994 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 7995 goto done; 7996#ifdef DEBUG_PUSH 7997 xmlGenericError(xmlGenericErrorContext, 7998 "PP: Parsing Reference\n"); 7999#endif 8000 xmlParseReference(ctxt); 8001 } else { 8002 /* TODO Avoid the extra copy, handle directly !!! */ 8003 /* 8004 * Goal of the following test is: 8005 * - minimize calls to the SAX 'character' callback 8006 * when they are mergeable 8007 * - handle an problem for isBlank when we only parse 8008 * a sequence of blank chars and the next one is 8009 * not available to check against '<' presence. 8010 * - tries to homogenize the differences in SAX 8011 * callbacks beween the push and pull versions 8012 * of the parser. 8013 */ 8014 if ((ctxt->inputNr == 1) && 8015 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 8016 if ((!terminate) && 8017 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 8018 goto done; 8019 } 8020 ctxt->checkIndex = 0; 8021#ifdef DEBUG_PUSH 8022 xmlGenericError(xmlGenericErrorContext, 8023 "PP: Parsing char data\n"); 8024#endif 8025 xmlParseCharData(ctxt, 0); 8026 } 8027 /* 8028 * Pop-up of finished entities. 
8029 */ 8030 while ((RAW == 0) && (ctxt->inputNr > 1)) 8031 xmlPopInput(ctxt); 8032 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 8033 (tok == ctxt->token)) { 8034 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 8035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8036 ctxt->sax->error(ctxt->userData, 8037 "detected an error in element content\n"); 8038 ctxt->wellFormed = 0; 8039 ctxt->disableSAX = 1; 8040 ctxt->instate = XML_PARSER_EOF; 8041 break; 8042 } 8043 break; 8044 } 8045 case XML_PARSER_CDATA_SECTION: { 8046 /* 8047 * The Push mode need to have the SAX callback for 8048 * cdataBlock merge back contiguous callbacks. 8049 */ 8050 int base; 8051 8052 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 8053 if (base < 0) { 8054 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 8055 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8056 if (ctxt->sax->cdataBlock != NULL) 8057 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, 8058 XML_PARSER_BIG_BUFFER_SIZE); 8059 } 8060 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 8061 ctxt->checkIndex = 0; 8062 } 8063 goto done; 8064 } else { 8065 if ((ctxt->sax != NULL) && (base > 0) && 8066 (!ctxt->disableSAX)) { 8067 if (ctxt->sax->cdataBlock != NULL) 8068 ctxt->sax->cdataBlock(ctxt->userData, 8069 ctxt->input->cur, base); 8070 } 8071 SKIP(base + 3); 8072 ctxt->checkIndex = 0; 8073 ctxt->instate = XML_PARSER_CONTENT; 8074#ifdef DEBUG_PUSH 8075 xmlGenericError(xmlGenericErrorContext, 8076 "PP: entering CONTENT\n"); 8077#endif 8078 } 8079 break; 8080 } 8081 case XML_PARSER_END_TAG: 8082 if (avail < 2) 8083 goto done; 8084 if ((!terminate) && 8085 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 8086 goto done; 8087 xmlParseEndTag(ctxt); 8088 if (ctxt->name == NULL) { 8089 ctxt->instate = XML_PARSER_EPILOG; 8090#ifdef DEBUG_PUSH 8091 xmlGenericError(xmlGenericErrorContext, 8092 "PP: entering EPILOG\n"); 8093#endif 8094 } else { 8095 ctxt->instate = XML_PARSER_CONTENT; 8096#ifdef DEBUG_PUSH 8097 
xmlGenericError(xmlGenericErrorContext, 8098 "PP: entering CONTENT\n"); 8099#endif 8100 } 8101 break; 8102 case XML_PARSER_DTD: { 8103 /* 8104 * Sorry but progressive parsing of the internal subset 8105 * is not expected to be supported. We first check that 8106 * the full content of the internal subset is available and 8107 * the parsing is launched only at that point. 8108 * Internal subset ends up with "']' S? '>'" in an unescaped 8109 * section and not in a ']]>' sequence which are conditional 8110 * sections (whoever argued to keep that crap in XML deserve 8111 * a place in hell !). 8112 */ 8113 int base, i; 8114 xmlChar *buf; 8115 xmlChar quote = 0; 8116 8117 base = ctxt->input->cur - ctxt->input->base; 8118 if (base < 0) return(0); 8119 if (ctxt->checkIndex > base) 8120 base = ctxt->checkIndex; 8121 buf = ctxt->input->buf->buffer->content; 8122 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 8123 base++) { 8124 if (quote != 0) { 8125 if (buf[base] == quote) 8126 quote = 0; 8127 continue; 8128 } 8129 if (buf[base] == '"') { 8130 quote = '"'; 8131 continue; 8132 } 8133 if (buf[base] == '\'') { 8134 quote = '\''; 8135 continue; 8136 } 8137 if (buf[base] == ']') { 8138 if ((unsigned int) base +1 >= 8139 ctxt->input->buf->buffer->use) 8140 break; 8141 if (buf[base + 1] == ']') { 8142 /* conditional crap, skip both ']' ! 
*/ 8143 base++; 8144 continue; 8145 } 8146 for (i = 0; 8147 (unsigned int) base + i < ctxt->input->buf->buffer->use; 8148 i++) { 8149 if (buf[base + i] == '>') 8150 goto found_end_int_subset; 8151 } 8152 break; 8153 } 8154 } 8155 /* 8156 * We didn't found the end of the Internal subset 8157 */ 8158 if (quote == 0) 8159 ctxt->checkIndex = base; 8160#ifdef DEBUG_PUSH 8161 if (next == 0) 8162 xmlGenericError(xmlGenericErrorContext, 8163 "PP: lookup of int subset end filed\n"); 8164#endif 8165 goto done; 8166 8167found_end_int_subset: 8168 xmlParseInternalSubset(ctxt); 8169 ctxt->inSubset = 2; 8170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 8171 (ctxt->sax->externalSubset != NULL)) 8172 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8173 ctxt->extSubSystem, ctxt->extSubURI); 8174 ctxt->inSubset = 0; 8175 ctxt->instate = XML_PARSER_PROLOG; 8176 ctxt->checkIndex = 0; 8177#ifdef DEBUG_PUSH 8178 xmlGenericError(xmlGenericErrorContext, 8179 "PP: entering PROLOG\n"); 8180#endif 8181 break; 8182 } 8183 case XML_PARSER_COMMENT: 8184 xmlGenericError(xmlGenericErrorContext, 8185 "PP: internal error, state == COMMENT\n"); 8186 ctxt->instate = XML_PARSER_CONTENT; 8187#ifdef DEBUG_PUSH 8188 xmlGenericError(xmlGenericErrorContext, 8189 "PP: entering CONTENT\n"); 8190#endif 8191 break; 8192 case XML_PARSER_PI: 8193 xmlGenericError(xmlGenericErrorContext, 8194 "PP: internal error, state == PI\n"); 8195 ctxt->instate = XML_PARSER_CONTENT; 8196#ifdef DEBUG_PUSH 8197 xmlGenericError(xmlGenericErrorContext, 8198 "PP: entering CONTENT\n"); 8199#endif 8200 break; 8201 case XML_PARSER_ENTITY_DECL: 8202 xmlGenericError(xmlGenericErrorContext, 8203 "PP: internal error, state == ENTITY_DECL\n"); 8204 ctxt->instate = XML_PARSER_DTD; 8205#ifdef DEBUG_PUSH 8206 xmlGenericError(xmlGenericErrorContext, 8207 "PP: entering DTD\n"); 8208#endif 8209 break; 8210 case XML_PARSER_ENTITY_VALUE: 8211 xmlGenericError(xmlGenericErrorContext, 8212 "PP: internal error, state == 
ENTITY_VALUE\n"); 8213 ctxt->instate = XML_PARSER_CONTENT; 8214#ifdef DEBUG_PUSH 8215 xmlGenericError(xmlGenericErrorContext, 8216 "PP: entering DTD\n"); 8217#endif 8218 break; 8219 case XML_PARSER_ATTRIBUTE_VALUE: 8220 xmlGenericError(xmlGenericErrorContext, 8221 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 8222 ctxt->instate = XML_PARSER_START_TAG; 8223#ifdef DEBUG_PUSH 8224 xmlGenericError(xmlGenericErrorContext, 8225 "PP: entering START_TAG\n"); 8226#endif 8227 break; 8228 case XML_PARSER_SYSTEM_LITERAL: 8229 xmlGenericError(xmlGenericErrorContext, 8230 "PP: internal error, state == SYSTEM_LITERAL\n"); 8231 ctxt->instate = XML_PARSER_START_TAG; 8232#ifdef DEBUG_PUSH 8233 xmlGenericError(xmlGenericErrorContext, 8234 "PP: entering START_TAG\n"); 8235#endif 8236 break; 8237 } 8238 } 8239done: 8240#ifdef DEBUG_PUSH 8241 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 8242#endif 8243 return(ret); 8244} 8245 8246/** 8247 * xmlParseTry: 8248 * @ctxt: an XML parser context 8249 * 8250 * Try to progress on parsing 8251 * 8252 * Returns zero if no parsing was possible 8253 */ 8254int 8255xmlParseTry(xmlParserCtxtPtr ctxt) { 8256 return(xmlParseTryOrFinish(ctxt, 0)); 8257} 8258 8259/** 8260 * xmlParseChunk: 8261 * @ctxt: an XML parser context 8262 * @chunk: an char array 8263 * @size: the size in byte of the chunk 8264 * @terminate: last chunk indicator 8265 * 8266 * Parse a Chunk of memory 8267 * 8268 * Returns zero if no error, the xmlParserErrors otherwise. 
8269 */ 8270int 8271xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 8272 int terminate) { 8273 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8274 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 8275 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 8276 int cur = ctxt->input->cur - ctxt->input->base; 8277 8278 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8279 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8280 ctxt->input->cur = ctxt->input->base + cur; 8281 ctxt->input->end = 8282 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 8283#ifdef DEBUG_PUSH 8284 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8285#endif 8286 8287 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 8288 xmlParseTryOrFinish(ctxt, terminate); 8289 } else if (ctxt->instate != XML_PARSER_EOF) { 8290 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 8291 xmlParserInputBufferPtr in = ctxt->input->buf; 8292 if ((in->encoder != NULL) && (in->buffer != NULL) && 8293 (in->raw != NULL)) { 8294 int nbchars; 8295 8296 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 8297 if (nbchars < 0) { 8298 xmlGenericError(xmlGenericErrorContext, 8299 "xmlParseChunk: encoder error\n"); 8300 return(XML_ERR_INVALID_ENCODING); 8301 } 8302 } 8303 } 8304 } 8305 xmlParseTryOrFinish(ctxt, terminate); 8306 if (terminate) { 8307 /* 8308 * Check for termination 8309 */ 8310 if ((ctxt->instate != XML_PARSER_EOF) && 8311 (ctxt->instate != XML_PARSER_EPILOG)) { 8312 ctxt->errNo = XML_ERR_DOCUMENT_END; 8313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8314 ctxt->sax->error(ctxt->userData, 8315 "Extra content at the end of the document\n"); 8316 ctxt->wellFormed = 0; 8317 ctxt->disableSAX = 1; 8318 } 8319 if (ctxt->instate != XML_PARSER_EOF) { 8320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && 8321 (!ctxt->disableSAX)) 8322 
ctxt->sax->endDocument(ctxt->userData); 8323 } 8324 ctxt->instate = XML_PARSER_EOF; 8325 } 8326 return((xmlParserErrors) ctxt->errNo); 8327} 8328 8329/************************************************************************ 8330 * * 8331 * I/O front end functions to the parser * 8332 * * 8333 ************************************************************************/ 8334 8335/** 8336 * xmlStopParser: 8337 * @ctxt: an XML parser context 8338 * 8339 * Blocks further parser processing 8340 */ 8341void 8342xmlStopParser(xmlParserCtxtPtr ctxt) { 8343 ctxt->instate = XML_PARSER_EOF; 8344 if (ctxt->input != NULL) 8345 ctxt->input->cur = BAD_CAST""; 8346} 8347 8348/** 8349 * xmlCreatePushParserCtxt: 8350 * @sax: a SAX handler 8351 * @user_data: The user data returned on SAX callbacks 8352 * @chunk: a pointer to an array of chars 8353 * @size: number of chars in the array 8354 * @filename: an optional file name or URI 8355 * 8356 * Create a parser context for using the XML parser in push mode 8357 * To allow content encoding detection, @size should be >= 4 8358 * The value of @filename is used for fetching external entities 8359 * and error/warning reports. 
8360 * 8361 * Returns the new parser context or NULL 8362 */ 8363xmlParserCtxtPtr 8364xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8365 const char *chunk, int size, const char *filename) { 8366 xmlParserCtxtPtr ctxt; 8367 xmlParserInputPtr inputStream; 8368 xmlParserInputBufferPtr buf; 8369 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 8370 8371 /* 8372 * plug some encoding conversion routines 8373 */ 8374 if ((chunk != NULL) && (size >= 4)) 8375 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 8376 8377 buf = xmlAllocParserInputBuffer(enc); 8378 if (buf == NULL) return(NULL); 8379 8380 ctxt = xmlNewParserCtxt(); 8381 if (ctxt == NULL) { 8382 xmlFree(buf); 8383 return(NULL); 8384 } 8385 if (sax != NULL) { 8386 if (ctxt->sax != &xmlDefaultSAXHandler) 8387 xmlFree(ctxt->sax); 8388 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8389 if (ctxt->sax == NULL) { 8390 xmlFree(buf); 8391 xmlFree(ctxt); 8392 return(NULL); 8393 } 8394 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8395 if (user_data != NULL) 8396 ctxt->userData = user_data; 8397 } 8398 if (filename == NULL) { 8399 ctxt->directory = NULL; 8400 } else { 8401 ctxt->directory = xmlParserGetDirectory(filename); 8402 } 8403 8404 inputStream = xmlNewInputStream(ctxt); 8405 if (inputStream == NULL) { 8406 xmlFreeParserCtxt(ctxt); 8407 return(NULL); 8408 } 8409 8410 if (filename == NULL) 8411 inputStream->filename = NULL; 8412 else 8413 inputStream->filename = xmlMemStrdup(filename); 8414 inputStream->buf = buf; 8415 inputStream->base = inputStream->buf->buffer->content; 8416 inputStream->cur = inputStream->buf->buffer->content; 8417 inputStream->end = 8418 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 8419 if (enc != XML_CHAR_ENCODING_NONE) { 8420 xmlSwitchEncoding(ctxt, enc); 8421 } 8422 8423 inputPush(ctxt, inputStream); 8424 8425 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 8426 (ctxt->input->buf != NULL)) { 8427 
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 8428#ifdef DEBUG_PUSH 8429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 8430#endif 8431 } 8432 8433 return(ctxt); 8434} 8435 8436/** 8437 * xmlCreateIOParserCtxt: 8438 * @sax: a SAX handler 8439 * @user_data: The user data returned on SAX callbacks 8440 * @ioread: an I/O read function 8441 * @ioclose: an I/O close function 8442 * @ioctx: an I/O handler 8443 * @enc: the charset encoding if known 8444 * 8445 * Create a parser context for using the XML parser with an existing 8446 * I/O stream 8447 * 8448 * Returns the new parser context or NULL 8449 */ 8450xmlParserCtxtPtr 8451xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 8452 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 8453 void *ioctx, xmlCharEncoding enc) { 8454 xmlParserCtxtPtr ctxt; 8455 xmlParserInputPtr inputStream; 8456 xmlParserInputBufferPtr buf; 8457 8458 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 8459 if (buf == NULL) return(NULL); 8460 8461 ctxt = xmlNewParserCtxt(); 8462 if (ctxt == NULL) { 8463 xmlFree(buf); 8464 return(NULL); 8465 } 8466 if (sax != NULL) { 8467 if (ctxt->sax != &xmlDefaultSAXHandler) 8468 xmlFree(ctxt->sax); 8469 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 8470 if (ctxt->sax == NULL) { 8471 xmlFree(buf); 8472 xmlFree(ctxt); 8473 return(NULL); 8474 } 8475 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 8476 if (user_data != NULL) 8477 ctxt->userData = user_data; 8478 } 8479 8480 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 8481 if (inputStream == NULL) { 8482 xmlFreeParserCtxt(ctxt); 8483 return(NULL); 8484 } 8485 inputPush(ctxt, inputStream); 8486 8487 return(ctxt); 8488} 8489 8490/************************************************************************ 8491 * * 8492 * Front ends when parsing a Dtd * 8493 * * 8494 ************************************************************************/ 8495 8496/** 8497 * xmlIOParseDTD: 8498 * 
@sax: the SAX handler block or NULL 8499 * @input: an Input Buffer 8500 * @enc: the charset encoding if known 8501 * 8502 * Load and parse a DTD 8503 * 8504 * Returns the resulting xmlDtdPtr or NULL in case of error. 8505 * @input will be freed at parsing end. 8506 */ 8507 8508xmlDtdPtr 8509xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 8510 xmlCharEncoding enc) { 8511 xmlDtdPtr ret = NULL; 8512 xmlParserCtxtPtr ctxt; 8513 xmlParserInputPtr pinput = NULL; 8514 8515 if (input == NULL) 8516 return(NULL); 8517 8518 ctxt = xmlNewParserCtxt(); 8519 if (ctxt == NULL) { 8520 return(NULL); 8521 } 8522 8523 /* 8524 * Set-up the SAX context 8525 */ 8526 if (sax != NULL) { 8527 if (ctxt->sax != NULL) 8528 xmlFree(ctxt->sax); 8529 ctxt->sax = sax; 8530 ctxt->userData = NULL; 8531 } 8532 8533 /* 8534 * generate a parser input from the I/O handler 8535 */ 8536 8537 pinput = xmlNewIOInputStream(ctxt, input, enc); 8538 if (pinput == NULL) { 8539 if (sax != NULL) ctxt->sax = NULL; 8540 xmlFreeParserCtxt(ctxt); 8541 return(NULL); 8542 } 8543 8544 /* 8545 * plug some encoding conversion routines here. 8546 */ 8547 xmlPushInput(ctxt, pinput); 8548 8549 pinput->filename = NULL; 8550 pinput->line = 1; 8551 pinput->col = 1; 8552 pinput->base = ctxt->input->cur; 8553 pinput->cur = ctxt->input->cur; 8554 pinput->free = NULL; 8555 8556 /* 8557 * let's parse that entity knowing it's an external subset. 
8558 */ 8559 ctxt->inSubset = 2; 8560 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8561 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8562 BAD_CAST "none", BAD_CAST "none"); 8563 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 8564 8565 if (ctxt->myDoc != NULL) { 8566 if (ctxt->wellFormed) { 8567 ret = ctxt->myDoc->extSubset; 8568 ctxt->myDoc->extSubset = NULL; 8569 } else { 8570 ret = NULL; 8571 } 8572 xmlFreeDoc(ctxt->myDoc); 8573 ctxt->myDoc = NULL; 8574 } 8575 if (sax != NULL) ctxt->sax = NULL; 8576 xmlFreeParserCtxt(ctxt); 8577 8578 return(ret); 8579} 8580 8581/** 8582 * xmlSAXParseDTD: 8583 * @sax: the SAX handler block 8584 * @ExternalID: a NAME* containing the External ID of the DTD 8585 * @SystemID: a NAME* containing the URL to the DTD 8586 * 8587 * Load and parse an external subset. 8588 * 8589 * Returns the resulting xmlDtdPtr or NULL in case of error. 8590 */ 8591 8592xmlDtdPtr 8593xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 8594 const xmlChar *SystemID) { 8595 xmlDtdPtr ret = NULL; 8596 xmlParserCtxtPtr ctxt; 8597 xmlParserInputPtr input = NULL; 8598 xmlCharEncoding enc; 8599 8600 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 8601 8602 ctxt = xmlNewParserCtxt(); 8603 if (ctxt == NULL) { 8604 return(NULL); 8605 } 8606 8607 /* 8608 * Set-up the SAX context 8609 */ 8610 if (sax != NULL) { 8611 if (ctxt->sax != NULL) 8612 xmlFree(ctxt->sax); 8613 ctxt->sax = sax; 8614 ctxt->userData = NULL; 8615 } 8616 8617 /* 8618 * Ask the Entity resolver to load the damn thing 8619 */ 8620 8621 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8622 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8623 if (input == NULL) { 8624 if (sax != NULL) ctxt->sax = NULL; 8625 xmlFreeParserCtxt(ctxt); 8626 return(NULL); 8627 } 8628 8629 /* 8630 * plug some encoding conversion routines here. 
8631 */ 8632 xmlPushInput(ctxt, input); 8633 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 8634 xmlSwitchEncoding(ctxt, enc); 8635 8636 if (input->filename == NULL) 8637 input->filename = (char *) xmlStrdup(SystemID); 8638 input->line = 1; 8639 input->col = 1; 8640 input->base = ctxt->input->cur; 8641 input->cur = ctxt->input->cur; 8642 input->free = NULL; 8643 8644 /* 8645 * let's parse that entity knowing it's an external subset. 8646 */ 8647 ctxt->inSubset = 2; 8648 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8649 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 8650 ExternalID, SystemID); 8651 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8652 8653 if (ctxt->myDoc != NULL) { 8654 if (ctxt->wellFormed) { 8655 ret = ctxt->myDoc->extSubset; 8656 ctxt->myDoc->extSubset = NULL; 8657 } else { 8658 ret = NULL; 8659 } 8660 xmlFreeDoc(ctxt->myDoc); 8661 ctxt->myDoc = NULL; 8662 } 8663 if (sax != NULL) ctxt->sax = NULL; 8664 xmlFreeParserCtxt(ctxt); 8665 8666 return(ret); 8667} 8668 8669/** 8670 * xmlParseDTD: 8671 * @ExternalID: a NAME* containing the External ID of the DTD 8672 * @SystemID: a NAME* containing the URL to the DTD 8673 * 8674 * Load and parse an external subset. 8675 * 8676 * Returns the resulting xmlDtdPtr or NULL in case of error. 
8677 */ 8678 8679xmlDtdPtr 8680xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 8681 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 8682} 8683 8684/************************************************************************ 8685 * * 8686 * Front ends when parsing an Entity * 8687 * * 8688 ************************************************************************/ 8689 8690/** 8691 * xmlSAXParseBalancedChunk: 8692 * @ctx: an XML parser context (possibly NULL) 8693 * @sax: the SAX handler bloc (possibly NULL) 8694 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8695 * @input: a parser input stream 8696 * @enc: the encoding 8697 * 8698 * Parse a well-balanced chunk of an XML document 8699 * The user has to provide SAX callback block whose routines will be 8700 * called by the parser 8701 * The allowed sequence for the Well Balanced Chunk is the one defined by 8702 * the content production in the XML grammar: 8703 * 8704 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8705 * 8706 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 8707 * the error code otherwise 8708 */ 8709 8710int 8711xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax, 8712 void *user_data, xmlParserInputPtr input, 8713 xmlCharEncoding enc) { 8714 xmlParserCtxtPtr ctxt; 8715 int ret; 8716 8717 if (input == NULL) return(-1); 8718 8719 if (ctx != NULL) 8720 ctxt = ctx; 8721 else { 8722 ctxt = xmlNewParserCtxt(); 8723 if (ctxt == NULL) 8724 return(-1); 8725 if (sax == NULL) 8726 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8727 } 8728 8729 /* 8730 * Set-up the SAX context 8731 */ 8732 if (sax != NULL) { 8733 if (ctxt->sax != NULL) 8734 xmlFree(ctxt->sax); 8735 ctxt->sax = sax; 8736 ctxt->userData = user_data; 8737 } 8738 8739 /* 8740 * plug some encoding conversion routines here. 
8741 */ 8742 xmlPushInput(ctxt, input); 8743 if (enc != XML_CHAR_ENCODING_NONE) 8744 xmlSwitchEncoding(ctxt, enc); 8745 8746 /* 8747 * let's parse that entity knowing it's an external subset. 8748 */ 8749 xmlParseContent(ctxt); 8750 ret = ctxt->errNo; 8751 8752 if (ctx == NULL) { 8753 if (sax != NULL) 8754 ctxt->sax = NULL; 8755 else 8756 xmlFreeDoc(ctxt->myDoc); 8757 xmlFreeParserCtxt(ctxt); 8758 } 8759 return(ret); 8760} 8761 8762/** 8763 * xmlParseCtxtExternalEntity: 8764 * @ctx: the existing parsing context 8765 * @URL: the URL for the entity to load 8766 * @ID: the System ID for the entity to load 8767 * @list: the return value for the set of parsed nodes 8768 * 8769 * Parse an external general entity within an existing parsing context 8770 * An external general parsed entity is well-formed if it matches the 8771 * production labeled extParsedEnt. 8772 * 8773 * [78] extParsedEnt ::= TextDecl? content 8774 * 8775 * Returns 0 if the entity is well formed, -1 in case of args problem and 8776 * the parser error code otherwise 8777 */ 8778 8779int 8780xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 8781 const xmlChar *ID, xmlNodePtr *list) { 8782 xmlParserCtxtPtr ctxt; 8783 xmlDocPtr newDoc; 8784 xmlSAXHandlerPtr oldsax = NULL; 8785 int ret = 0; 8786 8787 if (ctx->depth > 40) { 8788 return(XML_ERR_ENTITY_LOOP); 8789 } 8790 8791 if (list != NULL) 8792 *list = NULL; 8793 if ((URL == NULL) && (ID == NULL)) 8794 return(-1); 8795 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 8796 return(-1); 8797 8798 8799 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8800 if (ctxt == NULL) return(-1); 8801 ctxt->userData = ctxt; 8802 oldsax = ctxt->sax; 8803 ctxt->sax = ctx->sax; 8804 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8805 if (newDoc == NULL) { 8806 xmlFreeParserCtxt(ctxt); 8807 return(-1); 8808 } 8809 if (ctx->myDoc != NULL) { 8810 newDoc->intSubset = ctx->myDoc->intSubset; 8811 newDoc->extSubset = ctx->myDoc->extSubset; 8812 } 8813 if 
(ctx->myDoc->URL != NULL) { 8814 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 8815 } 8816 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8817 if (newDoc->children == NULL) { 8818 ctxt->sax = oldsax; 8819 xmlFreeParserCtxt(ctxt); 8820 newDoc->intSubset = NULL; 8821 newDoc->extSubset = NULL; 8822 xmlFreeDoc(newDoc); 8823 return(-1); 8824 } 8825 nodePush(ctxt, newDoc->children); 8826 if (ctx->myDoc == NULL) { 8827 ctxt->myDoc = newDoc; 8828 } else { 8829 ctxt->myDoc = ctx->myDoc; 8830 newDoc->children->doc = ctx->myDoc; 8831 } 8832 8833 /* 8834 * Parse a possible text declaration first 8835 */ 8836 GROW; 8837 if ((RAW == '<') && (NXT(1) == '?') && 8838 (NXT(2) == 'x') && (NXT(3) == 'm') && 8839 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 8840 xmlParseTextDecl(ctxt); 8841 } 8842 8843 /* 8844 * Doing validity checking on chunk doesn't make sense 8845 */ 8846 ctxt->instate = XML_PARSER_CONTENT; 8847 ctxt->validate = ctx->validate; 8848 ctxt->loadsubset = ctx->loadsubset; 8849 ctxt->depth = ctx->depth + 1; 8850 ctxt->replaceEntities = ctx->replaceEntities; 8851 if (ctxt->validate) { 8852 ctxt->vctxt.error = ctx->vctxt.error; 8853 ctxt->vctxt.warning = ctx->vctxt.warning; 8854 /* Allocate the Node stack */ 8855 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); 8856 if (ctxt->vctxt.nodeTab == NULL) { 8857 xmlGenericError(xmlGenericErrorContext, 8858 "xmlParseCtxtExternalEntity: out of memory\n"); 8859 ctxt->validate = 0; 8860 ctxt->vctxt.error = NULL; 8861 ctxt->vctxt.warning = NULL; 8862 } else { 8863 ctxt->vctxt.nodeNr = 0; 8864 ctxt->vctxt.nodeMax = 4; 8865 ctxt->vctxt.node = NULL; 8866 } 8867 } else { 8868 ctxt->vctxt.error = NULL; 8869 ctxt->vctxt.warning = NULL; 8870 } 8871 8872 xmlParseContent(ctxt); 8873 8874 if ((RAW == '<') && (NXT(1) == '/')) { 8875 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8877 ctxt->sax->error(ctxt->userData, 8878 "chunk is not well 
balanced\n"); 8879 ctxt->wellFormed = 0; 8880 ctxt->disableSAX = 1; 8881 } else if (RAW != 0) { 8882 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 8883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8884 ctxt->sax->error(ctxt->userData, 8885 "extra content at the end of well balanced chunk\n"); 8886 ctxt->wellFormed = 0; 8887 ctxt->disableSAX = 1; 8888 } 8889 if (ctxt->node != newDoc->children) { 8890 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 8891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8892 ctxt->sax->error(ctxt->userData, 8893 "chunk is not well balanced\n"); 8894 ctxt->wellFormed = 0; 8895 ctxt->disableSAX = 1; 8896 } 8897 8898 if (!ctxt->wellFormed) { 8899 if (ctxt->errNo == 0) 8900 ret = 1; 8901 else 8902 ret = ctxt->errNo; 8903 } else { 8904 if (list != NULL) { 8905 xmlNodePtr cur; 8906 8907 /* 8908 * Return the newly created nodeset after unlinking it from 8909 * they pseudo parent. 8910 */ 8911 cur = newDoc->children->children; 8912 *list = cur; 8913 while (cur != NULL) { 8914 cur->parent = NULL; 8915 cur = cur->next; 8916 } 8917 newDoc->children->children = NULL; 8918 } 8919 ret = 0; 8920 } 8921 ctxt->sax = oldsax; 8922 xmlFreeParserCtxt(ctxt); 8923 newDoc->intSubset = NULL; 8924 newDoc->extSubset = NULL; 8925 xmlFreeDoc(newDoc); 8926 8927 return(ret); 8928} 8929 8930/** 8931 * xmlParseExternalEntity: 8932 * @doc: the document the chunk pertains to 8933 * @sax: the SAX handler bloc (possibly NULL) 8934 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8935 * @depth: Used for loop detection, use 0 8936 * @URL: the URL for the entity to load 8937 * @ID: the System ID for the entity to load 8938 * @list: the return value for the set of parsed nodes 8939 * 8940 * Parse an external general entity 8941 * An external general parsed entity is well-formed if it matches the 8942 * production labeled extParsedEnt. 8943 * 8944 * [78] extParsedEnt ::= TextDecl? 
content 8945 * 8946 * Returns 0 if the entity is well formed, -1 in case of args problem and 8947 * the parser error code otherwise 8948 */ 8949 8950int 8951xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 8952 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) { 8953 xmlParserCtxtPtr ctxt; 8954 xmlDocPtr newDoc; 8955 xmlSAXHandlerPtr oldsax = NULL; 8956 int ret = 0; 8957 8958 if (depth > 40) { 8959 return(XML_ERR_ENTITY_LOOP); 8960 } 8961 8962 8963 8964 if (list != NULL) 8965 *list = NULL; 8966 if ((URL == NULL) && (ID == NULL)) 8967 return(-1); 8968 if (doc == NULL) /* @@ relax but check for dereferences */ 8969 return(-1); 8970 8971 8972 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 8973 if (ctxt == NULL) return(-1); 8974 ctxt->userData = ctxt; 8975 if (sax != NULL) { 8976 oldsax = ctxt->sax; 8977 ctxt->sax = sax; 8978 if (user_data != NULL) 8979 ctxt->userData = user_data; 8980 } 8981 newDoc = xmlNewDoc(BAD_CAST "1.0"); 8982 if (newDoc == NULL) { 8983 xmlFreeParserCtxt(ctxt); 8984 return(-1); 8985 } 8986 if (doc != NULL) { 8987 newDoc->intSubset = doc->intSubset; 8988 newDoc->extSubset = doc->extSubset; 8989 } 8990 if (doc->URL != NULL) { 8991 newDoc->URL = xmlStrdup(doc->URL); 8992 } 8993 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 8994 if (newDoc->children == NULL) { 8995 if (sax != NULL) 8996 ctxt->sax = oldsax; 8997 xmlFreeParserCtxt(ctxt); 8998 newDoc->intSubset = NULL; 8999 newDoc->extSubset = NULL; 9000 xmlFreeDoc(newDoc); 9001 return(-1); 9002 } 9003 nodePush(ctxt, newDoc->children); 9004 if (doc == NULL) { 9005 ctxt->myDoc = newDoc; 9006 } else { 9007 ctxt->myDoc = doc; 9008 newDoc->children->doc = doc; 9009 } 9010 9011 /* 9012 * Parse a possible text declaration first 9013 */ 9014 GROW; 9015 if ((RAW == '<') && (NXT(1) == '?') && 9016 (NXT(2) == 'x') && (NXT(3) == 'm') && 9017 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 9018 xmlParseTextDecl(ctxt); 9019 } 9020 9021 /* 9022 * 
Doing validity checking on chunk doesn't make sense 9023 */ 9024 ctxt->instate = XML_PARSER_CONTENT; 9025 ctxt->validate = 0; 9026 ctxt->loadsubset = 0; 9027 ctxt->depth = depth; 9028 9029 xmlParseContent(ctxt); 9030 9031 if ((RAW == '<') && (NXT(1) == '/')) { 9032 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9034 ctxt->sax->error(ctxt->userData, 9035 "chunk is not well balanced\n"); 9036 ctxt->wellFormed = 0; 9037 ctxt->disableSAX = 1; 9038 } else if (RAW != 0) { 9039 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9041 ctxt->sax->error(ctxt->userData, 9042 "extra content at the end of well balanced chunk\n"); 9043 ctxt->wellFormed = 0; 9044 ctxt->disableSAX = 1; 9045 } 9046 if (ctxt->node != newDoc->children) { 9047 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9049 ctxt->sax->error(ctxt->userData, 9050 "chunk is not well balanced\n"); 9051 ctxt->wellFormed = 0; 9052 ctxt->disableSAX = 1; 9053 } 9054 9055 if (!ctxt->wellFormed) { 9056 if (ctxt->errNo == 0) 9057 ret = 1; 9058 else 9059 ret = ctxt->errNo; 9060 } else { 9061 if (list != NULL) { 9062 xmlNodePtr cur; 9063 9064 /* 9065 * Return the newly created nodeset after unlinking it from 9066 * they pseudo parent. 
9067 */ 9068 cur = newDoc->children->children; 9069 *list = cur; 9070 while (cur != NULL) { 9071 cur->parent = NULL; 9072 cur = cur->next; 9073 } 9074 newDoc->children->children = NULL; 9075 } 9076 ret = 0; 9077 } 9078 if (sax != NULL) 9079 ctxt->sax = oldsax; 9080 xmlFreeParserCtxt(ctxt); 9081 newDoc->intSubset = NULL; 9082 newDoc->extSubset = NULL; 9083 xmlFreeDoc(newDoc); 9084 9085 return(ret); 9086} 9087 9088/** 9089 * xmlParseBalancedChunk: 9090 * @doc: the document the chunk pertains to 9091 * @sax: the SAX handler bloc (possibly NULL) 9092 * @user_data: The user data returned on SAX callbacks (possibly NULL) 9093 * @depth: Used for loop detection, use 0 9094 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 9095 * @list: the return value for the set of parsed nodes 9096 * 9097 * Parse a well-balanced chunk of an XML document 9098 * called by the parser 9099 * The allowed sequence for the Well Balanced Chunk is the one defined by 9100 * the content production in the XML grammar: 9101 * 9102 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9103 * 9104 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 9105 * the parser error code otherwise 9106 */ 9107 9108int 9109xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 9110 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) { 9111 xmlParserCtxtPtr ctxt; 9112 xmlDocPtr newDoc; 9113 xmlSAXHandlerPtr oldsax = NULL; 9114 int size; 9115 int ret = 0; 9116 9117 if (depth > 40) { 9118 return(XML_ERR_ENTITY_LOOP); 9119 } 9120 9121 9122 if (list != NULL) 9123 *list = NULL; 9124 if (string == NULL) 9125 return(-1); 9126 9127 size = xmlStrlen(string); 9128 9129 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 9130 if (ctxt == NULL) return(-1); 9131 ctxt->userData = ctxt; 9132 if (sax != NULL) { 9133 oldsax = ctxt->sax; 9134 ctxt->sax = sax; 9135 if (user_data != NULL) 9136 ctxt->userData = user_data; 9137 } 9138 newDoc = 
xmlNewDoc(BAD_CAST "1.0"); 9139 if (newDoc == NULL) { 9140 xmlFreeParserCtxt(ctxt); 9141 return(-1); 9142 } 9143 if (doc != NULL) { 9144 newDoc->intSubset = doc->intSubset; 9145 newDoc->extSubset = doc->extSubset; 9146 } 9147 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 9148 if (newDoc->children == NULL) { 9149 if (sax != NULL) 9150 ctxt->sax = oldsax; 9151 xmlFreeParserCtxt(ctxt); 9152 newDoc->intSubset = NULL; 9153 newDoc->extSubset = NULL; 9154 xmlFreeDoc(newDoc); 9155 return(-1); 9156 } 9157 nodePush(ctxt, newDoc->children); 9158 if (doc == NULL) { 9159 ctxt->myDoc = newDoc; 9160 } else { 9161 ctxt->myDoc = doc; 9162 newDoc->children->doc = doc; 9163 } 9164 ctxt->instate = XML_PARSER_CONTENT; 9165 ctxt->depth = depth; 9166 9167 /* 9168 * Doing validity checking on chunk doesn't make sense 9169 */ 9170 ctxt->validate = 0; 9171 ctxt->loadsubset = 0; 9172 9173 xmlParseContent(ctxt); 9174 9175 if ((RAW == '<') && (NXT(1) == '/')) { 9176 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9178 ctxt->sax->error(ctxt->userData, 9179 "chunk is not well balanced\n"); 9180 ctxt->wellFormed = 0; 9181 ctxt->disableSAX = 1; 9182 } else if (RAW != 0) { 9183 ctxt->errNo = XML_ERR_EXTRA_CONTENT; 9184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9185 ctxt->sax->error(ctxt->userData, 9186 "extra content at the end of well balanced chunk\n"); 9187 ctxt->wellFormed = 0; 9188 ctxt->disableSAX = 1; 9189 } 9190 if (ctxt->node != newDoc->children) { 9191 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; 9192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 9193 ctxt->sax->error(ctxt->userData, 9194 "chunk is not well balanced\n"); 9195 ctxt->wellFormed = 0; 9196 ctxt->disableSAX = 1; 9197 } 9198 9199 if (!ctxt->wellFormed) { 9200 if (ctxt->errNo == 0) 9201 ret = 1; 9202 else 9203 ret = ctxt->errNo; 9204 } else { 9205 if (list != NULL) { 9206 xmlNodePtr cur; 9207 9208 /* 9209 * Return the newly 
created nodeset after unlinking it from 9210 * they pseudo parent. 9211 */ 9212 cur = newDoc->children->children; 9213 *list = cur; 9214 while (cur != NULL) { 9215 cur->parent = NULL; 9216 cur = cur->next; 9217 } 9218 newDoc->children->children = NULL; 9219 } 9220 ret = 0; 9221 } 9222 if (sax != NULL) 9223 ctxt->sax = oldsax; 9224 xmlFreeParserCtxt(ctxt); 9225 newDoc->intSubset = NULL; 9226 newDoc->extSubset = NULL; 9227 xmlFreeDoc(newDoc); 9228 9229 return(ret); 9230} 9231 9232/** 9233 * xmlSAXParseEntity: 9234 * @sax: the SAX handler block 9235 * @filename: the filename 9236 * 9237 * parse an XML external entity out of context and build a tree. 9238 * It use the given SAX function block to handle the parsing callback. 9239 * If sax is NULL, fallback to the default DOM tree building routines. 9240 * 9241 * [78] extParsedEnt ::= TextDecl? content 9242 * 9243 * This correspond to a "Well Balanced" chunk 9244 * 9245 * Returns the resulting document tree 9246 */ 9247 9248xmlDocPtr 9249xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 9250 xmlDocPtr ret; 9251 xmlParserCtxtPtr ctxt; 9252 char *directory = NULL; 9253 9254 ctxt = xmlCreateFileParserCtxt(filename); 9255 if (ctxt == NULL) { 9256 return(NULL); 9257 } 9258 if (sax != NULL) { 9259 if (ctxt->sax != NULL) 9260 xmlFree(ctxt->sax); 9261 ctxt->sax = sax; 9262 ctxt->userData = NULL; 9263 } 9264 9265 if ((ctxt->directory == NULL) && (directory == NULL)) 9266 directory = xmlParserGetDirectory(filename); 9267 9268 xmlParseExtParsedEnt(ctxt); 9269 9270 if (ctxt->wellFormed) 9271 ret = ctxt->myDoc; 9272 else { 9273 ret = NULL; 9274 xmlFreeDoc(ctxt->myDoc); 9275 ctxt->myDoc = NULL; 9276 } 9277 if (sax != NULL) 9278 ctxt->sax = NULL; 9279 xmlFreeParserCtxt(ctxt); 9280 9281 return(ret); 9282} 9283 9284/** 9285 * xmlParseEntity: 9286 * @filename: the filename 9287 * 9288 * parse an XML external entity out of context and build a tree. 9289 * 9290 * [78] extParsedEnt ::= TextDecl? 
content 9291 * 9292 * This correspond to a "Well Balanced" chunk 9293 * 9294 * Returns the resulting document tree 9295 */ 9296 9297xmlDocPtr 9298xmlParseEntity(const char *filename) { 9299 return(xmlSAXParseEntity(NULL, filename)); 9300} 9301 9302/** 9303 * xmlCreateEntityParserCtxt: 9304 * @URL: the entity URL 9305 * @ID: the entity PUBLIC ID 9306 * @base: a posible base for the target URI 9307 * 9308 * Create a parser context for an external entity 9309 * Automatic support for ZLIB/Compress compressed document is provided 9310 * by default if found at compile-time. 9311 * 9312 * Returns the new parser context or NULL 9313 */ 9314xmlParserCtxtPtr 9315xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 9316 const xmlChar *base) { 9317 xmlParserCtxtPtr ctxt; 9318 xmlParserInputPtr inputStream; 9319 char *directory = NULL; 9320 xmlChar *uri; 9321 9322 ctxt = xmlNewParserCtxt(); 9323 if (ctxt == NULL) { 9324 return(NULL); 9325 } 9326 9327 uri = xmlBuildURI(URL, base); 9328 9329 if (uri == NULL) { 9330 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 9331 if (inputStream == NULL) { 9332 xmlFreeParserCtxt(ctxt); 9333 return(NULL); 9334 } 9335 9336 inputPush(ctxt, inputStream); 9337 9338 if ((ctxt->directory == NULL) && (directory == NULL)) 9339 directory = xmlParserGetDirectory((char *)URL); 9340 if ((ctxt->directory == NULL) && (directory != NULL)) 9341 ctxt->directory = directory; 9342 } else { 9343 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 9344 if (inputStream == NULL) { 9345 xmlFree(uri); 9346 xmlFreeParserCtxt(ctxt); 9347 return(NULL); 9348 } 9349 9350 inputPush(ctxt, inputStream); 9351 9352 if ((ctxt->directory == NULL) && (directory == NULL)) 9353 directory = xmlParserGetDirectory((char *)uri); 9354 if ((ctxt->directory == NULL) && (directory != NULL)) 9355 ctxt->directory = directory; 9356 xmlFree(uri); 9357 } 9358 9359 return(ctxt); 9360} 9361 
9362/************************************************************************ 9363 * * 9364 * Front ends when parsing from a file * 9365 * * 9366 ************************************************************************/ 9367 9368/** 9369 * xmlCreateFileParserCtxt: 9370 * @filename: the filename 9371 * 9372 * Create a parser context for a file content. 9373 * Automatic support for ZLIB/Compress compressed document is provided 9374 * by default if found at compile-time. 9375 * 9376 * Returns the new parser context or NULL 9377 */ 9378xmlParserCtxtPtr 9379xmlCreateFileParserCtxt(const char *filename) 9380{ 9381 xmlParserCtxtPtr ctxt; 9382 xmlParserInputPtr inputStream; 9383 xmlParserInputBufferPtr buf; 9384 char *directory = NULL; 9385 9386 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 9387 if (buf == NULL) { 9388 return(NULL); 9389 } 9390 9391 ctxt = xmlNewParserCtxt(); 9392 if (ctxt == NULL) { 9393 if (xmlDefaultSAXHandler.error != NULL) { 9394 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 9395 } 9396 return(NULL); 9397 } 9398 9399 inputStream = xmlNewInputStream(ctxt); 9400 if (inputStream == NULL) { 9401 xmlFreeParserCtxt(ctxt); 9402 return(NULL); 9403 } 9404 9405 inputStream->filename = xmlMemStrdup(filename); 9406 inputStream->buf = buf; 9407 inputStream->base = inputStream->buf->buffer->content; 9408 inputStream->cur = inputStream->buf->buffer->content; 9409 inputStream->end = 9410 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 9411 9412 inputPush(ctxt, inputStream); 9413 if ((ctxt->directory == NULL) && (directory == NULL)) 9414 directory = xmlParserGetDirectory(filename); 9415 if ((ctxt->directory == NULL) && (directory != NULL)) 9416 ctxt->directory = directory; 9417 9418 return(ctxt); 9419} 9420 9421/** 9422 * xmlSAXParseFile: 9423 * @sax: the SAX handler block 9424 * @filename: the filename 9425 * @recovery: work in recovery mode, i.e. 
tries to read no Well Formed 9426 * documents 9427 * 9428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9429 * compressed document is provided by default if found at compile-time. 9430 * It use the given SAX function block to handle the parsing callback. 9431 * If sax is NULL, fallback to the default DOM tree building routines. 9432 * 9433 * Returns the resulting document tree 9434 */ 9435 9436xmlDocPtr 9437xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 9438 int recovery) { 9439 xmlDocPtr ret; 9440 xmlParserCtxtPtr ctxt; 9441 char *directory = NULL; 9442 9443 ctxt = xmlCreateFileParserCtxt(filename); 9444 if (ctxt == NULL) { 9445 return(NULL); 9446 } 9447 if (sax != NULL) { 9448 if (ctxt->sax != NULL) 9449 xmlFree(ctxt->sax); 9450 ctxt->sax = sax; 9451 ctxt->userData = NULL; 9452 } 9453 9454 if ((ctxt->directory == NULL) && (directory == NULL)) 9455 directory = xmlParserGetDirectory(filename); 9456 if ((ctxt->directory == NULL) && (directory != NULL)) 9457 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 9458 9459 xmlParseDocument(ctxt); 9460 9461 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9462 else { 9463 ret = NULL; 9464 xmlFreeDoc(ctxt->myDoc); 9465 ctxt->myDoc = NULL; 9466 } 9467 if (sax != NULL) 9468 ctxt->sax = NULL; 9469 xmlFreeParserCtxt(ctxt); 9470 9471 return(ret); 9472} 9473 9474/** 9475 * xmlRecoverDoc: 9476 * @cur: a pointer to an array of xmlChar 9477 * 9478 * parse an XML in-memory document and build a tree. 9479 * In the case the document is not Well Formed, a tree is built anyway 9480 * 9481 * Returns the resulting document tree 9482 */ 9483 9484xmlDocPtr 9485xmlRecoverDoc(xmlChar *cur) { 9486 return(xmlSAXParseDoc(NULL, cur, 1)); 9487} 9488 9489/** 9490 * xmlParseFile: 9491 * @filename: the filename 9492 * 9493 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9494 * compressed document is provided by default if found at compile-time. 
9495 * 9496 * Returns the resulting document tree 9497 */ 9498 9499xmlDocPtr 9500xmlParseFile(const char *filename) { 9501 return(xmlSAXParseFile(NULL, filename, 0)); 9502} 9503 9504/** 9505 * xmlRecoverFile: 9506 * @filename: the filename 9507 * 9508 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 9509 * compressed document is provided by default if found at compile-time. 9510 * In the case the document is not Well Formed, a tree is built anyway 9511 * 9512 * Returns the resulting document tree 9513 */ 9514 9515xmlDocPtr 9516xmlRecoverFile(const char *filename) { 9517 return(xmlSAXParseFile(NULL, filename, 1)); 9518} 9519 9520 9521/** 9522 * xmlSetupParserForBuffer: 9523 * @ctxt: an XML parser context 9524 * @buffer: a xmlChar * buffer 9525 * @filename: a file name 9526 * 9527 * Setup the parser context to parse a new buffer; Clears any prior 9528 * contents from the parser context. The buffer parameter must not be 9529 * NULL, but the filename parameter can be 9530 */ 9531void 9532xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 9533 const char* filename) 9534{ 9535 xmlParserInputPtr input; 9536 9537 input = xmlNewInputStream(ctxt); 9538 if (input == NULL) { 9539 perror("malloc"); 9540 xmlFree(ctxt); 9541 return; 9542 } 9543 9544 xmlClearParserCtxt(ctxt); 9545 if (filename != NULL) 9546 input->filename = xmlMemStrdup(filename); 9547 input->base = buffer; 9548 input->cur = buffer; 9549 input->end = &buffer[xmlStrlen(buffer)]; 9550 inputPush(ctxt, input); 9551} 9552 9553/** 9554 * xmlSAXUserParseFile: 9555 * @sax: a SAX handler 9556 * @user_data: The user data returned on SAX callbacks 9557 * @filename: a file name 9558 * 9559 * parse an XML file and call the given SAX handler routines. 
9560 * Automatic support for ZLIB/Compress compressed document is provided 9561 * 9562 * Returns 0 in case of success or a error number otherwise 9563 */ 9564int 9565xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 9566 const char *filename) { 9567 int ret = 0; 9568 xmlParserCtxtPtr ctxt; 9569 9570 ctxt = xmlCreateFileParserCtxt(filename); 9571 if (ctxt == NULL) return -1; 9572 if (ctxt->sax != &xmlDefaultSAXHandler) 9573 xmlFree(ctxt->sax); 9574 ctxt->sax = sax; 9575 if (user_data != NULL) 9576 ctxt->userData = user_data; 9577 9578 xmlParseDocument(ctxt); 9579 9580 if (ctxt->wellFormed) 9581 ret = 0; 9582 else { 9583 if (ctxt->errNo != 0) 9584 ret = ctxt->errNo; 9585 else 9586 ret = -1; 9587 } 9588 if (sax != NULL) 9589 ctxt->sax = NULL; 9590 xmlFreeParserCtxt(ctxt); 9591 9592 return ret; 9593} 9594 9595/************************************************************************ 9596 * * 9597 * Front ends when parsing from memory * 9598 * * 9599 ************************************************************************/ 9600 9601/** 9602 * xmlCreateMemoryParserCtxt: 9603 * @buffer: a pointer to a char array 9604 * @size: the size of the array 9605 * 9606 * Create a parser context for an XML in-memory document. 
9607 * 9608 * Returns the new parser context or NULL 9609 */ 9610xmlParserCtxtPtr 9611xmlCreateMemoryParserCtxt(char *buffer, int size) { 9612 xmlParserCtxtPtr ctxt; 9613 xmlParserInputPtr input; 9614 xmlParserInputBufferPtr buf; 9615 9616 if (buffer == NULL) 9617 return(NULL); 9618 if (size <= 0) 9619 return(NULL); 9620 9621 ctxt = xmlNewParserCtxt(); 9622 if (ctxt == NULL) 9623 return(NULL); 9624 9625 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 9626 if (buf == NULL) return(NULL); 9627 9628 input = xmlNewInputStream(ctxt); 9629 if (input == NULL) { 9630 xmlFreeParserCtxt(ctxt); 9631 return(NULL); 9632 } 9633 9634 input->filename = NULL; 9635 input->buf = buf; 9636 input->base = input->buf->buffer->content; 9637 input->cur = input->buf->buffer->content; 9638 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 9639 9640 inputPush(ctxt, input); 9641 return(ctxt); 9642} 9643 9644/** 9645 * xmlSAXParseMemory: 9646 * @sax: the SAX handler block 9647 * @buffer: an pointer to a char array 9648 * @size: the size of the array 9649 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 9650 * documents 9651 * 9652 * parse an XML in-memory block and use the given SAX function block 9653 * to handle the parsing callback. If sax is NULL, fallback to the default 9654 * DOM tree building routines. 
9655 * 9656 * Returns the resulting document tree 9657 */ 9658xmlDocPtr 9659xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) { 9660 xmlDocPtr ret; 9661 xmlParserCtxtPtr ctxt; 9662 9663 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9664 if (ctxt == NULL) return(NULL); 9665 if (sax != NULL) { 9666 ctxt->sax = sax; 9667 ctxt->userData = NULL; 9668 } 9669 9670 xmlParseDocument(ctxt); 9671 9672 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9673 else { 9674 ret = NULL; 9675 xmlFreeDoc(ctxt->myDoc); 9676 ctxt->myDoc = NULL; 9677 } 9678 if (sax != NULL) 9679 ctxt->sax = NULL; 9680 xmlFreeParserCtxt(ctxt); 9681 9682 return(ret); 9683} 9684 9685/** 9686 * xmlParseMemory: 9687 * @buffer: an pointer to a char array 9688 * @size: the size of the array 9689 * 9690 * parse an XML in-memory block and build a tree. 9691 * 9692 * Returns the resulting document tree 9693 */ 9694 9695xmlDocPtr xmlParseMemory(char *buffer, int size) { 9696 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 9697} 9698 9699/** 9700 * xmlRecoverMemory: 9701 * @buffer: an pointer to a char array 9702 * @size: the size of the array 9703 * 9704 * parse an XML in-memory block and build a tree. 9705 * In the case the document is not Well Formed, a tree is built anyway 9706 * 9707 * Returns the resulting document tree 9708 */ 9709 9710xmlDocPtr xmlRecoverMemory(char *buffer, int size) { 9711 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 9712} 9713 9714/** 9715 * xmlSAXUserParseMemory: 9716 * @sax: a SAX handler 9717 * @user_data: The user data returned on SAX callbacks 9718 * @buffer: an in-memory XML document input 9719 * @size: the length of the XML document in bytes 9720 * 9721 * A better SAX parsing routine. 9722 * parse an XML in-memory buffer and call the given SAX handler routines. 
9723 * 9724 * Returns 0 in case of success or a error number otherwise 9725 */ 9726int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 9727 char *buffer, int size) { 9728 int ret = 0; 9729 xmlParserCtxtPtr ctxt; 9730 xmlSAXHandlerPtr oldsax = NULL; 9731 9732 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 9733 if (ctxt == NULL) return -1; 9734 if (sax != NULL) { 9735 oldsax = ctxt->sax; 9736 ctxt->sax = sax; 9737 } 9738 ctxt->userData = user_data; 9739 9740 xmlParseDocument(ctxt); 9741 9742 if (ctxt->wellFormed) 9743 ret = 0; 9744 else { 9745 if (ctxt->errNo != 0) 9746 ret = ctxt->errNo; 9747 else 9748 ret = -1; 9749 } 9750 if (sax != NULL) { 9751 ctxt->sax = oldsax; 9752 } 9753 xmlFreeParserCtxt(ctxt); 9754 9755 return ret; 9756} 9757 9758/** 9759 * xmlCreateDocParserCtxt: 9760 * @cur: a pointer to an array of xmlChar 9761 * 9762 * Creates a parser context for an XML in-memory document. 9763 * 9764 * Returns the new parser context or NULL 9765 */ 9766xmlParserCtxtPtr 9767xmlCreateDocParserCtxt(xmlChar *cur) { 9768 int len; 9769 9770 if (cur == NULL) 9771 return(NULL); 9772 len = xmlStrlen(cur); 9773 return(xmlCreateMemoryParserCtxt((char *)cur, len)); 9774} 9775 9776/** 9777 * xmlSAXParseDoc: 9778 * @sax: the SAX handler block 9779 * @cur: a pointer to an array of xmlChar 9780 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 9781 * documents 9782 * 9783 * parse an XML in-memory document and build a tree. 9784 * It use the given SAX function block to handle the parsing callback. 9785 * If sax is NULL, fallback to the default DOM tree building routines. 
9786 * 9787 * Returns the resulting document tree 9788 */ 9789 9790xmlDocPtr 9791xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 9792 xmlDocPtr ret; 9793 xmlParserCtxtPtr ctxt; 9794 9795 if (cur == NULL) return(NULL); 9796 9797 9798 ctxt = xmlCreateDocParserCtxt(cur); 9799 if (ctxt == NULL) return(NULL); 9800 if (sax != NULL) { 9801 ctxt->sax = sax; 9802 ctxt->userData = NULL; 9803 } 9804 9805 xmlParseDocument(ctxt); 9806 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 9807 else { 9808 ret = NULL; 9809 xmlFreeDoc(ctxt->myDoc); 9810 ctxt->myDoc = NULL; 9811 } 9812 if (sax != NULL) 9813 ctxt->sax = NULL; 9814 xmlFreeParserCtxt(ctxt); 9815 9816 return(ret); 9817} 9818 9819/** 9820 * xmlParseDoc: 9821 * @cur: a pointer to an array of xmlChar 9822 * 9823 * parse an XML in-memory document and build a tree. 9824 * 9825 * Returns the resulting document tree 9826 */ 9827 9828xmlDocPtr 9829xmlParseDoc(xmlChar *cur) { 9830 return(xmlSAXParseDoc(NULL, cur, 0)); 9831} 9832 9833 9834/************************************************************************ 9835 * * 9836 * Miscellaneous * 9837 * * 9838 ************************************************************************/ 9839 9840#ifdef LIBXML_XPATH_ENABLED 9841#include <libxml/xpath.h> 9842#endif 9843 9844static int xmlParserInitialized = 0; 9845 9846/** 9847 * xmlInitParser: 9848 * 9849 * Initialization function for the XML parser. 9850 * This is not reentrant. Call once before processing in case of 9851 * use in multithreaded programs. 
9852 */ 9853 9854void 9855xmlInitParser(void) { 9856 if (xmlParserInitialized) return; 9857 9858 xmlInitCharEncodingHandlers(); 9859 xmlInitializePredefinedEntities(); 9860 xmlDefaultSAXHandlerInit(); 9861 xmlRegisterDefaultInputCallbacks(); 9862 xmlRegisterDefaultOutputCallbacks(); 9863#ifdef LIBXML_HTML_ENABLED 9864 htmlInitAutoClose(); 9865 htmlDefaultSAXHandlerInit(); 9866#endif 9867#ifdef LIBXML_XPATH_ENABLED 9868 xmlXPathInit(); 9869#endif 9870 xmlParserInitialized = 1; 9871} 9872 9873/** 9874 * xmlCleanupParser: 9875 * 9876 * Cleanup function for the XML parser. It tries to reclaim all 9877 * parsing related global memory allocated for the parser processing. 9878 * It doesn't deallocate any document related memory. Calling this 9879 * function should not prevent reusing the parser. 9880 */ 9881 9882void 9883xmlCleanupParser(void) { 9884 xmlParserInitialized = 0; 9885 xmlCleanupCharEncodingHandlers(); 9886 xmlCleanupPredefinedEntities(); 9887} 9888 9889/** 9890 * xmlPedanticParserDefault: 9891 * @val: int 0 or 1 9892 * 9893 * Set and return the previous value for enabling pedantic warnings. 9894 * 9895 * Returns the last value for 0 for no substitution, 1 for substitution. 9896 */ 9897 9898int 9899xmlPedanticParserDefault(int val) { 9900 int old = xmlPedanticParserDefaultValue; 9901 9902 xmlPedanticParserDefaultValue = val; 9903 return(old); 9904} 9905 9906/** 9907 * xmlSubstituteEntitiesDefault: 9908 * @val: int 0 or 1 9909 * 9910 * Set and return the previous value for default entity support. 9911 * Initially the parser always keep entity references instead of substituting 9912 * entity values in the output. This function has to be used to change the 9913 * default parser behaviour 9914 * SAX::subtituteEntities() has to be used for changing that on a file by 9915 * file basis. 9916 * 9917 * Returns the last value for 0 for no substitution, 1 for substitution. 
9918 */ 9919 9920int 9921xmlSubstituteEntitiesDefault(int val) { 9922 int old = xmlSubstituteEntitiesDefaultValue; 9923 9924 xmlSubstituteEntitiesDefaultValue = val; 9925 return(old); 9926} 9927 9928/** 9929 * xmlKeepBlanksDefault: 9930 * @val: int 0 or 1 9931 * 9932 * Set and return the previous value for default blanks text nodes support. 9933 * The 1.x version of the parser used an heuristic to try to detect 9934 * ignorable white spaces. As a result the SAX callback was generating 9935 * ignorableWhitespace() callbacks instead of characters() one, and when 9936 * using the DOM output text nodes containing those blanks were not generated. 9937 * The 2.x and later version will switch to the XML standard way and 9938 * ignorableWhitespace() are only generated when running the parser in 9939 * validating mode and when the current element doesn't allow CDATA or 9940 * mixed content. 9941 * This function is provided as a way to force the standard behaviour 9942 * on 1.X libs and to switch back to the old mode for compatibility when 9943 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 9944 * by using xmlIsBlankNode() commodity function to detect the "empty" 9945 * nodes generated. 9946 * This value also affect autogeneration of indentation when saving code 9947 * if blanks sections are kept, indentation is not generated. 9948 * 9949 * Returns the last value for 0 for no substitution, 1 for substitution. 9950 */ 9951 9952int 9953xmlKeepBlanksDefault(int val) { 9954 int old = xmlKeepBlanksDefaultValue; 9955 9956 xmlKeepBlanksDefaultValue = val; 9957 xmlIndentTreeOutput = !val; 9958 return(old); 9959} 9960 9961