parser.c revision db07dd613e461df93dde7902c6505629bf0734e9
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <limits.h> 44#include <string.h> 45#include <stdarg.h> 46#include <libxml/xmlmemory.h> 47#include <libxml/threads.h> 48#include <libxml/globals.h> 49#include <libxml/tree.h> 50#include <libxml/parser.h> 51#include <libxml/parserInternals.h> 52#include <libxml/valid.h> 53#include <libxml/entities.h> 54#include <libxml/xmlerror.h> 55#include <libxml/encoding.h> 56#include <libxml/xmlIO.h> 57#include <libxml/uri.h> 58#ifdef LIBXML_CATALOG_ENABLED 59#include <libxml/catalog.h> 60#endif 61#ifdef LIBXML_SCHEMAS_ENABLED 62#include <libxml/xmlschemastypes.h> 63#include <libxml/relaxng.h> 64#endif 65#ifdef HAVE_CTYPE_H 66#include <ctype.h> 67#endif 68#ifdef HAVE_STDLIB_H 69#include <stdlib.h> 70#endif 71#ifdef HAVE_SYS_STAT_H 72#include <sys/stat.h> 73#endif 74#ifdef HAVE_FCNTL_H 75#include <fcntl.h> 76#endif 77#ifdef HAVE_UNISTD_H 78#include <unistd.h> 79#endif 80#ifdef HAVE_ZLIB_H 81#include <zlib.h> 82#endif 83#ifdef HAVE_LZMA_H 84#include <lzma.h> 85#endif 86 87#include "buf.h" 88#include "enc.h" 89 90static void 91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93static xmlParserCtxtPtr 94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97static void xmlHaltParser(xmlParserCtxtPtr ctxt); 98 99/************************************************************************ 100 * * 101 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 102 * * 103 ************************************************************************/ 104 105#define XML_PARSER_BIG_ENTITY 1000 106#define XML_PARSER_LOT_ENTITY 5000 107 108/* 109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 110 * replacement over the size in byte of the input indicates that you have 111 * and eponential behaviour. A value of 10 correspond to at least 3 entity 112 * replacement per byte of input. 113 */ 114#define XML_PARSER_NON_LINEAR 10 115 116/* 117 * xmlParserEntityCheck 118 * 119 * Function to check non-linear entity expansion behaviour 120 * This is here to detect and stop exponential linear entity expansion 121 * This is not a limitation of the parser but a safety 122 * boundary feature. It can be disabled with the XML_PARSE_HUGE 123 * parser option. 124 */ 125static int 126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 127 xmlEntityPtr ent, size_t replacement) 128{ 129 size_t consumed = 0; 130 131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 132 return (0); 133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 134 return (1); 135 136 /* 137 * This may look absurd but is needed to detect 138 * entities problems 139 */ 140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 141 (ent->content != NULL) && (ent->checked == 0)) { 142 unsigned long oldnbent = ctxt->nbentities; 143 xmlChar *rep; 144 145 ent->checked = 1; 146 147 ++ctxt->depth; 148 rep = xmlStringDecodeEntities(ctxt, ent->content, 149 XML_SUBSTITUTE_REF, 0, 0, 0); 150 --ctxt->depth; 151 152 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 153 if (rep != NULL) { 154 if (xmlStrchr(rep, '<')) 155 ent->checked |= 1; 156 xmlFree(rep); 157 rep = NULL; 158 } 159 } 160 if (replacement != 0) { 161 if (replacement < XML_MAX_TEXT_LENGTH) 162 return(0); 163 164 /* 165 * If the volume of entity copy reaches 10 times the 166 * amount of parsed data and over the large text threshold 167 * then that's very 
likely to be an abuse. 168 */ 169 if (ctxt->input != NULL) { 170 consumed = ctxt->input->consumed + 171 (ctxt->input->cur - ctxt->input->base); 172 } 173 consumed += ctxt->sizeentities; 174 175 if (replacement < XML_PARSER_NON_LINEAR * consumed) 176 return(0); 177 } else if (size != 0) { 178 /* 179 * Do the check based on the replacement size of the entity 180 */ 181 if (size < XML_PARSER_BIG_ENTITY) 182 return(0); 183 184 /* 185 * A limit on the amount of text data reasonably used 186 */ 187 if (ctxt->input != NULL) { 188 consumed = ctxt->input->consumed + 189 (ctxt->input->cur - ctxt->input->base); 190 } 191 consumed += ctxt->sizeentities; 192 193 if ((size < XML_PARSER_NON_LINEAR * consumed) && 194 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 195 return (0); 196 } else if (ent != NULL) { 197 /* 198 * use the number of parsed entities in the replacement 199 */ 200 size = ent->checked / 2; 201 202 /* 203 * The amount of data parsed counting entities size only once 204 */ 205 if (ctxt->input != NULL) { 206 consumed = ctxt->input->consumed + 207 (ctxt->input->cur - ctxt->input->base); 208 } 209 consumed += ctxt->sizeentities; 210 211 /* 212 * Check the density of entities for the amount of data 213 * knowing an entity reference will take at least 3 bytes 214 */ 215 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 216 return (0); 217 } else { 218 /* 219 * strange we got no data for checking 220 */ 221 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 222 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 223 (ctxt->nbentities <= 10000)) 224 return (0); 225 } 226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 227 return (1); 228} 229 230/** 231 * xmlParserMaxDepth: 232 * 233 * arbitrary depth limit for the XML documents that we allow to 234 * process. This is not a limitation of the parser but a safety 235 * boundary feature. It can be disabled with the XML_PARSE_HUGE 236 * parser option. 
237 */ 238unsigned int xmlParserMaxDepth = 256; 239 240 241 242#define SAX2 1 243#define XML_PARSER_BIG_BUFFER_SIZE 300 244#define XML_PARSER_BUFFER_SIZE 100 245#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 246 247/** 248 * XML_PARSER_CHUNK_SIZE 249 * 250 * When calling GROW that's the minimal amount of data 251 * the parser expected to have received. It is not a hard 252 * limit but an optimization when reading strings like Names 253 * It is not strictly needed as long as inputs available characters 254 * are followed by 0, which should be provided by the I/O level 255 */ 256#define XML_PARSER_CHUNK_SIZE 100 257 258/* 259 * List of XML prefixed PI allowed by W3C specs 260 */ 261 262static const char *xmlW3CPIs[] = { 263 "xml-stylesheet", 264 "xml-model", 265 NULL 266}; 267 268 269/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 270static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 271 const xmlChar **str); 272 273static xmlParserErrors 274xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 275 xmlSAXHandlerPtr sax, 276 void *user_data, int depth, const xmlChar *URL, 277 const xmlChar *ID, xmlNodePtr *list); 278 279static int 280xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 281 const char *encoding); 282#ifdef LIBXML_LEGACY_ENABLED 283static void 284xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 285 xmlNodePtr lastNode); 286#endif /* LIBXML_LEGACY_ENABLED */ 287 288static xmlParserErrors 289xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 290 const xmlChar *string, void *user_data, xmlNodePtr *lst); 291 292static int 293xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 294 295/************************************************************************ 296 * * 297 * Some factorized error routines * 298 * * 299 ************************************************************************/ 300 301/** 302 * xmlErrAttributeDup: 303 * @ctxt: an 
XML parser context 304 * @prefix: the attribute prefix 305 * @localname: the attribute localname 306 * 307 * Handle a redefinition of attribute error 308 */ 309static void 310xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 311 const xmlChar * localname) 312{ 313 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 314 (ctxt->instate == XML_PARSER_EOF)) 315 return; 316 if (ctxt != NULL) 317 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 318 319 if (prefix == NULL) 320 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 321 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 322 (const char *) localname, NULL, NULL, 0, 0, 323 "Attribute %s redefined\n", localname); 324 else 325 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 326 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 327 (const char *) prefix, (const char *) localname, 328 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 329 localname); 330 if (ctxt != NULL) { 331 ctxt->wellFormed = 0; 332 if (ctxt->recovery == 0) 333 ctxt->disableSAX = 1; 334 } 335} 336 337/** 338 * xmlFatalErr: 339 * @ctxt: an XML parser context 340 * @error: the error number 341 * @extra: extra information string 342 * 343 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 344 */ 345static void 346xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 347{ 348 const char *errmsg; 349 char errstr[129] = ""; 350 351 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 352 (ctxt->instate == XML_PARSER_EOF)) 353 return; 354 switch (error) { 355 case XML_ERR_INVALID_HEX_CHARREF: 356 errmsg = "CharRef: invalid hexadecimal value"; 357 break; 358 case XML_ERR_INVALID_DEC_CHARREF: 359 errmsg = "CharRef: invalid decimal value"; 360 break; 361 case XML_ERR_INVALID_CHARREF: 362 errmsg = "CharRef: invalid value"; 363 break; 364 case XML_ERR_INTERNAL_ERROR: 365 errmsg = "internal error"; 366 break; 367 case XML_ERR_PEREF_AT_EOF: 368 errmsg = "PEReference at end of document"; 369 break; 370 case XML_ERR_PEREF_IN_PROLOG: 371 errmsg = "PEReference in prolog"; 372 break; 373 case XML_ERR_PEREF_IN_EPILOG: 374 errmsg = "PEReference in epilog"; 375 break; 376 case XML_ERR_PEREF_NO_NAME: 377 errmsg = "PEReference: no name"; 378 break; 379 case XML_ERR_PEREF_SEMICOL_MISSING: 380 errmsg = "PEReference: expecting ';'"; 381 break; 382 case XML_ERR_ENTITY_LOOP: 383 errmsg = "Detected an entity reference loop"; 384 break; 385 case XML_ERR_ENTITY_NOT_STARTED: 386 errmsg = "EntityValue: \" or ' expected"; 387 break; 388 case XML_ERR_ENTITY_PE_INTERNAL: 389 errmsg = "PEReferences forbidden in internal subset"; 390 break; 391 case XML_ERR_ENTITY_NOT_FINISHED: 392 errmsg = "EntityValue: \" or ' expected"; 393 break; 394 case XML_ERR_ATTRIBUTE_NOT_STARTED: 395 errmsg = "AttValue: \" or ' expected"; 396 break; 397 case XML_ERR_LT_IN_ATTRIBUTE: 398 errmsg = "Unescaped '<' not allowed in attributes values"; 399 break; 400 case XML_ERR_LITERAL_NOT_STARTED: 401 errmsg = "SystemLiteral \" or ' expected"; 402 break; 403 case XML_ERR_LITERAL_NOT_FINISHED: 404 errmsg = "Unfinished System or Public ID \" or ' expected"; 405 break; 406 case XML_ERR_MISPLACED_CDATA_END: 407 errmsg = "Sequence ']]>' not allowed in content"; 
408 break; 409 case XML_ERR_URI_REQUIRED: 410 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 411 break; 412 case XML_ERR_PUBID_REQUIRED: 413 errmsg = "PUBLIC, the Public Identifier is missing"; 414 break; 415 case XML_ERR_HYPHEN_IN_COMMENT: 416 errmsg = "Comment must not contain '--' (double-hyphen)"; 417 break; 418 case XML_ERR_PI_NOT_STARTED: 419 errmsg = "xmlParsePI : no target name"; 420 break; 421 case XML_ERR_RESERVED_XML_NAME: 422 errmsg = "Invalid PI name"; 423 break; 424 case XML_ERR_NOTATION_NOT_STARTED: 425 errmsg = "NOTATION: Name expected here"; 426 break; 427 case XML_ERR_NOTATION_NOT_FINISHED: 428 errmsg = "'>' required to close NOTATION declaration"; 429 break; 430 case XML_ERR_VALUE_REQUIRED: 431 errmsg = "Entity value required"; 432 break; 433 case XML_ERR_URI_FRAGMENT: 434 errmsg = "Fragment not allowed"; 435 break; 436 case XML_ERR_ATTLIST_NOT_STARTED: 437 errmsg = "'(' required to start ATTLIST enumeration"; 438 break; 439 case XML_ERR_NMTOKEN_REQUIRED: 440 errmsg = "NmToken expected in ATTLIST enumeration"; 441 break; 442 case XML_ERR_ATTLIST_NOT_FINISHED: 443 errmsg = "')' required to finish ATTLIST enumeration"; 444 break; 445 case XML_ERR_MIXED_NOT_STARTED: 446 errmsg = "MixedContentDecl : '|' or ')*' expected"; 447 break; 448 case XML_ERR_PCDATA_REQUIRED: 449 errmsg = "MixedContentDecl : '#PCDATA' expected"; 450 break; 451 case XML_ERR_ELEMCONTENT_NOT_STARTED: 452 errmsg = "ContentDecl : Name or '(' expected"; 453 break; 454 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 455 errmsg = "ContentDecl : ',' '|' or ')' expected"; 456 break; 457 case XML_ERR_PEREF_IN_INT_SUBSET: 458 errmsg = 459 "PEReference: forbidden within markup decl in internal subset"; 460 break; 461 case XML_ERR_GT_REQUIRED: 462 errmsg = "expected '>'"; 463 break; 464 case XML_ERR_CONDSEC_INVALID: 465 errmsg = "XML conditional section '[' expected"; 466 break; 467 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 468 errmsg = "Content error in the external subset"; 469 break; 470 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 471 errmsg = 472 "conditional section INCLUDE or IGNORE keyword expected"; 473 break; 474 case XML_ERR_CONDSEC_NOT_FINISHED: 475 errmsg = "XML conditional section not closed"; 476 break; 477 case XML_ERR_XMLDECL_NOT_STARTED: 478 errmsg = "Text declaration '<?xml' required"; 479 break; 480 case XML_ERR_XMLDECL_NOT_FINISHED: 481 errmsg = "parsing XML declaration: '?>' expected"; 482 break; 483 case XML_ERR_EXT_ENTITY_STANDALONE: 484 errmsg = "external parsed entities cannot be standalone"; 485 break; 486 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 487 errmsg = "EntityRef: expecting ';'"; 488 break; 489 case XML_ERR_DOCTYPE_NOT_FINISHED: 490 errmsg = "DOCTYPE improperly terminated"; 491 break; 492 case XML_ERR_LTSLASH_REQUIRED: 493 errmsg = "EndTag: '</' not found"; 494 break; 495 case XML_ERR_EQUAL_REQUIRED: 496 errmsg = "expected '='"; 497 break; 498 case XML_ERR_STRING_NOT_CLOSED: 499 errmsg = "String not closed expecting \" or '"; 500 break; 501 case XML_ERR_STRING_NOT_STARTED: 502 errmsg = "String not started expecting ' or \""; 503 break; 504 case XML_ERR_ENCODING_NAME: 505 errmsg = "Invalid XML encoding name"; 506 break; 507 case XML_ERR_STANDALONE_VALUE: 508 errmsg = "standalone accepts only 'yes' or 'no'"; 509 break; 510 case XML_ERR_DOCUMENT_EMPTY: 511 errmsg = "Document is empty"; 512 break; 513 case XML_ERR_DOCUMENT_END: 514 errmsg = "Extra content at the end of the document"; 515 break; 516 case XML_ERR_NOT_WELL_BALANCED: 517 errmsg = "chunk is not well balanced"; 518 break; 519 case XML_ERR_EXTRA_CONTENT: 520 errmsg = "extra content at the end of well balanced chunk"; 521 break; 522 case XML_ERR_VERSION_MISSING: 523 errmsg = "Malformed declaration expecting version"; 524 break; 525 case XML_ERR_NAME_TOO_LONG: 526 errmsg = "Name too long use XML_PARSE_HUGE option"; 527 break; 528#if 0 529 case: 530 errmsg = ""; 531 break; 532#endif 533 default: 534 errmsg = "Unregistered error message"; 535 } 536 if (info == NULL) 537 
snprintf(errstr, 128, "%s\n", errmsg); 538 else 539 snprintf(errstr, 128, "%s: %%s\n", errmsg); 540 if (ctxt != NULL) 541 ctxt->errNo = error; 542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 544 info); 545 if (ctxt != NULL) { 546 ctxt->wellFormed = 0; 547 if (ctxt->recovery == 0) 548 ctxt->disableSAX = 1; 549 } 550} 551 552/** 553 * xmlFatalErrMsg: 554 * @ctxt: an XML parser context 555 * @error: the error number 556 * @msg: the error message 557 * 558 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 559 */ 560static void 561xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 562 const char *msg) 563{ 564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 565 (ctxt->instate == XML_PARSER_EOF)) 566 return; 567 if (ctxt != NULL) 568 ctxt->errNo = error; 569 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 570 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 571 if (ctxt != NULL) { 572 ctxt->wellFormed = 0; 573 if (ctxt->recovery == 0) 574 ctxt->disableSAX = 1; 575 } 576} 577 578/** 579 * xmlWarningMsg: 580 * @ctxt: an XML parser context 581 * @error: the error number 582 * @msg: the error message 583 * @str1: extra data 584 * @str2: extra data 585 * 586 * Handle a warning. 587 */ 588static void 589xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 590 const char *msg, const xmlChar *str1, const xmlChar *str2) 591{ 592 xmlStructuredErrorFunc schannel = NULL; 593 594 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 595 (ctxt->instate == XML_PARSER_EOF)) 596 return; 597 if ((ctxt != NULL) && (ctxt->sax != NULL) && 598 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 599 schannel = ctxt->sax->serror; 600 if (ctxt != NULL) { 601 __xmlRaiseError(schannel, 602 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 603 ctxt->userData, 604 ctxt, NULL, XML_FROM_PARSER, error, 605 XML_ERR_WARNING, NULL, 0, 606 (const char *) str1, (const char *) str2, NULL, 0, 0, 607 msg, (const char *) str1, (const char *) str2); 608 } else { 609 __xmlRaiseError(schannel, NULL, NULL, 610 ctxt, NULL, XML_FROM_PARSER, error, 611 XML_ERR_WARNING, NULL, 0, 612 (const char *) str1, (const char *) str2, NULL, 0, 0, 613 msg, (const char *) str1, (const char *) str2); 614 } 615} 616 617/** 618 * xmlValidityError: 619 * @ctxt: an XML parser context 620 * @error: the error number 621 * @msg: the error message 622 * @str1: extra data 623 * 624 * Handle a validity error. 625 */ 626static void 627xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 628 const char *msg, const xmlChar *str1, const xmlChar *str2) 629{ 630 xmlStructuredErrorFunc schannel = NULL; 631 632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 633 (ctxt->instate == XML_PARSER_EOF)) 634 return; 635 if (ctxt != NULL) { 636 ctxt->errNo = error; 637 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 638 schannel = ctxt->sax->serror; 639 } 640 if (ctxt != NULL) { 641 __xmlRaiseError(schannel, 642 ctxt->vctxt.error, ctxt->vctxt.userData, 643 ctxt, NULL, XML_FROM_DTD, error, 644 XML_ERR_ERROR, NULL, 0, (const char *) str1, 645 (const char *) str2, NULL, 0, 0, 646 msg, (const char *) str1, (const char *) str2); 647 ctxt->valid = 0; 648 } else { 649 __xmlRaiseError(schannel, NULL, NULL, 650 ctxt, NULL, XML_FROM_DTD, error, 651 XML_ERR_ERROR, NULL, 0, (const char *) str1, 652 (const char *) str2, NULL, 0, 0, 653 msg, (const char *) str1, (const char *) str2); 654 } 655} 656 657/** 658 * xmlFatalErrMsgInt: 659 * @ctxt: an XML parser context 660 * @error: the error number 661 * @msg: the error message 662 * @val: an integer value 663 * 664 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 665 */ 666static void 667xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 668 const char *msg, int val) 669{ 670 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 671 (ctxt->instate == XML_PARSER_EOF)) 672 return; 673 if (ctxt != NULL) 674 ctxt->errNo = error; 675 __xmlRaiseError(NULL, NULL, NULL, 676 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 677 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 678 if (ctxt != NULL) { 679 ctxt->wellFormed = 0; 680 if (ctxt->recovery == 0) 681 ctxt->disableSAX = 1; 682 } 683} 684 685/** 686 * xmlFatalErrMsgStrIntStr: 687 * @ctxt: an XML parser context 688 * @error: the error number 689 * @msg: the error message 690 * @str1: an string info 691 * @val: an integer value 692 * @str2: an string info 693 * 694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 695 */ 696static void 697xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 698 const char *msg, const xmlChar *str1, int val, 699 const xmlChar *str2) 700{ 701 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 702 (ctxt->instate == XML_PARSER_EOF)) 703 return; 704 if (ctxt != NULL) 705 ctxt->errNo = error; 706 __xmlRaiseError(NULL, NULL, NULL, 707 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 708 NULL, 0, (const char *) str1, (const char *) str2, 709 NULL, val, 0, msg, str1, val, str2); 710 if (ctxt != NULL) { 711 ctxt->wellFormed = 0; 712 if (ctxt->recovery == 0) 713 ctxt->disableSAX = 1; 714 } 715} 716 717/** 718 * xmlFatalErrMsgStr: 719 * @ctxt: an XML parser context 720 * @error: the error number 721 * @msg: the error message 722 * @val: a string value 723 * 724 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 725 */ 726static void 727xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 728 const char *msg, const xmlChar * val) 729{ 730 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 731 (ctxt->instate == XML_PARSER_EOF)) 732 return; 733 if (ctxt != NULL) 734 ctxt->errNo = error; 735 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 736 XML_FROM_PARSER, error, XML_ERR_FATAL, 737 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 738 val); 739 if (ctxt != NULL) { 740 ctxt->wellFormed = 0; 741 if (ctxt->recovery == 0) 742 ctxt->disableSAX = 1; 743 } 744} 745 746/** 747 * xmlErrMsgStr: 748 * @ctxt: an XML parser context 749 * @error: the error number 750 * @msg: the error message 751 * @val: a string value 752 * 753 * Handle a non fatal parser error 754 */ 755static void 756xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 757 const char *msg, const xmlChar * val) 758{ 759 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 760 (ctxt->instate == XML_PARSER_EOF)) 761 return; 762 if (ctxt != NULL) 763 ctxt->errNo = error; 764 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 765 XML_FROM_PARSER, error, XML_ERR_ERROR, 766 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 767 val); 768} 769 770/** 771 * xmlNsErr: 772 * @ctxt: an XML parser context 773 * @error: the error number 774 * @msg: the message 775 * @info1: extra information string 776 * @info2: extra information string 777 * 778 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 779 */ 780static void 781xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 782 const char *msg, 783 const xmlChar * info1, const xmlChar * info2, 784 const xmlChar * info3) 785{ 786 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 787 (ctxt->instate == XML_PARSER_EOF)) 788 return; 789 if (ctxt != NULL) 790 ctxt->errNo = error; 791 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 792 XML_ERR_ERROR, NULL, 0, (const char *) info1, 793 (const char *) info2, (const char *) info3, 0, 0, msg, 794 info1, info2, info3); 795 if (ctxt != NULL) 796 ctxt->nsWellFormed = 0; 797} 798 799/** 800 * xmlNsWarn 801 * @ctxt: an XML parser context 802 * @error: the error number 803 * @msg: the message 804 * @info1: extra information string 805 * @info2: extra information string 806 * 807 * Handle a namespace warning error 808 */ 809static void 810xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 811 const char *msg, 812 const xmlChar * info1, const xmlChar * info2, 813 const xmlChar * info3) 814{ 815 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 816 (ctxt->instate == XML_PARSER_EOF)) 817 return; 818 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 819 XML_ERR_WARNING, NULL, 0, (const char *) info1, 820 (const char *) info2, (const char *) info3, 0, 0, msg, 821 info1, info2, info3); 822} 823 824/************************************************************************ 825 * * 826 * Library wide options * 827 * * 828 ************************************************************************/ 829 830/** 831 * xmlHasFeature: 832 * @feature: the feature to be examined 833 * 834 * Examines if the library has been compiled with a given feature. 835 * 836 * Returns a non-zero value if the feature exist, otherwise zero. 837 * Returns zero (0) if the feature does not exist or an unknown 838 * unknown feature is requested, non-zero otherwise. 
839 */ 840int 841xmlHasFeature(xmlFeature feature) 842{ 843 switch (feature) { 844 case XML_WITH_THREAD: 845#ifdef LIBXML_THREAD_ENABLED 846 return(1); 847#else 848 return(0); 849#endif 850 case XML_WITH_TREE: 851#ifdef LIBXML_TREE_ENABLED 852 return(1); 853#else 854 return(0); 855#endif 856 case XML_WITH_OUTPUT: 857#ifdef LIBXML_OUTPUT_ENABLED 858 return(1); 859#else 860 return(0); 861#endif 862 case XML_WITH_PUSH: 863#ifdef LIBXML_PUSH_ENABLED 864 return(1); 865#else 866 return(0); 867#endif 868 case XML_WITH_READER: 869#ifdef LIBXML_READER_ENABLED 870 return(1); 871#else 872 return(0); 873#endif 874 case XML_WITH_PATTERN: 875#ifdef LIBXML_PATTERN_ENABLED 876 return(1); 877#else 878 return(0); 879#endif 880 case XML_WITH_WRITER: 881#ifdef LIBXML_WRITER_ENABLED 882 return(1); 883#else 884 return(0); 885#endif 886 case XML_WITH_SAX1: 887#ifdef LIBXML_SAX1_ENABLED 888 return(1); 889#else 890 return(0); 891#endif 892 case XML_WITH_FTP: 893#ifdef LIBXML_FTP_ENABLED 894 return(1); 895#else 896 return(0); 897#endif 898 case XML_WITH_HTTP: 899#ifdef LIBXML_HTTP_ENABLED 900 return(1); 901#else 902 return(0); 903#endif 904 case XML_WITH_VALID: 905#ifdef LIBXML_VALID_ENABLED 906 return(1); 907#else 908 return(0); 909#endif 910 case XML_WITH_HTML: 911#ifdef LIBXML_HTML_ENABLED 912 return(1); 913#else 914 return(0); 915#endif 916 case XML_WITH_LEGACY: 917#ifdef LIBXML_LEGACY_ENABLED 918 return(1); 919#else 920 return(0); 921#endif 922 case XML_WITH_C14N: 923#ifdef LIBXML_C14N_ENABLED 924 return(1); 925#else 926 return(0); 927#endif 928 case XML_WITH_CATALOG: 929#ifdef LIBXML_CATALOG_ENABLED 930 return(1); 931#else 932 return(0); 933#endif 934 case XML_WITH_XPATH: 935#ifdef LIBXML_XPATH_ENABLED 936 return(1); 937#else 938 return(0); 939#endif 940 case XML_WITH_XPTR: 941#ifdef LIBXML_XPTR_ENABLED 942 return(1); 943#else 944 return(0); 945#endif 946 case XML_WITH_XINCLUDE: 947#ifdef LIBXML_XINCLUDE_ENABLED 948 return(1); 949#else 950 return(0); 951#endif 952 case XML_WITH_ICONV: 
953#ifdef LIBXML_ICONV_ENABLED 954 return(1); 955#else 956 return(0); 957#endif 958 case XML_WITH_ISO8859X: 959#ifdef LIBXML_ISO8859X_ENABLED 960 return(1); 961#else 962 return(0); 963#endif 964 case XML_WITH_UNICODE: 965#ifdef LIBXML_UNICODE_ENABLED 966 return(1); 967#else 968 return(0); 969#endif 970 case XML_WITH_REGEXP: 971#ifdef LIBXML_REGEXP_ENABLED 972 return(1); 973#else 974 return(0); 975#endif 976 case XML_WITH_AUTOMATA: 977#ifdef LIBXML_AUTOMATA_ENABLED 978 return(1); 979#else 980 return(0); 981#endif 982 case XML_WITH_EXPR: 983#ifdef LIBXML_EXPR_ENABLED 984 return(1); 985#else 986 return(0); 987#endif 988 case XML_WITH_SCHEMAS: 989#ifdef LIBXML_SCHEMAS_ENABLED 990 return(1); 991#else 992 return(0); 993#endif 994 case XML_WITH_SCHEMATRON: 995#ifdef LIBXML_SCHEMATRON_ENABLED 996 return(1); 997#else 998 return(0); 999#endif 1000 case XML_WITH_MODULES: 1001#ifdef LIBXML_MODULES_ENABLED 1002 return(1); 1003#else 1004 return(0); 1005#endif 1006 case XML_WITH_DEBUG: 1007#ifdef LIBXML_DEBUG_ENABLED 1008 return(1); 1009#else 1010 return(0); 1011#endif 1012 case XML_WITH_DEBUG_MEM: 1013#ifdef DEBUG_MEMORY_LOCATION 1014 return(1); 1015#else 1016 return(0); 1017#endif 1018 case XML_WITH_DEBUG_RUN: 1019#ifdef LIBXML_DEBUG_RUNTIME 1020 return(1); 1021#else 1022 return(0); 1023#endif 1024 case XML_WITH_ZLIB: 1025#ifdef LIBXML_ZLIB_ENABLED 1026 return(1); 1027#else 1028 return(0); 1029#endif 1030 case XML_WITH_LZMA: 1031#ifdef LIBXML_LZMA_ENABLED 1032 return(1); 1033#else 1034 return(0); 1035#endif 1036 case XML_WITH_ICU: 1037#ifdef LIBXML_ICU_ENABLED 1038 return(1); 1039#else 1040 return(0); 1041#endif 1042 default: 1043 break; 1044 } 1045 return(0); 1046} 1047 1048/************************************************************************ 1049 * * 1050 * SAX2 defaulted attributes handling * 1051 * * 1052 ************************************************************************/ 1053 1054/** 1055 * xmlDetectSAX2: 1056 * @ctxt: an XML parser context 1057 * 1058 * Do the 
SAX2 detection and specific intialization 1059 */ 1060static void 1061xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1062 if (ctxt == NULL) return; 1063#ifdef LIBXML_SAX1_ENABLED 1064 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1065 ((ctxt->sax->startElementNs != NULL) || 1066 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1067#else 1068 ctxt->sax2 = 1; 1069#endif /* LIBXML_SAX1_ENABLED */ 1070 1071 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1072 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1073 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1074 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1075 (ctxt->str_xml_ns == NULL)) { 1076 xmlErrMemory(ctxt, NULL); 1077 } 1078} 1079 1080typedef struct _xmlDefAttrs xmlDefAttrs; 1081typedef xmlDefAttrs *xmlDefAttrsPtr; 1082struct _xmlDefAttrs { 1083 int nbAttrs; /* number of defaulted attributes on that element */ 1084 int maxAttrs; /* the size of the array */ 1085 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1086}; 1087 1088/** 1089 * xmlAttrNormalizeSpace: 1090 * @src: the source string 1091 * @dst: the target string 1092 * 1093 * Normalize the space in non CDATA attribute values: 1094 * If the attribute type is not CDATA, then the XML processor MUST further 1095 * process the normalized attribute value by discarding any leading and 1096 * trailing space (#x20) characters, and by replacing sequences of space 1097 * (#x20) characters by a single space (#x20) character. 1098 * Note that the size of dst need to be at least src, and if one doesn't need 1099 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1100 * passing src as dst is just fine. 1101 * 1102 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1103 * is needed. 
1104 */ 1105static xmlChar * 1106xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1107{ 1108 if ((src == NULL) || (dst == NULL)) 1109 return(NULL); 1110 1111 while (*src == 0x20) src++; 1112 while (*src != 0) { 1113 if (*src == 0x20) { 1114 while (*src == 0x20) src++; 1115 if (*src != 0) 1116 *dst++ = 0x20; 1117 } else { 1118 *dst++ = *src++; 1119 } 1120 } 1121 *dst = 0; 1122 if (dst == src) 1123 return(NULL); 1124 return(dst); 1125} 1126 1127/** 1128 * xmlAttrNormalizeSpace2: 1129 * @src: the source string 1130 * 1131 * Normalize the space in non CDATA attribute values, a slightly more complex 1132 * front end to avoid allocation problems when running on attribute values 1133 * coming from the input. 1134 * 1135 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1136 * is needed. 1137 */ 1138static const xmlChar * 1139xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1140{ 1141 int i; 1142 int remove_head = 0; 1143 int need_realloc = 0; 1144 const xmlChar *cur; 1145 1146 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1147 return(NULL); 1148 i = *len; 1149 if (i <= 0) 1150 return(NULL); 1151 1152 cur = src; 1153 while (*cur == 0x20) { 1154 cur++; 1155 remove_head++; 1156 } 1157 while (*cur != 0) { 1158 if (*cur == 0x20) { 1159 cur++; 1160 if ((*cur == 0x20) || (*cur == 0)) { 1161 need_realloc = 1; 1162 break; 1163 } 1164 } else 1165 cur++; 1166 } 1167 if (need_realloc) { 1168 xmlChar *ret; 1169 1170 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1171 if (ret == NULL) { 1172 xmlErrMemory(ctxt, NULL); 1173 return(NULL); 1174 } 1175 xmlAttrNormalizeSpace(ret, ret); 1176 *len = (int) strlen((const char *)ret); 1177 return(ret); 1178 } else if (remove_head) { 1179 *len -= remove_head; 1180 memmove(src, src + remove_head, 1 + *len); 1181 return(src); 1182 } 1183 return(NULL); 1184} 1185 1186/** 1187 * xmlAddDefAttrs: 1188 * @ctxt: an XML parser context 1189 * @fullname: the element fullname 1190 * 
@fullattr: the attribute fullname 1191 * @value: the attribute value 1192 * 1193 * Add a defaulted attribute for an element 1194 */ 1195static void 1196xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1197 const xmlChar *fullname, 1198 const xmlChar *fullattr, 1199 const xmlChar *value) { 1200 xmlDefAttrsPtr defaults; 1201 int len; 1202 const xmlChar *name; 1203 const xmlChar *prefix; 1204 1205 /* 1206 * Allows to detect attribute redefinitions 1207 */ 1208 if (ctxt->attsSpecial != NULL) { 1209 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1210 return; 1211 } 1212 1213 if (ctxt->attsDefault == NULL) { 1214 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1215 if (ctxt->attsDefault == NULL) 1216 goto mem_error; 1217 } 1218 1219 /* 1220 * split the element name into prefix:localname , the string found 1221 * are within the DTD and then not associated to namespace names. 1222 */ 1223 name = xmlSplitQName3(fullname, &len); 1224 if (name == NULL) { 1225 name = xmlDictLookup(ctxt->dict, fullname, -1); 1226 prefix = NULL; 1227 } else { 1228 name = xmlDictLookup(ctxt->dict, name, -1); 1229 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1230 } 1231 1232 /* 1233 * make sure there is some storage 1234 */ 1235 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1236 if (defaults == NULL) { 1237 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1238 (4 * 5) * sizeof(const xmlChar *)); 1239 if (defaults == NULL) 1240 goto mem_error; 1241 defaults->nbAttrs = 0; 1242 defaults->maxAttrs = 4; 1243 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1244 defaults, NULL) < 0) { 1245 xmlFree(defaults); 1246 goto mem_error; 1247 } 1248 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1249 xmlDefAttrsPtr temp; 1250 1251 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1252 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1253 if (temp == NULL) 1254 goto mem_error; 1255 defaults = temp; 1256 defaults->maxAttrs *= 2; 
1257 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1258 defaults, NULL) < 0) { 1259 xmlFree(defaults); 1260 goto mem_error; 1261 } 1262 } 1263 1264 /* 1265 * Split the element name into prefix:localname , the string found 1266 * are within the DTD and hen not associated to namespace names. 1267 */ 1268 name = xmlSplitQName3(fullattr, &len); 1269 if (name == NULL) { 1270 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1271 prefix = NULL; 1272 } else { 1273 name = xmlDictLookup(ctxt->dict, name, -1); 1274 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1275 } 1276 1277 defaults->values[5 * defaults->nbAttrs] = name; 1278 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1279 /* intern the string and precompute the end */ 1280 len = xmlStrlen(value); 1281 value = xmlDictLookup(ctxt->dict, value, len); 1282 defaults->values[5 * defaults->nbAttrs + 2] = value; 1283 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1284 if (ctxt->external) 1285 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1286 else 1287 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1288 defaults->nbAttrs++; 1289 1290 return; 1291 1292mem_error: 1293 xmlErrMemory(ctxt, NULL); 1294 return; 1295} 1296 1297/** 1298 * xmlAddSpecialAttr: 1299 * @ctxt: an XML parser context 1300 * @fullname: the element fullname 1301 * @fullattr: the attribute fullname 1302 * @type: the attribute type 1303 * 1304 * Register this attribute type 1305 */ 1306static void 1307xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1308 const xmlChar *fullname, 1309 const xmlChar *fullattr, 1310 int type) 1311{ 1312 if (ctxt->attsSpecial == NULL) { 1313 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1314 if (ctxt->attsSpecial == NULL) 1315 goto mem_error; 1316 } 1317 1318 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1319 return; 1320 1321 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1322 (void *) (long) type); 1323 return; 1324 1325mem_error: 1326 
xmlErrMemory(ctxt, NULL); 1327 return; 1328} 1329 1330/** 1331 * xmlCleanSpecialAttrCallback: 1332 * 1333 * Removes CDATA attributes from the special attribute table 1334 */ 1335static void 1336xmlCleanSpecialAttrCallback(void *payload, void *data, 1337 const xmlChar *fullname, const xmlChar *fullattr, 1338 const xmlChar *unused ATTRIBUTE_UNUSED) { 1339 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1340 1341 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1342 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1343 } 1344} 1345 1346/** 1347 * xmlCleanSpecialAttr: 1348 * @ctxt: an XML parser context 1349 * 1350 * Trim the list of attributes defined to remove all those of type 1351 * CDATA as they are not special. This call should be done when finishing 1352 * to parse the DTD and before starting to parse the document root. 1353 */ 1354static void 1355xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1356{ 1357 if (ctxt->attsSpecial == NULL) 1358 return; 1359 1360 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1361 1362 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1363 xmlHashFree(ctxt->attsSpecial, NULL); 1364 ctxt->attsSpecial = NULL; 1365 } 1366 return; 1367} 1368 1369/** 1370 * xmlCheckLanguageID: 1371 * @lang: pointer to the string value 1372 * 1373 * Checks that the value conforms to the LanguageID production: 1374 * 1375 * NOTE: this is somewhat deprecated, those productions were removed from 1376 * the XML Second edition. 
1377 * 1378 * [33] LanguageID ::= Langcode ('-' Subcode)* 1379 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1380 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1381 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1382 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1383 * [38] Subcode ::= ([a-z] | [A-Z])+ 1384 * 1385 * The current REC reference the sucessors of RFC 1766, currently 5646 1386 * 1387 * http://www.rfc-editor.org/rfc/rfc5646.txt 1388 * langtag = language 1389 * ["-" script] 1390 * ["-" region] 1391 * *("-" variant) 1392 * *("-" extension) 1393 * ["-" privateuse] 1394 * language = 2*3ALPHA ; shortest ISO 639 code 1395 * ["-" extlang] ; sometimes followed by 1396 * ; extended language subtags 1397 * / 4ALPHA ; or reserved for future use 1398 * / 5*8ALPHA ; or registered language subtag 1399 * 1400 * extlang = 3ALPHA ; selected ISO 639 codes 1401 * *2("-" 3ALPHA) ; permanently reserved 1402 * 1403 * script = 4ALPHA ; ISO 15924 code 1404 * 1405 * region = 2ALPHA ; ISO 3166-1 code 1406 * / 3DIGIT ; UN M.49 code 1407 * 1408 * variant = 5*8alphanum ; registered variants 1409 * / (DIGIT 3alphanum) 1410 * 1411 * extension = singleton 1*("-" (2*8alphanum)) 1412 * 1413 * ; Single alphanumerics 1414 * ; "x" reserved for private use 1415 * singleton = DIGIT ; 0 - 9 1416 * / %x41-57 ; A - W 1417 * / %x59-5A ; Y - Z 1418 * / %x61-77 ; a - w 1419 * / %x79-7A ; y - z 1420 * 1421 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1422 * The parser below doesn't try to cope with extension or privateuse 1423 * that could be added but that's not interoperable anyway 1424 * 1425 * Returns 1 if correct 0 otherwise 1426 **/ 1427int 1428xmlCheckLanguageID(const xmlChar * lang) 1429{ 1430 const xmlChar *cur = lang, *nxt; 1431 1432 if (cur == NULL) 1433 return (0); 1434 if (((cur[0] == 'i') && (cur[1] == '-')) || 1435 ((cur[0] == 'I') && (cur[1] == '-')) || 1436 ((cur[0] == 'x') && (cur[1] == '-')) || 1437 ((cur[0] == 'X') && (cur[1] 
== '-'))) { 1438 /* 1439 * Still allow IANA code and user code which were coming 1440 * from the previous version of the XML-1.0 specification 1441 * it's deprecated but we should not fail 1442 */ 1443 cur += 2; 1444 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1445 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1446 cur++; 1447 return(cur[0] == 0); 1448 } 1449 nxt = cur; 1450 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1451 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1452 nxt++; 1453 if (nxt - cur >= 4) { 1454 /* 1455 * Reserved 1456 */ 1457 if ((nxt - cur > 8) || (nxt[0] != 0)) 1458 return(0); 1459 return(1); 1460 } 1461 if (nxt - cur < 2) 1462 return(0); 1463 /* we got an ISO 639 code */ 1464 if (nxt[0] == 0) 1465 return(1); 1466 if (nxt[0] != '-') 1467 return(0); 1468 1469 nxt++; 1470 cur = nxt; 1471 /* now we can have extlang or script or region or variant */ 1472 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1473 goto region_m49; 1474 1475 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1476 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1477 nxt++; 1478 if (nxt - cur == 4) 1479 goto script; 1480 if (nxt - cur == 2) 1481 goto region; 1482 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1483 goto variant; 1484 if (nxt - cur != 3) 1485 return(0); 1486 /* we parsed an extlang */ 1487 if (nxt[0] == 0) 1488 return(1); 1489 if (nxt[0] != '-') 1490 return(0); 1491 1492 nxt++; 1493 cur = nxt; 1494 /* now we can have script or region or variant */ 1495 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1496 goto region_m49; 1497 1498 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1499 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1500 nxt++; 1501 if (nxt - cur == 2) 1502 goto region; 1503 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1504 goto variant; 1505 if (nxt - cur != 4) 1506 return(0); 1507 /* we parsed a script */ 1508script: 1509 if (nxt[0] == 0) 1510 return(1); 1511 if (nxt[0] != '-') 1512 return(0); 1513 1514 nxt++; 1515 cur = nxt; 1516 /* now we can have region or variant */ 1517 if ((nxt[0] >= '0') && (nxt[0] 
<= '9')) 1518 goto region_m49; 1519 1520 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1521 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1522 nxt++; 1523 1524 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1525 goto variant; 1526 if (nxt - cur != 2) 1527 return(0); 1528 /* we parsed a region */ 1529region: 1530 if (nxt[0] == 0) 1531 return(1); 1532 if (nxt[0] != '-') 1533 return(0); 1534 1535 nxt++; 1536 cur = nxt; 1537 /* now we can just have a variant */ 1538 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1539 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1540 nxt++; 1541 1542 if ((nxt - cur < 5) || (nxt - cur > 8)) 1543 return(0); 1544 1545 /* we parsed a variant */ 1546variant: 1547 if (nxt[0] == 0) 1548 return(1); 1549 if (nxt[0] != '-') 1550 return(0); 1551 /* extensions and private use subtags not checked */ 1552 return (1); 1553 1554region_m49: 1555 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1556 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1557 nxt += 3; 1558 goto region; 1559 } 1560 return(0); 1561} 1562 1563/************************************************************************ 1564 * * 1565 * Parser stacks related functions and macros * 1566 * * 1567 ************************************************************************/ 1568 1569static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1570 const xmlChar ** str); 1571 1572#ifdef SAX2 1573/** 1574 * nsPush: 1575 * @ctxt: an XML parser context 1576 * @prefix: the namespace prefix or NULL 1577 * @URL: the namespace name 1578 * 1579 * Pushes a new parser namespace on top of the ns stack 1580 * 1581 * Returns -1 in case of error, -2 if the namespace should be discarded 1582 * and the index in the stack otherwise. 
1583 */ 1584static int 1585nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1586{ 1587 if (ctxt->options & XML_PARSE_NSCLEAN) { 1588 int i; 1589 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1590 if (ctxt->nsTab[i] == prefix) { 1591 /* in scope */ 1592 if (ctxt->nsTab[i + 1] == URL) 1593 return(-2); 1594 /* out of scope keep it */ 1595 break; 1596 } 1597 } 1598 } 1599 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1600 ctxt->nsMax = 10; 1601 ctxt->nsNr = 0; 1602 ctxt->nsTab = (const xmlChar **) 1603 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1604 if (ctxt->nsTab == NULL) { 1605 xmlErrMemory(ctxt, NULL); 1606 ctxt->nsMax = 0; 1607 return (-1); 1608 } 1609 } else if (ctxt->nsNr >= ctxt->nsMax) { 1610 const xmlChar ** tmp; 1611 ctxt->nsMax *= 2; 1612 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1613 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1614 if (tmp == NULL) { 1615 xmlErrMemory(ctxt, NULL); 1616 ctxt->nsMax /= 2; 1617 return (-1); 1618 } 1619 ctxt->nsTab = tmp; 1620 } 1621 ctxt->nsTab[ctxt->nsNr++] = prefix; 1622 ctxt->nsTab[ctxt->nsNr++] = URL; 1623 return (ctxt->nsNr); 1624} 1625/** 1626 * nsPop: 1627 * @ctxt: an XML parser context 1628 * @nr: the number to pop 1629 * 1630 * Pops the top @nr parser prefix/namespace from the ns stack 1631 * 1632 * Returns the number of namespaces removed 1633 */ 1634static int 1635nsPop(xmlParserCtxtPtr ctxt, int nr) 1636{ 1637 int i; 1638 1639 if (ctxt->nsTab == NULL) return(0); 1640 if (ctxt->nsNr < nr) { 1641 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1642 nr = ctxt->nsNr; 1643 } 1644 if (ctxt->nsNr <= 0) 1645 return (0); 1646 1647 for (i = 0;i < nr;i++) { 1648 ctxt->nsNr--; 1649 ctxt->nsTab[ctxt->nsNr] = NULL; 1650 } 1651 return(nr); 1652} 1653#endif 1654 1655static int 1656xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1657 const xmlChar **atts; 1658 int *attallocs; 1659 int maxatts; 1660 1661 if (ctxt->atts == NULL) { 1662 maxatts = 55; /* allow for 10 attrs by 
default */ 1663 atts = (const xmlChar **) 1664 xmlMalloc(maxatts * sizeof(xmlChar *)); 1665 if (atts == NULL) goto mem_error; 1666 ctxt->atts = atts; 1667 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1668 if (attallocs == NULL) goto mem_error; 1669 ctxt->attallocs = attallocs; 1670 ctxt->maxatts = maxatts; 1671 } else if (nr + 5 > ctxt->maxatts) { 1672 maxatts = (nr + 5) * 2; 1673 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1674 maxatts * sizeof(const xmlChar *)); 1675 if (atts == NULL) goto mem_error; 1676 ctxt->atts = atts; 1677 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1678 (maxatts / 5) * sizeof(int)); 1679 if (attallocs == NULL) goto mem_error; 1680 ctxt->attallocs = attallocs; 1681 ctxt->maxatts = maxatts; 1682 } 1683 return(ctxt->maxatts); 1684mem_error: 1685 xmlErrMemory(ctxt, NULL); 1686 return(-1); 1687} 1688 1689/** 1690 * inputPush: 1691 * @ctxt: an XML parser context 1692 * @value: the parser input 1693 * 1694 * Pushes a new parser input on top of the input stack 1695 * 1696 * Returns -1 in case of error, the index in the stack otherwise 1697 */ 1698int 1699inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1700{ 1701 if ((ctxt == NULL) || (value == NULL)) 1702 return(-1); 1703 if (ctxt->inputNr >= ctxt->inputMax) { 1704 ctxt->inputMax *= 2; 1705 ctxt->inputTab = 1706 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1707 ctxt->inputMax * 1708 sizeof(ctxt->inputTab[0])); 1709 if (ctxt->inputTab == NULL) { 1710 xmlErrMemory(ctxt, NULL); 1711 xmlFreeInputStream(value); 1712 ctxt->inputMax /= 2; 1713 value = NULL; 1714 return (-1); 1715 } 1716 } 1717 ctxt->inputTab[ctxt->inputNr] = value; 1718 ctxt->input = value; 1719 return (ctxt->inputNr++); 1720} 1721/** 1722 * inputPop: 1723 * @ctxt: an XML parser context 1724 * 1725 * Pops the top parser input from the input stack 1726 * 1727 * Returns the input just removed 1728 */ 1729xmlParserInputPtr 1730inputPop(xmlParserCtxtPtr ctxt) 1731{ 1732 
xmlParserInputPtr ret; 1733 1734 if (ctxt == NULL) 1735 return(NULL); 1736 if (ctxt->inputNr <= 0) 1737 return (NULL); 1738 ctxt->inputNr--; 1739 if (ctxt->inputNr > 0) 1740 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1741 else 1742 ctxt->input = NULL; 1743 ret = ctxt->inputTab[ctxt->inputNr]; 1744 ctxt->inputTab[ctxt->inputNr] = NULL; 1745 return (ret); 1746} 1747/** 1748 * nodePush: 1749 * @ctxt: an XML parser context 1750 * @value: the element node 1751 * 1752 * Pushes a new element node on top of the node stack 1753 * 1754 * Returns -1 in case of error, the index in the stack otherwise 1755 */ 1756int 1757nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1758{ 1759 if (ctxt == NULL) return(0); 1760 if (ctxt->nodeNr >= ctxt->nodeMax) { 1761 xmlNodePtr *tmp; 1762 1763 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1764 ctxt->nodeMax * 2 * 1765 sizeof(ctxt->nodeTab[0])); 1766 if (tmp == NULL) { 1767 xmlErrMemory(ctxt, NULL); 1768 return (-1); 1769 } 1770 ctxt->nodeTab = tmp; 1771 ctxt->nodeMax *= 2; 1772 } 1773 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1774 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1775 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1776 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1777 xmlParserMaxDepth); 1778 xmlHaltParser(ctxt); 1779 return(-1); 1780 } 1781 ctxt->nodeTab[ctxt->nodeNr] = value; 1782 ctxt->node = value; 1783 return (ctxt->nodeNr++); 1784} 1785 1786/** 1787 * nodePop: 1788 * @ctxt: an XML parser context 1789 * 1790 * Pops the top element node from the node stack 1791 * 1792 * Returns the node just removed 1793 */ 1794xmlNodePtr 1795nodePop(xmlParserCtxtPtr ctxt) 1796{ 1797 xmlNodePtr ret; 1798 1799 if (ctxt == NULL) return(NULL); 1800 if (ctxt->nodeNr <= 0) 1801 return (NULL); 1802 ctxt->nodeNr--; 1803 if (ctxt->nodeNr > 0) 1804 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1805 else 1806 ctxt->node = NULL; 1807 ret = ctxt->nodeTab[ctxt->nodeNr]; 1808 ctxt->nodeTab[ctxt->nodeNr] = NULL; 
1809 return (ret); 1810} 1811 1812#ifdef LIBXML_PUSH_ENABLED 1813/** 1814 * nameNsPush: 1815 * @ctxt: an XML parser context 1816 * @value: the element name 1817 * @prefix: the element prefix 1818 * @URI: the element namespace name 1819 * 1820 * Pushes a new element name/prefix/URL on top of the name stack 1821 * 1822 * Returns -1 in case of error, the index in the stack otherwise 1823 */ 1824static int 1825nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1826 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1827{ 1828 if (ctxt->nameNr >= ctxt->nameMax) { 1829 const xmlChar * *tmp; 1830 void **tmp2; 1831 ctxt->nameMax *= 2; 1832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1833 ctxt->nameMax * 1834 sizeof(ctxt->nameTab[0])); 1835 if (tmp == NULL) { 1836 ctxt->nameMax /= 2; 1837 goto mem_error; 1838 } 1839 ctxt->nameTab = tmp; 1840 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1841 ctxt->nameMax * 3 * 1842 sizeof(ctxt->pushTab[0])); 1843 if (tmp2 == NULL) { 1844 ctxt->nameMax /= 2; 1845 goto mem_error; 1846 } 1847 ctxt->pushTab = tmp2; 1848 } 1849 ctxt->nameTab[ctxt->nameNr] = value; 1850 ctxt->name = value; 1851 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1852 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1853 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1854 return (ctxt->nameNr++); 1855mem_error: 1856 xmlErrMemory(ctxt, NULL); 1857 return (-1); 1858} 1859/** 1860 * nameNsPop: 1861 * @ctxt: an XML parser context 1862 * 1863 * Pops the top element/prefix/URI name from the name stack 1864 * 1865 * Returns the name just removed 1866 */ 1867static const xmlChar * 1868nameNsPop(xmlParserCtxtPtr ctxt) 1869{ 1870 const xmlChar *ret; 1871 1872 if (ctxt->nameNr <= 0) 1873 return (NULL); 1874 ctxt->nameNr--; 1875 if (ctxt->nameNr > 0) 1876 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1877 else 1878 ctxt->name = NULL; 1879 ret = ctxt->nameTab[ctxt->nameNr]; 1880 ctxt->nameTab[ctxt->nameNr] = NULL; 1881 return 
(ret); 1882} 1883#endif /* LIBXML_PUSH_ENABLED */ 1884 1885/** 1886 * namePush: 1887 * @ctxt: an XML parser context 1888 * @value: the element name 1889 * 1890 * Pushes a new element name on top of the name stack 1891 * 1892 * Returns -1 in case of error, the index in the stack otherwise 1893 */ 1894int 1895namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1896{ 1897 if (ctxt == NULL) return (-1); 1898 1899 if (ctxt->nameNr >= ctxt->nameMax) { 1900 const xmlChar * *tmp; 1901 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1902 ctxt->nameMax * 2 * 1903 sizeof(ctxt->nameTab[0])); 1904 if (tmp == NULL) { 1905 goto mem_error; 1906 } 1907 ctxt->nameTab = tmp; 1908 ctxt->nameMax *= 2; 1909 } 1910 ctxt->nameTab[ctxt->nameNr] = value; 1911 ctxt->name = value; 1912 return (ctxt->nameNr++); 1913mem_error: 1914 xmlErrMemory(ctxt, NULL); 1915 return (-1); 1916} 1917/** 1918 * namePop: 1919 * @ctxt: an XML parser context 1920 * 1921 * Pops the top element name from the name stack 1922 * 1923 * Returns the name just removed 1924 */ 1925const xmlChar * 1926namePop(xmlParserCtxtPtr ctxt) 1927{ 1928 const xmlChar *ret; 1929 1930 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1931 return (NULL); 1932 ctxt->nameNr--; 1933 if (ctxt->nameNr > 0) 1934 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1935 else 1936 ctxt->name = NULL; 1937 ret = ctxt->nameTab[ctxt->nameNr]; 1938 ctxt->nameTab[ctxt->nameNr] = NULL; 1939 return (ret); 1940} 1941 1942static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1943 if (ctxt->spaceNr >= ctxt->spaceMax) { 1944 int *tmp; 1945 1946 ctxt->spaceMax *= 2; 1947 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1948 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1949 if (tmp == NULL) { 1950 xmlErrMemory(ctxt, NULL); 1951 ctxt->spaceMax /=2; 1952 return(-1); 1953 } 1954 ctxt->spaceTab = tmp; 1955 } 1956 ctxt->spaceTab[ctxt->spaceNr] = val; 1957 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1958 return(ctxt->spaceNr++); 1959} 1960 1961static int 
spacePop(xmlParserCtxtPtr ctxt) { 1962 int ret; 1963 if (ctxt->spaceNr <= 0) return(0); 1964 ctxt->spaceNr--; 1965 if (ctxt->spaceNr > 0) 1966 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1967 else 1968 ctxt->space = &ctxt->spaceTab[0]; 1969 ret = ctxt->spaceTab[ctxt->spaceNr]; 1970 ctxt->spaceTab[ctxt->spaceNr] = -1; 1971 return(ret); 1972} 1973 1974/* 1975 * Macros for accessing the content. Those should be used only by the parser, 1976 * and not exported. 1977 * 1978 * Dirty macros, i.e. one often need to make assumption on the context to 1979 * use them 1980 * 1981 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1982 * To be used with extreme caution since operations consuming 1983 * characters may move the input buffer to a different location ! 1984 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1985 * This should be used internally by the parser 1986 * only to compare to ASCII values otherwise it would break when 1987 * running with UTF-8 encoding. 1988 * RAW same as CUR but in the input buffer, bypass any token 1989 * extraction that may have been done 1990 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1991 * to compare on ASCII based substring. 1992 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1993 * strings without newlines within the parser. 1994 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1995 * defined char within the parser. 1996 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1997 * 1998 * NEXT Skip to the next character, this does the proper decoding 1999 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2000 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2001 * CUR_CHAR(l) returns the current unicode character (int), set l 2002 * to the number of xmlChars used for the encoding [0-5]. 
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* All of the macros below expect a variable named ctxt to be in scope. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 ... cn): true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in that order.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance cur, col and nbChars by val without looking at the
 * skipped bytes, then hand a '%' to xmlParserHandlePEReference() and pop
 * the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): like SKIP but steps one byte at a time so that line and
 * col stay correct across '\n'. Note that val is expanded unparenthesized
 * in the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK expands to a bare if statement that already ends with ';'.
 * It only acts when ctxt->progressive is 0, more than 2 * INPUT_CHUNK
 * bytes lie before cur and fewer than 2 * INPUT_CHUNK remain after it.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK:
 * Out-of-line part of SHRINK: shrinks the current input, then pops it
 * if it is at a 0 byte and cannot be grown.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW expands to a bare if statement that already ends with ';'.
 * It only acts when ctxt->progressive is 0 and fewer than INPUT_CHUNK
 * bytes remain between cur and end.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW:
 * Out-of-line part of GROW. Unless XML_PARSE_HUGE is set, refuses to
 * grow (fatal error, parser halted) when either side of cur exceeds
 * XML_MAX_LOOKUP_LIMIT and the input has a buffer whose read callback
 * is not xmlNop. After growing, cur is checked against base/end, and
 * the input is popped if it is at a 0 byte and cannot be grown further.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    /* sanity check of the cursor after the buffer may have moved */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and try to grow the input when a
 * 0 byte is reached. Unlike SKIP it neither handles '%' nor pops the
 * input. It expands to a brace block, not a do/while.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character of l bytes, keeping line/col up
 * to date; nbChars is not touched. Note that l is expanded unparenthesized.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single byte is stored directly when l == 1. It expands
 * to an unbraced if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    } else {
		ctxt->input->col++;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/* publish the position, refill, then reload the pointer */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	/* general path: several inputs stacked, or inside the DTD */
	int cur;
	do {
	    cur = CUR;
	    while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
	           (ctxt->instate != XML_PARSER_EOF))) {
		NEXT;
		cur = CUR;
		res++;
	    }
	    /* drop exhausted nested inputs, except while in a comment */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	} while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
	         (ctxt->instate != XML_PARSER_EOF));
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
2198 * 2199 * Returns the current xmlChar in the parser context 2200 */ 2201xmlChar 2202xmlPopInput(xmlParserCtxtPtr ctxt) { 2203 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2204 if (xmlParserDebugEntities) 2205 xmlGenericError(xmlGenericErrorContext, 2206 "Popping input %d\n", ctxt->inputNr); 2207 xmlFreeInputStream(inputPop(ctxt)); 2208 if ((*ctxt->input->cur == 0) && 2209 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2210 return(xmlPopInput(ctxt)); 2211 return(CUR); 2212} 2213 2214/** 2215 * xmlPushInput: 2216 * @ctxt: an XML parser context 2217 * @input: an XML parser input fragment (entity, XML fragment ...). 2218 * 2219 * xmlPushInput: switch to a new input stream which is stacked on top 2220 * of the previous one(s). 2221 * Returns -1 in case of error or the index in the input stack 2222 */ 2223int 2224xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2225 int ret; 2226 if (input == NULL) return(-1); 2227 2228 if (xmlParserDebugEntities) { 2229 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2230 xmlGenericError(xmlGenericErrorContext, 2231 "%s(%d): ", ctxt->input->filename, 2232 ctxt->input->line); 2233 xmlGenericError(xmlGenericErrorContext, 2234 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2235 } 2236 ret = inputPush(ctxt, input); 2237 if (ctxt->instate == XML_PARSER_EOF) 2238 return(-1); 2239 GROW; 2240 return(ret); 2241} 2242 2243/** 2244 * xmlParseCharRef: 2245 * @ctxt: an XML parser context 2246 * 2247 * parse Reference declarations 2248 * 2249 * [66] CharRef ::= '&#' [0-9]+ ';' | 2250 * '&#x' [0-9a-fA-F]+ ';' 2251 * 2252 * [ WFC: Legal Character ] 2253 * Characters referred to using character references must match the 2254 * production for Char. 
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* becomes non-zero once val has exceeded 0x10FFFF; never reset */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	/* hexadecimal form: &#x...; */
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    /*
	     * NOTE(review): the count < 20 condition only applies to the
	     * hex letters, not to digits, and count is also incremented
	     * at the end of each iteration - confirm the intended bound.
	     */
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: &#...; */
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else {
        /* not positioned on "&#": report it; val is still 0 below */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}

/**
 * xmlParseStringCharRef:
 * @ctxt:  an XML parser context
 * @str:  a pointer to an index in the string
 *
 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * Returns the value parsed (as an int), 0 in case of error, str will be
 *         updated to the current value of the index
 */
static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
    const xmlChar *ptr;
    xmlChar cur;
    unsigned int val = 0;
    /* becomes non-zero once val has exceeded 0x10FFFF; never reset */
    unsigned int outofrange = 0;

    if ((str == NULL) || (*str == NULL)) return(0);
    ptr = *str;
    cur = *ptr;
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
	/* hexadecimal form: &#x...; */
	ptr += 3;
	cur = *ptr;
	while (cur != ';') { /* Non input consuming loop */
	    if ((cur >= '0') && (cur <= '9'))
	        val = val * 16 + (cur - '0');
	    else if ((cur >= 'a') && (cur <= 'f'))
	        val = val * 16 + (cur - 'a') + 10;
	    else if ((cur >= 'A') && (cur <= 'F'))
	        val = val * 16 + (cur - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    ptr++;
	    cur = *ptr;
	}
	if (cur == ';')
	    ptr++;
    } else if  ((cur == '&') && (ptr[1] == '#')){
	/* decimal form: &#...; */
	ptr += 2;
	cur = *ptr;
	while (cur != ';') { /* Non input consuming loops */
	    if ((cur >= '0') && (cur <= '9'))
	        val = val * 10 + (cur - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    ptr++;
	    cur = *ptr;
	}
	if (cur == ';')
	    ptr++;
    } else {
	/* not a character reference: *str is left unchanged */
	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
	return(0);
    }
    *str = ptr;

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
			  val);
    }
    return(0);
}

/**
 * xmlNewBlanksWrapperInputStream:
 * @ctxt:  an XML parser context
 * @entity:  an Entity pointer
 *
 * Create a new input stream for wrapping
 * blanks around a PEReference
 *
 * Returns the new input stream or NULL
 */

static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}

static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr input;
    xmlChar *buffer;
    size_t length;
    if (entity == NULL) {
	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
	            "xmlNewBlanksWrapperInputStream entity\n");
	return(NULL);
    }
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"new blanks wrapper for entity: %s\n", entity->name);
    input = xmlNewInputStream(ctxt);
    if (input == NULL) {
	return(NULL);
    }
    length = xmlStrlen(entity->name) + 5;
    buffer = xmlMallocAtomic(length);
    if (buffer == NULL) {
	xmlErrMemory(ctxt, NULL);
        xmlFree(input);
	return(NULL);
    }
    buffer [0] = ' ';
    buffer [1] = '%';
    buffer [length-3] = ';';
    buffer [length-2] = ' ';
    buffer
[length-1] = 0; 2482 memcpy(buffer + 2, entity->name, length - 5); 2483 input->free = deallocblankswrapper; 2484 input->base = buffer; 2485 input->cur = buffer; 2486 input->length = length; 2487 input->end = &buffer[length]; 2488 return(input); 2489} 2490 2491/** 2492 * xmlParserHandlePEReference: 2493 * @ctxt: the parser context 2494 * 2495 * [69] PEReference ::= '%' Name ';' 2496 * 2497 * [ WFC: No Recursion ] 2498 * A parsed entity must not contain a recursive 2499 * reference to itself, either directly or indirectly. 2500 * 2501 * [ WFC: Entity Declared ] 2502 * In a document without any DTD, a document with only an internal DTD 2503 * subset which contains no parameter entity references, or a document 2504 * with "standalone='yes'", ... ... The declaration of a parameter 2505 * entity must precede any reference to it... 2506 * 2507 * [ VC: Entity Declared ] 2508 * In a document with an external subset or external parameter entities 2509 * with "standalone='no'", ... ... The declaration of a parameter entity 2510 * must precede any reference to it... 2511 * 2512 * [ WFC: In DTD ] 2513 * Parameter-entity references may only appear in the DTD. 2514 * NOTE: misleading but this is handled. 2515 * 2516 * A PEReference may have been detected in the current input stream 2517 * the handling is done accordingly to 2518 * http://www.w3.org/TR/REC-xml#entproc 2519 * i.e. 
2520 * - Included in literal in entity values 2521 * - Included as Parameter Entity reference within DTDs 2522 */ 2523void 2524xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2525 const xmlChar *name; 2526 xmlEntityPtr entity = NULL; 2527 xmlParserInputPtr input; 2528 2529 if (RAW != '%') return; 2530 switch(ctxt->instate) { 2531 case XML_PARSER_CDATA_SECTION: 2532 return; 2533 case XML_PARSER_COMMENT: 2534 return; 2535 case XML_PARSER_START_TAG: 2536 return; 2537 case XML_PARSER_END_TAG: 2538 return; 2539 case XML_PARSER_EOF: 2540 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2541 return; 2542 case XML_PARSER_PROLOG: 2543 case XML_PARSER_START: 2544 case XML_PARSER_MISC: 2545 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2546 return; 2547 case XML_PARSER_ENTITY_DECL: 2548 case XML_PARSER_CONTENT: 2549 case XML_PARSER_ATTRIBUTE_VALUE: 2550 case XML_PARSER_PI: 2551 case XML_PARSER_SYSTEM_LITERAL: 2552 case XML_PARSER_PUBLIC_LITERAL: 2553 /* we just ignore it there */ 2554 return; 2555 case XML_PARSER_EPILOG: 2556 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2557 return; 2558 case XML_PARSER_ENTITY_VALUE: 2559 /* 2560 * NOTE: in the case of entity values, we don't do the 2561 * substitution here since we need the literal 2562 * entity value to be able to save the internal 2563 * subset of the document. 2564 * This will be handled by xmlStringDecodeEntities 2565 */ 2566 return; 2567 case XML_PARSER_DTD: 2568 /* 2569 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2570 * In the internal DTD subset, parameter-entity references 2571 * can occur only where markup declarations can occur, not 2572 * within markup declarations. 
2573 * In that case this is handled in xmlParseMarkupDecl 2574 */ 2575 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2576 return; 2577 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2578 return; 2579 break; 2580 case XML_PARSER_IGNORE: 2581 return; 2582 } 2583 2584 NEXT; 2585 name = xmlParseName(ctxt); 2586 if (xmlParserDebugEntities) 2587 xmlGenericError(xmlGenericErrorContext, 2588 "PEReference: %s\n", name); 2589 if (name == NULL) { 2590 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2591 } else { 2592 if (RAW == ';') { 2593 NEXT; 2594 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2595 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2596 if (ctxt->instate == XML_PARSER_EOF) 2597 return; 2598 if (entity == NULL) { 2599 2600 /* 2601 * [ WFC: Entity Declared ] 2602 * In a document without any DTD, a document with only an 2603 * internal DTD subset which contains no parameter entity 2604 * references, or a document with "standalone='yes'", ... 2605 * ... The declaration of a parameter entity must precede 2606 * any reference to it... 2607 */ 2608 if ((ctxt->standalone == 1) || 2609 ((ctxt->hasExternalSubset == 0) && 2610 (ctxt->hasPErefs == 0))) { 2611 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2612 "PEReference: %%%s; not found\n", name); 2613 } else { 2614 /* 2615 * [ VC: Entity Declared ] 2616 * In a document with an external subset or external 2617 * parameter entities with "standalone='no'", ... 2618 * ... The declaration of a parameter entity must precede 2619 * any reference to it... 
2620 */ 2621 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2622 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2623 "PEReference: %%%s; not found\n", 2624 name, NULL); 2625 } else 2626 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2627 "PEReference: %%%s; not found\n", 2628 name, NULL); 2629 ctxt->valid = 0; 2630 } 2631 xmlParserEntityCheck(ctxt, 0, NULL, 0); 2632 } else if (ctxt->input->free != deallocblankswrapper) { 2633 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2634 if (xmlPushInput(ctxt, input) < 0) 2635 return; 2636 } else { 2637 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2638 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2639 xmlChar start[4]; 2640 xmlCharEncoding enc; 2641 2642 /* 2643 * Note: external parameter entities will not be loaded, it 2644 * is not required for a non-validating parser, unless the 2645 * option of validating, or substituting entities were 2646 * given. Doing so is far more secure as the parser will 2647 * only process data coming from the document entity by 2648 * default. 2649 */ 2650 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2651 ((ctxt->options & XML_PARSE_NOENT) == 0) && 2652 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 2653 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 2654 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 2655 (ctxt->replaceEntities == 0) && 2656 (ctxt->validate == 0)) 2657 return; 2658 2659 /* 2660 * handle the extra spaces added before and after 2661 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2662 * this is done independently. 2663 */ 2664 input = xmlNewEntityInputStream(ctxt, entity); 2665 if (xmlPushInput(ctxt, input) < 0) 2666 return; 2667 2668 /* 2669 * Get the 4 first bytes and decode the charset 2670 * if enc != XML_CHAR_ENCODING_NONE 2671 * plug some encoding conversion routines. 
2672 * Note that, since we may have some non-UTF8 2673 * encoding (like UTF16, bug 135229), the 'length' 2674 * is not known, but we can calculate based upon 2675 * the amount of data in the buffer. 2676 */ 2677 GROW 2678 if (ctxt->instate == XML_PARSER_EOF) 2679 return; 2680 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2681 start[0] = RAW; 2682 start[1] = NXT(1); 2683 start[2] = NXT(2); 2684 start[3] = NXT(3); 2685 enc = xmlDetectCharEncoding(start, 4); 2686 if (enc != XML_CHAR_ENCODING_NONE) { 2687 xmlSwitchEncoding(ctxt, enc); 2688 } 2689 } 2690 2691 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2692 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2693 (IS_BLANK_CH(NXT(5)))) { 2694 xmlParseTextDecl(ctxt); 2695 } 2696 } else { 2697 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2698 "PEReference: %s is not a parameter entity\n", 2699 name); 2700 } 2701 } 2702 } else { 2703 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2704 } 2705 } 2706} 2707 2708/* 2709 * Macro used to grow the current buffer. 2710 * buffer##_size is expected to be a size_t 2711 * mem_error: is expected to handle memory allocation failures 2712 */ 2713#define growBuffer(buffer, n) { \ 2714 xmlChar *tmp; \ 2715 size_t new_size = buffer##_size * 2 + n; \ 2716 if (new_size < buffer##_size) goto mem_error; \ 2717 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2718 if (tmp == NULL) goto mem_error; \ 2719 buffer = tmp; \ 2720 buffer##_size = new_size; \ 2721} 2722 2723/** 2724 * xmlStringLenDecodeEntities: 2725 * @ctxt: the parser context 2726 * @str: the input string 2727 * @len: the string length 2728 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2729 * @end: an end marker xmlChar, 0 if none 2730 * @end2: an end marker xmlChar, 0 if none 2731 * @end3: an end marker xmlChar, 0 if none 2732 * 2733 * Takes a entity string content and process to do the adequate substitutions. 
2734 * 2735 * [67] Reference ::= EntityRef | CharRef 2736 * 2737 * [69] PEReference ::= '%' Name ';' 2738 * 2739 * Returns A newly allocated string with the substitution done. The caller 2740 * must deallocate it ! 2741 */ 2742xmlChar * 2743xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2744 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2745 xmlChar *buffer = NULL; 2746 size_t buffer_size = 0; 2747 size_t nbchars = 0; 2748 2749 xmlChar *current = NULL; 2750 xmlChar *rep = NULL; 2751 const xmlChar *last; 2752 xmlEntityPtr ent; 2753 int c,l; 2754 2755 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2756 return(NULL); 2757 last = str + len; 2758 2759 if (((ctxt->depth > 40) && 2760 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2761 (ctxt->depth > 1024)) { 2762 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2763 return(NULL); 2764 } 2765 2766 /* 2767 * allocate a translation buffer. 2768 */ 2769 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2770 buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2771 if (buffer == NULL) goto mem_error; 2772 2773 /* 2774 * OK loop until we reach one of the ending char or a size limit. 2775 * we are operating on already parsed values. 
2776 */ 2777 if (str < last) 2778 c = CUR_SCHAR(str, l); 2779 else 2780 c = 0; 2781 while ((c != 0) && (c != end) && /* non input consuming loop */ 2782 (c != end2) && (c != end3)) { 2783 2784 if (c == 0) break; 2785 if ((c == '&') && (str[1] == '#')) { 2786 int val = xmlParseStringCharRef(ctxt, &str); 2787 if (val != 0) { 2788 COPY_BUF(0,buffer,nbchars,val); 2789 } 2790 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2791 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2792 } 2793 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2794 if (xmlParserDebugEntities) 2795 xmlGenericError(xmlGenericErrorContext, 2796 "String decoding Entity Reference: %.30s\n", 2797 str); 2798 ent = xmlParseStringEntityRef(ctxt, &str); 2799 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2800 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2801 goto int_error; 2802 xmlParserEntityCheck(ctxt, 0, ent, 0); 2803 if (ent != NULL) 2804 ctxt->nbentities += ent->checked / 2; 2805 if ((ent != NULL) && 2806 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2807 if (ent->content != NULL) { 2808 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2809 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2810 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2811 } 2812 } else { 2813 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2814 "predefined entity has no content\n"); 2815 } 2816 } else if ((ent != NULL) && (ent->content != NULL)) { 2817 ctxt->depth++; 2818 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2819 0, 0, 0); 2820 ctxt->depth--; 2821 2822 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2823 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2824 goto int_error; 2825 2826 if (rep != NULL) { 2827 current = rep; 2828 while (*current != 0) { /* non input consuming loop */ 2829 buffer[nbchars++] = *current++; 2830 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2831 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2832 goto int_error; 2833 growBuffer(buffer, 
XML_PARSER_BUFFER_SIZE); 2834 } 2835 } 2836 xmlFree(rep); 2837 rep = NULL; 2838 } 2839 } else if (ent != NULL) { 2840 int i = xmlStrlen(ent->name); 2841 const xmlChar *cur = ent->name; 2842 2843 buffer[nbchars++] = '&'; 2844 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2845 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2846 } 2847 for (;i > 0;i--) 2848 buffer[nbchars++] = *cur++; 2849 buffer[nbchars++] = ';'; 2850 } 2851 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2852 if (xmlParserDebugEntities) 2853 xmlGenericError(xmlGenericErrorContext, 2854 "String decoding PE Reference: %.30s\n", str); 2855 ent = xmlParseStringPEReference(ctxt, &str); 2856 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2857 goto int_error; 2858 xmlParserEntityCheck(ctxt, 0, ent, 0); 2859 if (ent != NULL) 2860 ctxt->nbentities += ent->checked / 2; 2861 if (ent != NULL) { 2862 if (ent->content == NULL) { 2863 xmlLoadEntityContent(ctxt, ent); 2864 } 2865 ctxt->depth++; 2866 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2867 0, 0, 0); 2868 ctxt->depth--; 2869 if (rep != NULL) { 2870 current = rep; 2871 while (*current != 0) { /* non input consuming loop */ 2872 buffer[nbchars++] = *current++; 2873 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2874 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2875 goto int_error; 2876 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2877 } 2878 } 2879 xmlFree(rep); 2880 rep = NULL; 2881 } 2882 } 2883 } else { 2884 COPY_BUF(l,buffer,nbchars,c); 2885 str += l; 2886 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2887 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2888 } 2889 } 2890 if (str < last) 2891 c = CUR_SCHAR(str, l); 2892 else 2893 c = 0; 2894 } 2895 buffer[nbchars] = 0; 2896 return(buffer); 2897 2898mem_error: 2899 xmlErrMemory(ctxt, NULL); 2900int_error: 2901 if (rep != NULL) 2902 xmlFree(rep); 2903 if (buffer != NULL) 2904 xmlFree(buffer); 2905 return(NULL); 2906} 2907 2908/** 2909 * 
xmlStringDecodeEntities: 2910 * @ctxt: the parser context 2911 * @str: the input string 2912 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2913 * @end: an end marker xmlChar, 0 if none 2914 * @end2: an end marker xmlChar, 0 if none 2915 * @end3: an end marker xmlChar, 0 if none 2916 * 2917 * Takes a entity string content and process to do the adequate substitutions. 2918 * 2919 * [67] Reference ::= EntityRef | CharRef 2920 * 2921 * [69] PEReference ::= '%' Name ';' 2922 * 2923 * Returns A newly allocated string with the substitution done. The caller 2924 * must deallocate it ! 2925 */ 2926xmlChar * 2927xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2928 xmlChar end, xmlChar end2, xmlChar end3) { 2929 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2930 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2931 end, end2, end3)); 2932} 2933 2934/************************************************************************ 2935 * * 2936 * Commodity functions, cleanup needed ? * 2937 * * 2938 ************************************************************************/ 2939 2940/** 2941 * areBlanks: 2942 * @ctxt: an XML parser context 2943 * @str: a xmlChar * 2944 * @len: the size of @str 2945 * @blank_chars: we know the chars are blanks 2946 * 2947 * Is this a sequence of blank chars that one can ignore ? 2948 * 2949 * Returns 1 if ignorable 0 otherwise. 2950 */ 2951 2952static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2953 int blank_chars) { 2954 int i, ret; 2955 xmlNodePtr lastChild; 2956 2957 /* 2958 * Don't spend time trying to differentiate them, the same callback is 2959 * used ! 2960 */ 2961 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2962 return(0); 2963 2964 /* 2965 * Check for xml:space value. 
2966 */ 2967 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2968 (*(ctxt->space) == -2)) 2969 return(0); 2970 2971 /* 2972 * Check that the string is made of blanks 2973 */ 2974 if (blank_chars == 0) { 2975 for (i = 0;i < len;i++) 2976 if (!(IS_BLANK_CH(str[i]))) return(0); 2977 } 2978 2979 /* 2980 * Look if the element is mixed content in the DTD if available 2981 */ 2982 if (ctxt->node == NULL) return(0); 2983 if (ctxt->myDoc != NULL) { 2984 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2985 if (ret == 0) return(1); 2986 if (ret == 1) return(0); 2987 } 2988 2989 /* 2990 * Otherwise, heuristic :-\ 2991 */ 2992 if ((RAW != '<') && (RAW != 0xD)) return(0); 2993 if ((ctxt->node->children == NULL) && 2994 (RAW == '<') && (NXT(1) == '/')) return(0); 2995 2996 lastChild = xmlGetLastChild(ctxt->node); 2997 if (lastChild == NULL) { 2998 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2999 (ctxt->node->content != NULL)) return(0); 3000 } else if (xmlNodeIsText(lastChild)) 3001 return(0); 3002 else if ((ctxt->node->children != NULL) && 3003 (xmlNodeIsText(ctxt->node->children))) 3004 return(0); 3005 return(1); 3006} 3007 3008/************************************************************************ 3009 * * 3010 * Extra stuff for namespace support * 3011 * Relates to http://www.w3.org/TR/WD-xml-names * 3012 * * 3013 ************************************************************************/ 3014 3015/** 3016 * xmlSplitQName: 3017 * @ctxt: an XML parser context 3018 * @name: an XML parser context 3019 * @prefix: a xmlChar ** 3020 * 3021 * parse an UTF8 encoded XML qualified name string 3022 * 3023 * [NS 5] QName ::= (Prefix ':')? LocalPart 3024 * 3025 * [NS 6] Prefix ::= NCName 3026 * 3027 * [NS 7] LocalPart ::= NCName 3028 * 3029 * Returns the local part, and prefix is updated 3030 * to get the Prefix if any. 
3031 */ 3032 3033xmlChar * 3034xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 3035 xmlChar buf[XML_MAX_NAMELEN + 5]; 3036 xmlChar *buffer = NULL; 3037 int len = 0; 3038 int max = XML_MAX_NAMELEN; 3039 xmlChar *ret = NULL; 3040 const xmlChar *cur = name; 3041 int c; 3042 3043 if (prefix == NULL) return(NULL); 3044 *prefix = NULL; 3045 3046 if (cur == NULL) return(NULL); 3047 3048#ifndef XML_XML_NAMESPACE 3049 /* xml: prefix is not really a namespace */ 3050 if ((cur[0] == 'x') && (cur[1] == 'm') && 3051 (cur[2] == 'l') && (cur[3] == ':')) 3052 return(xmlStrdup(name)); 3053#endif 3054 3055 /* nasty but well=formed */ 3056 if (cur[0] == ':') 3057 return(xmlStrdup(name)); 3058 3059 c = *cur++; 3060 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 3061 buf[len++] = c; 3062 c = *cur++; 3063 } 3064 if (len >= max) { 3065 /* 3066 * Okay someone managed to make a huge name, so he's ready to pay 3067 * for the processing speed. 3068 */ 3069 max = len * 2; 3070 3071 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3072 if (buffer == NULL) { 3073 xmlErrMemory(ctxt, NULL); 3074 return(NULL); 3075 } 3076 memcpy(buffer, buf, len); 3077 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3078 if (len + 10 > max) { 3079 xmlChar *tmp; 3080 3081 max *= 2; 3082 tmp = (xmlChar *) xmlRealloc(buffer, 3083 max * sizeof(xmlChar)); 3084 if (tmp == NULL) { 3085 xmlFree(buffer); 3086 xmlErrMemory(ctxt, NULL); 3087 return(NULL); 3088 } 3089 buffer = tmp; 3090 } 3091 buffer[len++] = c; 3092 c = *cur++; 3093 } 3094 buffer[len] = 0; 3095 } 3096 3097 if ((c == ':') && (*cur == 0)) { 3098 if (buffer != NULL) 3099 xmlFree(buffer); 3100 *prefix = NULL; 3101 return(xmlStrdup(name)); 3102 } 3103 3104 if (buffer == NULL) 3105 ret = xmlStrndup(buf, len); 3106 else { 3107 ret = buffer; 3108 buffer = NULL; 3109 max = XML_MAX_NAMELEN; 3110 } 3111 3112 3113 if (c == ':') { 3114 c = *cur; 3115 *prefix = ret; 3116 if (c == 0) { 3117 
return(xmlStrndup(BAD_CAST "", 0)); 3118 } 3119 len = 0; 3120 3121 /* 3122 * Check that the first character is proper to start 3123 * a new name 3124 */ 3125 if (!(((c >= 0x61) && (c <= 0x7A)) || 3126 ((c >= 0x41) && (c <= 0x5A)) || 3127 (c == '_') || (c == ':'))) { 3128 int l; 3129 int first = CUR_SCHAR(cur, l); 3130 3131 if (!IS_LETTER(first) && (first != '_')) { 3132 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3133 "Name %s is not XML Namespace compliant\n", 3134 name); 3135 } 3136 } 3137 cur++; 3138 3139 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3140 buf[len++] = c; 3141 c = *cur++; 3142 } 3143 if (len >= max) { 3144 /* 3145 * Okay someone managed to make a huge name, so he's ready to pay 3146 * for the processing speed. 3147 */ 3148 max = len * 2; 3149 3150 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3151 if (buffer == NULL) { 3152 xmlErrMemory(ctxt, NULL); 3153 return(NULL); 3154 } 3155 memcpy(buffer, buf, len); 3156 while (c != 0) { /* tested bigname2.xml */ 3157 if (len + 10 > max) { 3158 xmlChar *tmp; 3159 3160 max *= 2; 3161 tmp = (xmlChar *) xmlRealloc(buffer, 3162 max * sizeof(xmlChar)); 3163 if (tmp == NULL) { 3164 xmlErrMemory(ctxt, NULL); 3165 xmlFree(buffer); 3166 return(NULL); 3167 } 3168 buffer = tmp; 3169 } 3170 buffer[len++] = c; 3171 c = *cur++; 3172 } 3173 buffer[len] = 0; 3174 } 3175 3176 if (buffer == NULL) 3177 ret = xmlStrndup(buf, len); 3178 else { 3179 ret = buffer; 3180 } 3181 } 3182 3183 return(ret); 3184} 3185 3186/************************************************************************ 3187 * * 3188 * The parser itself * 3189 * Relates to http://www.w3.org/TR/REC-xml * 3190 * * 3191 ************************************************************************/ 3192 3193/************************************************************************ 3194 * * 3195 * Routines to parse Name, NCName and NmToken * 3196 * * 3197 ************************************************************************/ 3198#ifdef DEBUG 
3199static unsigned long nbParseName = 0; 3200static unsigned long nbParseNmToken = 0; 3201static unsigned long nbParseNCName = 0; 3202static unsigned long nbParseNCNameComplex = 0; 3203static unsigned long nbParseNameComplex = 0; 3204static unsigned long nbParseStringName = 0; 3205#endif 3206 3207/* 3208 * The two following functions are related to the change of accepted 3209 * characters for Name and NmToken in the Revision 5 of XML-1.0 3210 * They correspond to the modified production [4] and the new production [4a] 3211 * changes in that revision. Also note that the macros used for the 3212 * productions Letter, Digit, CombiningChar and Extender are not needed 3213 * anymore. 3214 * We still keep compatibility to pre-revision5 parsing semantic if the 3215 * new XML_PARSE_OLD10 option is given to the parser. 3216 */ 3217static int 3218xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3219 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3220 /* 3221 * Use the new checks of production [4] [4a] amd [5] of the 3222 * Update 5 of XML-1.0 3223 */ 3224 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3225 (((c >= 'a') && (c <= 'z')) || 3226 ((c >= 'A') && (c <= 'Z')) || 3227 (c == '_') || (c == ':') || 3228 ((c >= 0xC0) && (c <= 0xD6)) || 3229 ((c >= 0xD8) && (c <= 0xF6)) || 3230 ((c >= 0xF8) && (c <= 0x2FF)) || 3231 ((c >= 0x370) && (c <= 0x37D)) || 3232 ((c >= 0x37F) && (c <= 0x1FFF)) || 3233 ((c >= 0x200C) && (c <= 0x200D)) || 3234 ((c >= 0x2070) && (c <= 0x218F)) || 3235 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3236 ((c >= 0x3001) && (c <= 0xD7FF)) || 3237 ((c >= 0xF900) && (c <= 0xFDCF)) || 3238 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3239 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3240 return(1); 3241 } else { 3242 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3243 return(1); 3244 } 3245 return(0); 3246} 3247 3248static int 3249xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3250 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3251 /* 3252 * Use the new checks of 
production [4] [4a] amd [5] of the 3253 * Update 5 of XML-1.0 3254 */ 3255 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3256 (((c >= 'a') && (c <= 'z')) || 3257 ((c >= 'A') && (c <= 'Z')) || 3258 ((c >= '0') && (c <= '9')) || /* !start */ 3259 (c == '_') || (c == ':') || 3260 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3261 ((c >= 0xC0) && (c <= 0xD6)) || 3262 ((c >= 0xD8) && (c <= 0xF6)) || 3263 ((c >= 0xF8) && (c <= 0x2FF)) || 3264 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3265 ((c >= 0x370) && (c <= 0x37D)) || 3266 ((c >= 0x37F) && (c <= 0x1FFF)) || 3267 ((c >= 0x200C) && (c <= 0x200D)) || 3268 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3269 ((c >= 0x2070) && (c <= 0x218F)) || 3270 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3271 ((c >= 0x3001) && (c <= 0xD7FF)) || 3272 ((c >= 0xF900) && (c <= 0xFDCF)) || 3273 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3274 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3275 return(1); 3276 } else { 3277 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3278 (c == '.') || (c == '-') || 3279 (c == '_') || (c == ':') || 3280 (IS_COMBINING(c)) || 3281 (IS_EXTENDER(c))) 3282 return(1); 3283 } 3284 return(0); 3285} 3286 3287static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3288 int *len, int *alloc, int normalize); 3289 3290static const xmlChar * 3291xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3292 int len = 0, l; 3293 int c; 3294 int count = 0; 3295 3296#ifdef DEBUG 3297 nbParseNameComplex++; 3298#endif 3299 3300 /* 3301 * Handler for more complex cases 3302 */ 3303 GROW; 3304 if (ctxt->instate == XML_PARSER_EOF) 3305 return(NULL); 3306 c = CUR_CHAR(l); 3307 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3308 /* 3309 * Use the new checks of production [4] [4a] amd [5] of the 3310 * Update 5 of XML-1.0 3311 */ 3312 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3313 (!(((c >= 'a') && (c <= 'z')) || 3314 ((c >= 'A') && (c <= 'Z')) || 3315 (c == '_') || (c == ':') || 3316 ((c >= 0xC0) && (c <= 
0xD6)) || 3317 ((c >= 0xD8) && (c <= 0xF6)) || 3318 ((c >= 0xF8) && (c <= 0x2FF)) || 3319 ((c >= 0x370) && (c <= 0x37D)) || 3320 ((c >= 0x37F) && (c <= 0x1FFF)) || 3321 ((c >= 0x200C) && (c <= 0x200D)) || 3322 ((c >= 0x2070) && (c <= 0x218F)) || 3323 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3324 ((c >= 0x3001) && (c <= 0xD7FF)) || 3325 ((c >= 0xF900) && (c <= 0xFDCF)) || 3326 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3327 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3328 return(NULL); 3329 } 3330 len += l; 3331 NEXTL(l); 3332 c = CUR_CHAR(l); 3333 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3334 (((c >= 'a') && (c <= 'z')) || 3335 ((c >= 'A') && (c <= 'Z')) || 3336 ((c >= '0') && (c <= '9')) || /* !start */ 3337 (c == '_') || (c == ':') || 3338 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3339 ((c >= 0xC0) && (c <= 0xD6)) || 3340 ((c >= 0xD8) && (c <= 0xF6)) || 3341 ((c >= 0xF8) && (c <= 0x2FF)) || 3342 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3343 ((c >= 0x370) && (c <= 0x37D)) || 3344 ((c >= 0x37F) && (c <= 0x1FFF)) || 3345 ((c >= 0x200C) && (c <= 0x200D)) || 3346 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3347 ((c >= 0x2070) && (c <= 0x218F)) || 3348 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3349 ((c >= 0x3001) && (c <= 0xD7FF)) || 3350 ((c >= 0xF900) && (c <= 0xFDCF)) || 3351 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3352 ((c >= 0x10000) && (c <= 0xEFFFF)) 3353 )) { 3354 if (count++ > XML_PARSER_CHUNK_SIZE) { 3355 count = 0; 3356 GROW; 3357 if (ctxt->instate == XML_PARSER_EOF) 3358 return(NULL); 3359 } 3360 len += l; 3361 NEXTL(l); 3362 c = CUR_CHAR(l); 3363 } 3364 } else { 3365 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3366 (!IS_LETTER(c) && (c != '_') && 3367 (c != ':'))) { 3368 return(NULL); 3369 } 3370 len += l; 3371 NEXTL(l); 3372 c = CUR_CHAR(l); 3373 3374 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3375 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3376 (c == '.') || (c == '-') || 3377 (c == 
'_') || (c == ':') || 3378 (IS_COMBINING(c)) || 3379 (IS_EXTENDER(c)))) { 3380 if (count++ > XML_PARSER_CHUNK_SIZE) { 3381 count = 0; 3382 GROW; 3383 if (ctxt->instate == XML_PARSER_EOF) 3384 return(NULL); 3385 } 3386 len += l; 3387 NEXTL(l); 3388 c = CUR_CHAR(l); 3389 if (c == 0) { 3390 count = 0; 3391 GROW; 3392 if (ctxt->instate == XML_PARSER_EOF) 3393 return(NULL); 3394 c = CUR_CHAR(l); 3395 } 3396 } 3397 } 3398 if ((len > XML_MAX_NAME_LENGTH) && 3399 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3400 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3401 return(NULL); 3402 } 3403 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3404 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3405 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3406} 3407 3408/** 3409 * xmlParseName: 3410 * @ctxt: an XML parser context 3411 * 3412 * parse an XML name. 3413 * 3414 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3415 * CombiningChar | Extender 3416 * 3417 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3418 * 3419 * [6] Names ::= Name (#x20 Name)* 3420 * 3421 * Returns the Name parsed or NULL 3422 */ 3423 3424const xmlChar * 3425xmlParseName(xmlParserCtxtPtr ctxt) { 3426 const xmlChar *in; 3427 const xmlChar *ret; 3428 int count = 0; 3429 3430 GROW; 3431 3432#ifdef DEBUG 3433 nbParseName++; 3434#endif 3435 3436 /* 3437 * Accelerator for simple ASCII names 3438 */ 3439 in = ctxt->input->cur; 3440 if (((*in >= 0x61) && (*in <= 0x7A)) || 3441 ((*in >= 0x41) && (*in <= 0x5A)) || 3442 (*in == '_') || (*in == ':')) { 3443 in++; 3444 while (((*in >= 0x61) && (*in <= 0x7A)) || 3445 ((*in >= 0x41) && (*in <= 0x5A)) || 3446 ((*in >= 0x30) && (*in <= 0x39)) || 3447 (*in == '_') || (*in == '-') || 3448 (*in == ':') || (*in == '.')) 3449 in++; 3450 if ((*in > 0) && (*in < 0x80)) { 3451 count = in - ctxt->input->cur; 3452 if ((count > XML_MAX_NAME_LENGTH) && 3453 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3454 
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3455 return(NULL); 3456 } 3457 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3458 ctxt->input->cur = in; 3459 ctxt->nbChars += count; 3460 ctxt->input->col += count; 3461 if (ret == NULL) 3462 xmlErrMemory(ctxt, NULL); 3463 return(ret); 3464 } 3465 } 3466 /* accelerator for special cases */ 3467 return(xmlParseNameComplex(ctxt)); 3468} 3469 3470static const xmlChar * 3471xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3472 int len = 0, l; 3473 int c; 3474 int count = 0; 3475 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */ 3476 3477#ifdef DEBUG 3478 nbParseNCNameComplex++; 3479#endif 3480 3481 /* 3482 * Handler for more complex cases 3483 */ 3484 GROW; 3485 end = ctxt->input->cur; 3486 c = CUR_CHAR(l); 3487 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3488 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3489 return(NULL); 3490 } 3491 3492 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3493 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3494 if (count++ > XML_PARSER_CHUNK_SIZE) { 3495 if ((len > XML_MAX_NAME_LENGTH) && 3496 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3498 return(NULL); 3499 } 3500 count = 0; 3501 GROW; 3502 if (ctxt->instate == XML_PARSER_EOF) 3503 return(NULL); 3504 } 3505 len += l; 3506 NEXTL(l); 3507 end = ctxt->input->cur; 3508 c = CUR_CHAR(l); 3509 if (c == 0) { 3510 count = 0; 3511 /* 3512 * when shrinking to extend the buffer we really need to preserve 3513 * the part of the name we already parsed. Hence rolling back 3514 * by current lenght. 
3515 */ 3516 ctxt->input->cur -= l; 3517 GROW; 3518 ctxt->input->cur += l; 3519 if (ctxt->instate == XML_PARSER_EOF) 3520 return(NULL); 3521 end = ctxt->input->cur; 3522 c = CUR_CHAR(l); 3523 } 3524 } 3525 if ((len > XML_MAX_NAME_LENGTH) && 3526 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3527 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3528 return(NULL); 3529 } 3530 return(xmlDictLookup(ctxt->dict, end - len, len)); 3531} 3532 3533/** 3534 * xmlParseNCName: 3535 * @ctxt: an XML parser context 3536 * @len: length of the string parsed 3537 * 3538 * parse an XML name. 3539 * 3540 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3541 * CombiningChar | Extender 3542 * 3543 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3544 * 3545 * Returns the Name parsed or NULL 3546 */ 3547 3548static const xmlChar * 3549xmlParseNCName(xmlParserCtxtPtr ctxt) { 3550 const xmlChar *in, *e; 3551 const xmlChar *ret; 3552 int count = 0; 3553 3554#ifdef DEBUG 3555 nbParseNCName++; 3556#endif 3557 3558 /* 3559 * Accelerator for simple ASCII names 3560 */ 3561 in = ctxt->input->cur; 3562 e = ctxt->input->end; 3563 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3564 ((*in >= 0x41) && (*in <= 0x5A)) || 3565 (*in == '_')) && (in < e)) { 3566 in++; 3567 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3568 ((*in >= 0x41) && (*in <= 0x5A)) || 3569 ((*in >= 0x30) && (*in <= 0x39)) || 3570 (*in == '_') || (*in == '-') || 3571 (*in == '.')) && (in < e)) 3572 in++; 3573 if (in >= e) 3574 goto complex; 3575 if ((*in > 0) && (*in < 0x80)) { 3576 count = in - ctxt->input->cur; 3577 if ((count > XML_MAX_NAME_LENGTH) && 3578 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3579 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3580 return(NULL); 3581 } 3582 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3583 ctxt->input->cur = in; 3584 ctxt->nbChars += count; 3585 ctxt->input->col += count; 3586 if (ret == NULL) { 3587 xmlErrMemory(ctxt, NULL); 3588 } 3589 return(ret); 3590 } 3591 } 
3592complex: 3593 return(xmlParseNCNameComplex(ctxt)); 3594} 3595 3596/** 3597 * xmlParseNameAndCompare: 3598 * @ctxt: an XML parser context 3599 * 3600 * parse an XML name and compares for match 3601 * (specialized for endtag parsing) 3602 * 3603 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3604 * and the name for mismatch 3605 */ 3606 3607static const xmlChar * 3608xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3609 register const xmlChar *cmp = other; 3610 register const xmlChar *in; 3611 const xmlChar *ret; 3612 3613 GROW; 3614 if (ctxt->instate == XML_PARSER_EOF) 3615 return(NULL); 3616 3617 in = ctxt->input->cur; 3618 while (*in != 0 && *in == *cmp) { 3619 ++in; 3620 ++cmp; 3621 ctxt->input->col++; 3622 } 3623 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3624 /* success */ 3625 ctxt->input->cur = in; 3626 return (const xmlChar*) 1; 3627 } 3628 /* failure (or end of input buffer), check with full function */ 3629 ret = xmlParseName (ctxt); 3630 /* strings coming from the dictionary direct compare possible */ 3631 if (ret == other) { 3632 return (const xmlChar*) 1; 3633 } 3634 return ret; 3635} 3636 3637/** 3638 * xmlParseStringName: 3639 * @ctxt: an XML parser context 3640 * @str: a pointer to the string pointer (IN/OUT) 3641 * 3642 * parse an XML name. 3643 * 3644 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3645 * CombiningChar | Extender 3646 * 3647 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3648 * 3649 * [6] Names ::= Name (#x20 Name)* 3650 * 3651 * Returns the Name parsed or NULL. The @str pointer 3652 * is updated to the current location in the string. 
3653 */ 3654 3655static xmlChar * 3656xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3657 xmlChar buf[XML_MAX_NAMELEN + 5]; 3658 const xmlChar *cur = *str; 3659 int len = 0, l; 3660 int c; 3661 3662#ifdef DEBUG 3663 nbParseStringName++; 3664#endif 3665 3666 c = CUR_SCHAR(cur, l); 3667 if (!xmlIsNameStartChar(ctxt, c)) { 3668 return(NULL); 3669 } 3670 3671 COPY_BUF(l,buf,len,c); 3672 cur += l; 3673 c = CUR_SCHAR(cur, l); 3674 while (xmlIsNameChar(ctxt, c)) { 3675 COPY_BUF(l,buf,len,c); 3676 cur += l; 3677 c = CUR_SCHAR(cur, l); 3678 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3679 /* 3680 * Okay someone managed to make a huge name, so he's ready to pay 3681 * for the processing speed. 3682 */ 3683 xmlChar *buffer; 3684 int max = len * 2; 3685 3686 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3687 if (buffer == NULL) { 3688 xmlErrMemory(ctxt, NULL); 3689 return(NULL); 3690 } 3691 memcpy(buffer, buf, len); 3692 while (xmlIsNameChar(ctxt, c)) { 3693 if (len + 10 > max) { 3694 xmlChar *tmp; 3695 3696 if ((len > XML_MAX_NAME_LENGTH) && 3697 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3698 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3699 xmlFree(buffer); 3700 return(NULL); 3701 } 3702 max *= 2; 3703 tmp = (xmlChar *) xmlRealloc(buffer, 3704 max * sizeof(xmlChar)); 3705 if (tmp == NULL) { 3706 xmlErrMemory(ctxt, NULL); 3707 xmlFree(buffer); 3708 return(NULL); 3709 } 3710 buffer = tmp; 3711 } 3712 COPY_BUF(l,buffer,len,c); 3713 cur += l; 3714 c = CUR_SCHAR(cur, l); 3715 } 3716 buffer[len] = 0; 3717 *str = cur; 3718 return(buffer); 3719 } 3720 } 3721 if ((len > XML_MAX_NAME_LENGTH) && 3722 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3723 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3724 return(NULL); 3725 } 3726 *str = cur; 3727 return(xmlStrndup(buf, len)); 3728} 3729 3730/** 3731 * xmlParseNmtoken: 3732 * @ctxt: an XML parser context 3733 * 3734 * parse an XML Nmtoken. 
3735 * 3736 * [7] Nmtoken ::= (NameChar)+ 3737 * 3738 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3739 * 3740 * Returns the Nmtoken parsed or NULL 3741 */ 3742 3743xmlChar * 3744xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3745 xmlChar buf[XML_MAX_NAMELEN + 5]; 3746 int len = 0, l; 3747 int c; 3748 int count = 0; 3749 3750#ifdef DEBUG 3751 nbParseNmToken++; 3752#endif 3753 3754 GROW; 3755 if (ctxt->instate == XML_PARSER_EOF) 3756 return(NULL); 3757 c = CUR_CHAR(l); 3758 3759 while (xmlIsNameChar(ctxt, c)) { 3760 if (count++ > XML_PARSER_CHUNK_SIZE) { 3761 count = 0; 3762 GROW; 3763 } 3764 COPY_BUF(l,buf,len,c); 3765 NEXTL(l); 3766 c = CUR_CHAR(l); 3767 if (c == 0) { 3768 count = 0; 3769 GROW; 3770 if (ctxt->instate == XML_PARSER_EOF) 3771 return(NULL); 3772 c = CUR_CHAR(l); 3773 } 3774 if (len >= XML_MAX_NAMELEN) { 3775 /* 3776 * Okay someone managed to make a huge token, so he's ready to pay 3777 * for the processing speed. 3778 */ 3779 xmlChar *buffer; 3780 int max = len * 2; 3781 3782 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3783 if (buffer == NULL) { 3784 xmlErrMemory(ctxt, NULL); 3785 return(NULL); 3786 } 3787 memcpy(buffer, buf, len); 3788 while (xmlIsNameChar(ctxt, c)) { 3789 if (count++ > XML_PARSER_CHUNK_SIZE) { 3790 count = 0; 3791 GROW; 3792 if (ctxt->instate == XML_PARSER_EOF) { 3793 xmlFree(buffer); 3794 return(NULL); 3795 } 3796 } 3797 if (len + 10 > max) { 3798 xmlChar *tmp; 3799 3800 if ((max > XML_MAX_NAME_LENGTH) && 3801 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3802 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3803 xmlFree(buffer); 3804 return(NULL); 3805 } 3806 max *= 2; 3807 tmp = (xmlChar *) xmlRealloc(buffer, 3808 max * sizeof(xmlChar)); 3809 if (tmp == NULL) { 3810 xmlErrMemory(ctxt, NULL); 3811 xmlFree(buffer); 3812 return(NULL); 3813 } 3814 buffer = tmp; 3815 } 3816 COPY_BUF(l,buffer,len,c); 3817 NEXTL(l); 3818 c = CUR_CHAR(l); 3819 } 3820 buffer[len] = 0; 3821 return(buffer); 3822 } 3823 } 3824 if (len == 0) 3825 
return(NULL); 3826 if ((len > XML_MAX_NAME_LENGTH) && 3827 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3828 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3829 return(NULL); 3830 } 3831 return(xmlStrndup(buf, len)); 3832} 3833 3834/** 3835 * xmlParseEntityValue: 3836 * @ctxt: an XML parser context 3837 * @orig: if non-NULL store a copy of the original entity value 3838 * 3839 * parse a value for ENTITY declarations 3840 * 3841 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3842 * "'" ([^%&'] | PEReference | Reference)* "'" 3843 * 3844 * Returns the EntityValue parsed with reference substituted or NULL 3845 */ 3846 3847xmlChar * 3848xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3849 xmlChar *buf = NULL; 3850 int len = 0; 3851 int size = XML_PARSER_BUFFER_SIZE; 3852 int c, l; 3853 xmlChar stop; 3854 xmlChar *ret = NULL; 3855 const xmlChar *cur = NULL; 3856 xmlParserInputPtr input; 3857 3858 if (RAW == '"') stop = '"'; 3859 else if (RAW == '\'') stop = '\''; 3860 else { 3861 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3862 return(NULL); 3863 } 3864 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3865 if (buf == NULL) { 3866 xmlErrMemory(ctxt, NULL); 3867 return(NULL); 3868 } 3869 3870 /* 3871 * The content of the entity definition is copied in a buffer. 3872 */ 3873 3874 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3875 input = ctxt->input; 3876 GROW; 3877 if (ctxt->instate == XML_PARSER_EOF) { 3878 xmlFree(buf); 3879 return(NULL); 3880 } 3881 NEXT; 3882 c = CUR_CHAR(l); 3883 /* 3884 * NOTE: 4.4.5 Included in Literal 3885 * When a parameter entity reference appears in a literal entity 3886 * value, ... a single or double quote character in the replacement 3887 * text is always treated as a normal data character and will not 3888 * terminate the literal. 
3889 * In practice it means we stop the loop only when back at parsing 3890 * the initial entity and the quote is found 3891 */ 3892 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3893 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3894 if (len + 5 >= size) { 3895 xmlChar *tmp; 3896 3897 size *= 2; 3898 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3899 if (tmp == NULL) { 3900 xmlErrMemory(ctxt, NULL); 3901 xmlFree(buf); 3902 return(NULL); 3903 } 3904 buf = tmp; 3905 } 3906 COPY_BUF(l,buf,len,c); 3907 NEXTL(l); 3908 /* 3909 * Pop-up of finished entities. 3910 */ 3911 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3912 xmlPopInput(ctxt); 3913 3914 GROW; 3915 c = CUR_CHAR(l); 3916 if (c == 0) { 3917 GROW; 3918 c = CUR_CHAR(l); 3919 } 3920 } 3921 buf[len] = 0; 3922 if (ctxt->instate == XML_PARSER_EOF) { 3923 xmlFree(buf); 3924 return(NULL); 3925 } 3926 3927 /* 3928 * Raise problem w.r.t. '&' and '%' being used in non-entities 3929 * reference constructs. Note Charref will be handled in 3930 * xmlStringDecodeEntities() 3931 */ 3932 cur = buf; 3933 while (*cur != 0) { /* non input consuming */ 3934 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3935 xmlChar *name; 3936 xmlChar tmp = *cur; 3937 3938 cur++; 3939 name = xmlParseStringName(ctxt, &cur); 3940 if ((name == NULL) || (*cur != ';')) { 3941 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3942 "EntityValue: '%c' forbidden except for entities references\n", 3943 tmp); 3944 } 3945 if ((tmp == '%') && (ctxt->inSubset == 1) && 3946 (ctxt->inputNr == 1)) { 3947 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3948 } 3949 if (name != NULL) 3950 xmlFree(name); 3951 if (*cur == 0) 3952 break; 3953 } 3954 cur++; 3955 } 3956 3957 /* 3958 * Then PEReference entities are substituted. 
3959 */ 3960 if (c != stop) { 3961 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3962 xmlFree(buf); 3963 } else { 3964 NEXT; 3965 /* 3966 * NOTE: 4.4.7 Bypassed 3967 * When a general entity reference appears in the EntityValue in 3968 * an entity declaration, it is bypassed and left as is. 3969 * so XML_SUBSTITUTE_REF is not set here. 3970 */ 3971 ++ctxt->depth; 3972 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3973 0, 0, 0); 3974 --ctxt->depth; 3975 if (orig != NULL) 3976 *orig = buf; 3977 else 3978 xmlFree(buf); 3979 } 3980 3981 return(ret); 3982} 3983 3984/** 3985 * xmlParseAttValueComplex: 3986 * @ctxt: an XML parser context 3987 * @len: the resulting attribute len 3988 * @normalize: wether to apply the inner normalization 3989 * 3990 * parse a value for an attribute, this is the fallback function 3991 * of xmlParseAttValue() when the attribute parsing requires handling 3992 * of non-ASCII characters, or normalization compaction. 3993 * 3994 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3995 */ 3996static xmlChar * 3997xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3998 xmlChar limit = 0; 3999 xmlChar *buf = NULL; 4000 xmlChar *rep = NULL; 4001 size_t len = 0; 4002 size_t buf_size = 0; 4003 int c, l, in_space = 0; 4004 xmlChar *current = NULL; 4005 xmlEntityPtr ent; 4006 4007 if (NXT(0) == '"') { 4008 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4009 limit = '"'; 4010 NEXT; 4011 } else if (NXT(0) == '\'') { 4012 limit = '\''; 4013 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4014 NEXT; 4015 } else { 4016 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 4017 return(NULL); 4018 } 4019 4020 /* 4021 * allocate a translation buffer. 4022 */ 4023 buf_size = XML_PARSER_BUFFER_SIZE; 4024 buf = (xmlChar *) xmlMallocAtomic(buf_size); 4025 if (buf == NULL) goto mem_error; 4026 4027 /* 4028 * OK loop until we reach one of the ending char or a size limit. 
4029 */ 4030 c = CUR_CHAR(l); 4031 while (((NXT(0) != limit) && /* checked */ 4032 (IS_CHAR(c)) && (c != '<')) && 4033 (ctxt->instate != XML_PARSER_EOF)) { 4034 /* 4035 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 4036 * special option is given 4037 */ 4038 if ((len > XML_MAX_TEXT_LENGTH) && 4039 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4040 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4041 "AttValue length too long\n"); 4042 goto mem_error; 4043 } 4044 if (c == 0) break; 4045 if (c == '&') { 4046 in_space = 0; 4047 if (NXT(1) == '#') { 4048 int val = xmlParseCharRef(ctxt); 4049 4050 if (val == '&') { 4051 if (ctxt->replaceEntities) { 4052 if (len + 10 > buf_size) { 4053 growBuffer(buf, 10); 4054 } 4055 buf[len++] = '&'; 4056 } else { 4057 /* 4058 * The reparsing will be done in xmlStringGetNodeList() 4059 * called by the attribute() function in SAX.c 4060 */ 4061 if (len + 10 > buf_size) { 4062 growBuffer(buf, 10); 4063 } 4064 buf[len++] = '&'; 4065 buf[len++] = '#'; 4066 buf[len++] = '3'; 4067 buf[len++] = '8'; 4068 buf[len++] = ';'; 4069 } 4070 } else if (val != 0) { 4071 if (len + 10 > buf_size) { 4072 growBuffer(buf, 10); 4073 } 4074 len += xmlCopyChar(0, &buf[len], val); 4075 } 4076 } else { 4077 ent = xmlParseEntityRef(ctxt); 4078 ctxt->nbentities++; 4079 if (ent != NULL) 4080 ctxt->nbentities += ent->owner; 4081 if ((ent != NULL) && 4082 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4083 if (len + 10 > buf_size) { 4084 growBuffer(buf, 10); 4085 } 4086 if ((ctxt->replaceEntities == 0) && 4087 (ent->content[0] == '&')) { 4088 buf[len++] = '&'; 4089 buf[len++] = '#'; 4090 buf[len++] = '3'; 4091 buf[len++] = '8'; 4092 buf[len++] = ';'; 4093 } else { 4094 buf[len++] = ent->content[0]; 4095 } 4096 } else if ((ent != NULL) && 4097 (ctxt->replaceEntities != 0)) { 4098 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4099 ++ctxt->depth; 4100 rep = xmlStringDecodeEntities(ctxt, ent->content, 4101 XML_SUBSTITUTE_REF, 4102 0, 0, 
0); 4103 --ctxt->depth; 4104 if (rep != NULL) { 4105 current = rep; 4106 while (*current != 0) { /* non input consuming */ 4107 if ((*current == 0xD) || (*current == 0xA) || 4108 (*current == 0x9)) { 4109 buf[len++] = 0x20; 4110 current++; 4111 } else 4112 buf[len++] = *current++; 4113 if (len + 10 > buf_size) { 4114 growBuffer(buf, 10); 4115 } 4116 } 4117 xmlFree(rep); 4118 rep = NULL; 4119 } 4120 } else { 4121 if (len + 10 > buf_size) { 4122 growBuffer(buf, 10); 4123 } 4124 if (ent->content != NULL) 4125 buf[len++] = ent->content[0]; 4126 } 4127 } else if (ent != NULL) { 4128 int i = xmlStrlen(ent->name); 4129 const xmlChar *cur = ent->name; 4130 4131 /* 4132 * This may look absurd but is needed to detect 4133 * entities problems 4134 */ 4135 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4136 (ent->content != NULL) && (ent->checked == 0)) { 4137 unsigned long oldnbent = ctxt->nbentities; 4138 4139 ++ctxt->depth; 4140 rep = xmlStringDecodeEntities(ctxt, ent->content, 4141 XML_SUBSTITUTE_REF, 0, 0, 0); 4142 --ctxt->depth; 4143 4144 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4145 if (rep != NULL) { 4146 if (xmlStrchr(rep, '<')) 4147 ent->checked |= 1; 4148 xmlFree(rep); 4149 rep = NULL; 4150 } 4151 } 4152 4153 /* 4154 * Just output the reference 4155 */ 4156 buf[len++] = '&'; 4157 while (len + i + 10 > buf_size) { 4158 growBuffer(buf, i + 10); 4159 } 4160 for (;i > 0;i--) 4161 buf[len++] = *cur++; 4162 buf[len++] = ';'; 4163 } 4164 } 4165 } else { 4166 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4167 if ((len != 0) || (!normalize)) { 4168 if ((!normalize) || (!in_space)) { 4169 COPY_BUF(l,buf,len,0x20); 4170 while (len + 10 > buf_size) { 4171 growBuffer(buf, 10); 4172 } 4173 } 4174 in_space = 1; 4175 } 4176 } else { 4177 in_space = 0; 4178 COPY_BUF(l,buf,len,c); 4179 if (len + 10 > buf_size) { 4180 growBuffer(buf, 10); 4181 } 4182 } 4183 NEXTL(l); 4184 } 4185 GROW; 4186 c = CUR_CHAR(l); 4187 } 4188 if (ctxt->instate == 
XML_PARSER_EOF) 4189 goto error; 4190 4191 if ((in_space) && (normalize)) { 4192 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4193 } 4194 buf[len] = 0; 4195 if (RAW == '<') { 4196 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4197 } else if (RAW != limit) { 4198 if ((c != 0) && (!IS_CHAR(c))) { 4199 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4200 "invalid character in attribute value\n"); 4201 } else { 4202 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4203 "AttValue: ' expected\n"); 4204 } 4205 } else 4206 NEXT; 4207 4208 /* 4209 * There we potentially risk an overflow, don't allow attribute value of 4210 * length more than INT_MAX it is a very reasonnable assumption ! 4211 */ 4212 if (len >= INT_MAX) { 4213 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4214 "AttValue length too long\n"); 4215 goto mem_error; 4216 } 4217 4218 if (attlen != NULL) *attlen = (int) len; 4219 return(buf); 4220 4221mem_error: 4222 xmlErrMemory(ctxt, NULL); 4223error: 4224 if (buf != NULL) 4225 xmlFree(buf); 4226 if (rep != NULL) 4227 xmlFree(rep); 4228 return(NULL); 4229} 4230 4231/** 4232 * xmlParseAttValue: 4233 * @ctxt: an XML parser context 4234 * 4235 * parse a value for an attribute 4236 * Note: the parser won't do substitution of entities here, this 4237 * will be handled later in xmlStringGetNodeList 4238 * 4239 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4240 * "'" ([^<&'] | Reference)* "'" 4241 * 4242 * 3.3.3 Attribute-Value Normalization: 4243 * Before the value of an attribute is passed to the application or 4244 * checked for validity, the XML processor must normalize it as follows: 4245 * - a character reference is processed by appending the referenced 4246 * character to the attribute value 4247 * - an entity reference is processed by recursively processing the 4248 * replacement text of the entity 4249 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4250 * appending #x20 to the normalized value, except that only a single 
4251 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4252 * parsed entity or the literal entity value of an internal parsed entity 4253 * - other characters are processed by appending them to the normalized value 4254 * If the declared value is not CDATA, then the XML processor must further 4255 * process the normalized attribute value by discarding any leading and 4256 * trailing space (#x20) characters, and by replacing sequences of space 4257 * (#x20) characters by a single space (#x20) character. 4258 * All attributes for which no declaration has been read should be treated 4259 * by a non-validating parser as if declared CDATA. 4260 * 4261 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4262 */ 4263 4264 4265xmlChar * 4266xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4267 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4268 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4269} 4270 4271/** 4272 * xmlParseSystemLiteral: 4273 * @ctxt: an XML parser context 4274 * 4275 * parse an XML Literal 4276 * 4277 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4278 * 4279 * Returns the SystemLiteral parsed or NULL 4280 */ 4281 4282xmlChar * 4283xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4284 xmlChar *buf = NULL; 4285 int len = 0; 4286 int size = XML_PARSER_BUFFER_SIZE; 4287 int cur, l; 4288 xmlChar stop; 4289 int state = ctxt->instate; 4290 int count = 0; 4291 4292 SHRINK; 4293 if (RAW == '"') { 4294 NEXT; 4295 stop = '"'; 4296 } else if (RAW == '\'') { 4297 NEXT; 4298 stop = '\''; 4299 } else { 4300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4301 return(NULL); 4302 } 4303 4304 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4305 if (buf == NULL) { 4306 xmlErrMemory(ctxt, NULL); 4307 return(NULL); 4308 } 4309 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4310 cur = CUR_CHAR(l); 4311 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4312 if (len + 5 >= size) { 4313 
xmlChar *tmp; 4314 4315 if ((size > XML_MAX_NAME_LENGTH) && 4316 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4317 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4318 xmlFree(buf); 4319 ctxt->instate = (xmlParserInputState) state; 4320 return(NULL); 4321 } 4322 size *= 2; 4323 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4324 if (tmp == NULL) { 4325 xmlFree(buf); 4326 xmlErrMemory(ctxt, NULL); 4327 ctxt->instate = (xmlParserInputState) state; 4328 return(NULL); 4329 } 4330 buf = tmp; 4331 } 4332 count++; 4333 if (count > 50) { 4334 GROW; 4335 count = 0; 4336 if (ctxt->instate == XML_PARSER_EOF) { 4337 xmlFree(buf); 4338 return(NULL); 4339 } 4340 } 4341 COPY_BUF(l,buf,len,cur); 4342 NEXTL(l); 4343 cur = CUR_CHAR(l); 4344 if (cur == 0) { 4345 GROW; 4346 SHRINK; 4347 cur = CUR_CHAR(l); 4348 } 4349 } 4350 buf[len] = 0; 4351 ctxt->instate = (xmlParserInputState) state; 4352 if (!IS_CHAR(cur)) { 4353 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4354 } else { 4355 NEXT; 4356 } 4357 return(buf); 4358} 4359 4360/** 4361 * xmlParsePubidLiteral: 4362 * @ctxt: an XML parser context 4363 * 4364 * parse an XML public literal 4365 * 4366 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4367 * 4368 * Returns the PubidLiteral parsed or NULL. 
4369 */ 4370 4371xmlChar * 4372xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4373 xmlChar *buf = NULL; 4374 int len = 0; 4375 int size = XML_PARSER_BUFFER_SIZE; 4376 xmlChar cur; 4377 xmlChar stop; 4378 int count = 0; 4379 xmlParserInputState oldstate = ctxt->instate; 4380 4381 SHRINK; 4382 if (RAW == '"') { 4383 NEXT; 4384 stop = '"'; 4385 } else if (RAW == '\'') { 4386 NEXT; 4387 stop = '\''; 4388 } else { 4389 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4390 return(NULL); 4391 } 4392 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4393 if (buf == NULL) { 4394 xmlErrMemory(ctxt, NULL); 4395 return(NULL); 4396 } 4397 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4398 cur = CUR; 4399 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4400 if (len + 1 >= size) { 4401 xmlChar *tmp; 4402 4403 if ((size > XML_MAX_NAME_LENGTH) && 4404 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4405 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4406 xmlFree(buf); 4407 return(NULL); 4408 } 4409 size *= 2; 4410 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4411 if (tmp == NULL) { 4412 xmlErrMemory(ctxt, NULL); 4413 xmlFree(buf); 4414 return(NULL); 4415 } 4416 buf = tmp; 4417 } 4418 buf[len++] = cur; 4419 count++; 4420 if (count > 50) { 4421 GROW; 4422 count = 0; 4423 if (ctxt->instate == XML_PARSER_EOF) { 4424 xmlFree(buf); 4425 return(NULL); 4426 } 4427 } 4428 NEXT; 4429 cur = CUR; 4430 if (cur == 0) { 4431 GROW; 4432 SHRINK; 4433 cur = CUR; 4434 } 4435 } 4436 buf[len] = 0; 4437 if (cur != stop) { 4438 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4439 } else { 4440 NEXT; 4441 } 4442 ctxt->instate = oldstate; 4443 return(buf); 4444} 4445 4446static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4447 4448/* 4449 * used for the test in the inner loop of the char data testing 4450 */ 4451static const unsigned char test_char_data[256] = { 4452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4453 0x00, 0x09, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4455 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4456 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4457 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4458 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4459 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4460 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4461 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4462 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4463 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4464 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4465 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4466 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4467 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4469 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4474 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4475 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4476 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4477 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4478 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4479 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4480 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4481 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4482 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4483 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4484}; 4485 4486/** 4487 * xmlParseCharData: 4488 * @ctxt: an XML parser context 4489 * @cdata: int indicating whether we are within a CDATA section 4490 * 4491 * parse a CharData section. 4492 * if we are within a CDATA section ']]>' marks an end of section. 
4493 * 4494 * The right angle bracket (>) may be represented using the string ">", 4495 * and must, for compatibility, be escaped using ">" or a character 4496 * reference when it appears in the string "]]>" in content, when that 4497 * string is not marking the end of a CDATA section. 4498 * 4499 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4500 */ 4501 4502void 4503xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4504 const xmlChar *in; 4505 int nbchar = 0; 4506 int line = ctxt->input->line; 4507 int col = ctxt->input->col; 4508 int ccol; 4509 4510 SHRINK; 4511 GROW; 4512 /* 4513 * Accelerated common case where input don't need to be 4514 * modified before passing it to the handler. 4515 */ 4516 if (!cdata) { 4517 in = ctxt->input->cur; 4518 do { 4519get_more_space: 4520 while (*in == 0x20) { in++; ctxt->input->col++; } 4521 if (*in == 0xA) { 4522 do { 4523 ctxt->input->line++; ctxt->input->col = 1; 4524 in++; 4525 } while (*in == 0xA); 4526 goto get_more_space; 4527 } 4528 if (*in == '<') { 4529 nbchar = in - ctxt->input->cur; 4530 if (nbchar > 0) { 4531 const xmlChar *tmp = ctxt->input->cur; 4532 ctxt->input->cur = in; 4533 4534 if ((ctxt->sax != NULL) && 4535 (ctxt->sax->ignorableWhitespace != 4536 ctxt->sax->characters)) { 4537 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4538 if (ctxt->sax->ignorableWhitespace != NULL) 4539 ctxt->sax->ignorableWhitespace(ctxt->userData, 4540 tmp, nbchar); 4541 } else { 4542 if (ctxt->sax->characters != NULL) 4543 ctxt->sax->characters(ctxt->userData, 4544 tmp, nbchar); 4545 if (*ctxt->space == -1) 4546 *ctxt->space = -2; 4547 } 4548 } else if ((ctxt->sax != NULL) && 4549 (ctxt->sax->characters != NULL)) { 4550 ctxt->sax->characters(ctxt->userData, 4551 tmp, nbchar); 4552 } 4553 } 4554 return; 4555 } 4556 4557get_more: 4558 ccol = ctxt->input->col; 4559 while (test_char_data[*in]) { 4560 in++; 4561 ccol++; 4562 } 4563 ctxt->input->col = ccol; 4564 if (*in == 0xA) { 4565 do { 4566 ctxt->input->line++; ctxt->input->col = 1; 
4567 in++; 4568 } while (*in == 0xA); 4569 goto get_more; 4570 } 4571 if (*in == ']') { 4572 if ((in[1] == ']') && (in[2] == '>')) { 4573 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4574 ctxt->input->cur = in; 4575 return; 4576 } 4577 in++; 4578 ctxt->input->col++; 4579 goto get_more; 4580 } 4581 nbchar = in - ctxt->input->cur; 4582 if (nbchar > 0) { 4583 if ((ctxt->sax != NULL) && 4584 (ctxt->sax->ignorableWhitespace != 4585 ctxt->sax->characters) && 4586 (IS_BLANK_CH(*ctxt->input->cur))) { 4587 const xmlChar *tmp = ctxt->input->cur; 4588 ctxt->input->cur = in; 4589 4590 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4591 if (ctxt->sax->ignorableWhitespace != NULL) 4592 ctxt->sax->ignorableWhitespace(ctxt->userData, 4593 tmp, nbchar); 4594 } else { 4595 if (ctxt->sax->characters != NULL) 4596 ctxt->sax->characters(ctxt->userData, 4597 tmp, nbchar); 4598 if (*ctxt->space == -1) 4599 *ctxt->space = -2; 4600 } 4601 line = ctxt->input->line; 4602 col = ctxt->input->col; 4603 } else if (ctxt->sax != NULL) { 4604 if (ctxt->sax->characters != NULL) 4605 ctxt->sax->characters(ctxt->userData, 4606 ctxt->input->cur, nbchar); 4607 line = ctxt->input->line; 4608 col = ctxt->input->col; 4609 } 4610 /* something really bad happened in the SAX callback */ 4611 if (ctxt->instate != XML_PARSER_CONTENT) 4612 return; 4613 } 4614 ctxt->input->cur = in; 4615 if (*in == 0xD) { 4616 in++; 4617 if (*in == 0xA) { 4618 ctxt->input->cur = in; 4619 in++; 4620 ctxt->input->line++; ctxt->input->col = 1; 4621 continue; /* while */ 4622 } 4623 in--; 4624 } 4625 if (*in == '<') { 4626 return; 4627 } 4628 if (*in == '&') { 4629 return; 4630 } 4631 SHRINK; 4632 GROW; 4633 if (ctxt->instate == XML_PARSER_EOF) 4634 return; 4635 in = ctxt->input->cur; 4636 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4637 nbchar = 0; 4638 } 4639 ctxt->input->line = line; 4640 ctxt->input->col = col; 4641 xmlParseCharDataComplex(ctxt, cdata); 4642} 4643 4644/** 4645 * xmlParseCharDataComplex: 4646 * 
@ctxt: an XML parser context 4647 * @cdata: int indicating whether we are within a CDATA section 4648 * 4649 * parse a CharData section.this is the fallback function 4650 * of xmlParseCharData() when the parsing requires handling 4651 * of non-ASCII characters. 4652 */ 4653static void 4654xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4655 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4656 int nbchar = 0; 4657 int cur, l; 4658 int count = 0; 4659 4660 SHRINK; 4661 GROW; 4662 cur = CUR_CHAR(l); 4663 while ((cur != '<') && /* checked */ 4664 (cur != '&') && 4665 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4666 if ((cur == ']') && (NXT(1) == ']') && 4667 (NXT(2) == '>')) { 4668 if (cdata) break; 4669 else { 4670 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4671 } 4672 } 4673 COPY_BUF(l,buf,nbchar,cur); 4674 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4675 buf[nbchar] = 0; 4676 4677 /* 4678 * OK the segment is to be consumed as chars. 4679 */ 4680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4681 if (areBlanks(ctxt, buf, nbchar, 0)) { 4682 if (ctxt->sax->ignorableWhitespace != NULL) 4683 ctxt->sax->ignorableWhitespace(ctxt->userData, 4684 buf, nbchar); 4685 } else { 4686 if (ctxt->sax->characters != NULL) 4687 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4688 if ((ctxt->sax->characters != 4689 ctxt->sax->ignorableWhitespace) && 4690 (*ctxt->space == -1)) 4691 *ctxt->space = -2; 4692 } 4693 } 4694 nbchar = 0; 4695 /* something really bad happened in the SAX callback */ 4696 if (ctxt->instate != XML_PARSER_CONTENT) 4697 return; 4698 } 4699 count++; 4700 if (count > 50) { 4701 GROW; 4702 count = 0; 4703 if (ctxt->instate == XML_PARSER_EOF) 4704 return; 4705 } 4706 NEXTL(l); 4707 cur = CUR_CHAR(l); 4708 } 4709 if (nbchar != 0) { 4710 buf[nbchar] = 0; 4711 /* 4712 * OK the segment is to be consumed as chars. 
4713 */ 4714 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4715 if (areBlanks(ctxt, buf, nbchar, 0)) { 4716 if (ctxt->sax->ignorableWhitespace != NULL) 4717 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4718 } else { 4719 if (ctxt->sax->characters != NULL) 4720 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4721 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4722 (*ctxt->space == -1)) 4723 *ctxt->space = -2; 4724 } 4725 } 4726 } 4727 if ((cur != 0) && (!IS_CHAR(cur))) { 4728 /* Generate the error and skip the offending character */ 4729 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4730 "PCDATA invalid Char value %d\n", 4731 cur); 4732 NEXTL(l); 4733 } 4734} 4735 4736/** 4737 * xmlParseExternalID: 4738 * @ctxt: an XML parser context 4739 * @publicID: a xmlChar** receiving PubidLiteral 4740 * @strict: indicate whether we should restrict parsing to only 4741 * production [75], see NOTE below 4742 * 4743 * Parse an External ID or a Public ID 4744 * 4745 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4746 * 'PUBLIC' S PubidLiteral S SystemLiteral 4747 * 4748 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4749 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4750 * 4751 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4752 * 4753 * Returns the function returns SystemLiteral and in the second 4754 * case publicID receives PubidLiteral, is strict is off 4755 * it is possible to return NULL and have publicID set. 
4756 */ 4757 4758xmlChar * 4759xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4760 xmlChar *URI = NULL; 4761 4762 SHRINK; 4763 4764 *publicID = NULL; 4765 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4766 SKIP(6); 4767 if (!IS_BLANK_CH(CUR)) { 4768 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4769 "Space required after 'SYSTEM'\n"); 4770 } 4771 SKIP_BLANKS; 4772 URI = xmlParseSystemLiteral(ctxt); 4773 if (URI == NULL) { 4774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4775 } 4776 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4777 SKIP(6); 4778 if (!IS_BLANK_CH(CUR)) { 4779 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4780 "Space required after 'PUBLIC'\n"); 4781 } 4782 SKIP_BLANKS; 4783 *publicID = xmlParsePubidLiteral(ctxt); 4784 if (*publicID == NULL) { 4785 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4786 } 4787 if (strict) { 4788 /* 4789 * We don't handle [83] so "S SystemLiteral" is required. 4790 */ 4791 if (!IS_BLANK_CH(CUR)) { 4792 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4793 "Space required after the Public Identifier\n"); 4794 } 4795 } else { 4796 /* 4797 * We handle [83] so we return immediately, if 4798 * "S SystemLiteral" is not detected. From a purely parsing 4799 * point of view that's a nice mess. 4800 */ 4801 const xmlChar *ptr; 4802 GROW; 4803 4804 ptr = CUR_PTR; 4805 if (!IS_BLANK_CH(*ptr)) return(NULL); 4806 4807 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4808 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4809 } 4810 SKIP_BLANKS; 4811 URI = xmlParseSystemLiteral(ctxt); 4812 if (URI == NULL) { 4813 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4814 } 4815 } 4816 return(URI); 4817} 4818 4819/** 4820 * xmlParseCommentComplex: 4821 * @ctxt: an XML parser context 4822 * @buf: the already parsed part of the buffer 4823 * @len: number of bytes filles in the buffer 4824 * @size: allocated size of the buffer 4825 * 4826 * Skip an XML (SGML) comment <!-- .... 
--> 4827 * The spec says that "For compatibility, the string "--" (double-hyphen) 4828 * must not occur within comments. " 4829 * This is the slow routine in case the accelerator for ascii didn't work 4830 * 4831 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4832 */ 4833static void 4834xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4835 size_t len, size_t size) { 4836 int q, ql; 4837 int r, rl; 4838 int cur, l; 4839 size_t count = 0; 4840 int inputid; 4841 4842 inputid = ctxt->input->id; 4843 4844 if (buf == NULL) { 4845 len = 0; 4846 size = XML_PARSER_BUFFER_SIZE; 4847 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4848 if (buf == NULL) { 4849 xmlErrMemory(ctxt, NULL); 4850 return; 4851 } 4852 } 4853 GROW; /* Assure there's enough input data */ 4854 q = CUR_CHAR(ql); 4855 if (q == 0) 4856 goto not_terminated; 4857 if (!IS_CHAR(q)) { 4858 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4859 "xmlParseComment: invalid xmlChar value %d\n", 4860 q); 4861 xmlFree (buf); 4862 return; 4863 } 4864 NEXTL(ql); 4865 r = CUR_CHAR(rl); 4866 if (r == 0) 4867 goto not_terminated; 4868 if (!IS_CHAR(r)) { 4869 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4870 "xmlParseComment: invalid xmlChar value %d\n", 4871 q); 4872 xmlFree (buf); 4873 return; 4874 } 4875 NEXTL(rl); 4876 cur = CUR_CHAR(l); 4877 if (cur == 0) 4878 goto not_terminated; 4879 while (IS_CHAR(cur) && /* checked */ 4880 ((cur != '>') || 4881 (r != '-') || (q != '-'))) { 4882 if ((r == '-') && (q == '-')) { 4883 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4884 } 4885 if ((len > XML_MAX_TEXT_LENGTH) && 4886 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4887 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4888 "Comment too big found", NULL); 4889 xmlFree (buf); 4890 return; 4891 } 4892 if (len + 5 >= size) { 4893 xmlChar *new_buf; 4894 size_t new_size; 4895 4896 new_size = size * 2; 4897 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4898 if (new_buf == NULL) { 
4899 xmlFree (buf); 4900 xmlErrMemory(ctxt, NULL); 4901 return; 4902 } 4903 buf = new_buf; 4904 size = new_size; 4905 } 4906 COPY_BUF(ql,buf,len,q); 4907 q = r; 4908 ql = rl; 4909 r = cur; 4910 rl = l; 4911 4912 count++; 4913 if (count > 50) { 4914 GROW; 4915 count = 0; 4916 if (ctxt->instate == XML_PARSER_EOF) { 4917 xmlFree(buf); 4918 return; 4919 } 4920 } 4921 NEXTL(l); 4922 cur = CUR_CHAR(l); 4923 if (cur == 0) { 4924 SHRINK; 4925 GROW; 4926 cur = CUR_CHAR(l); 4927 } 4928 } 4929 buf[len] = 0; 4930 if (cur == 0) { 4931 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4932 "Comment not terminated \n<!--%.50s\n", buf); 4933 } else if (!IS_CHAR(cur)) { 4934 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4935 "xmlParseComment: invalid xmlChar value %d\n", 4936 cur); 4937 } else { 4938 if (inputid != ctxt->input->id) { 4939 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4940 "Comment doesn't start and stop in the same entity\n"); 4941 } 4942 NEXT; 4943 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4944 (!ctxt->disableSAX)) 4945 ctxt->sax->comment(ctxt->userData, buf); 4946 } 4947 xmlFree(buf); 4948 return; 4949not_terminated: 4950 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4951 "Comment not terminated\n", NULL); 4952 xmlFree(buf); 4953 return; 4954} 4955 4956/** 4957 * xmlParseComment: 4958 * @ctxt: an XML parser context 4959 * 4960 * Skip an XML (SGML) comment <!-- .... --> 4961 * The spec says that "For compatibility, the string "--" (double-hyphen) 4962 * must not occur within comments. " 4963 * 4964 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4965 */ 4966void 4967xmlParseComment(xmlParserCtxtPtr ctxt) { 4968 xmlChar *buf = NULL; 4969 size_t size = XML_PARSER_BUFFER_SIZE; 4970 size_t len = 0; 4971 xmlParserInputState state; 4972 const xmlChar *in; 4973 size_t nbchar = 0; 4974 int ccol; 4975 int inputid; 4976 4977 /* 4978 * Check that there is a comment right here. 
4979 */ 4980 if ((RAW != '<') || (NXT(1) != '!') || 4981 (NXT(2) != '-') || (NXT(3) != '-')) return; 4982 state = ctxt->instate; 4983 ctxt->instate = XML_PARSER_COMMENT; 4984 inputid = ctxt->input->id; 4985 SKIP(4); 4986 SHRINK; 4987 GROW; 4988 4989 /* 4990 * Accelerated common case where input don't need to be 4991 * modified before passing it to the handler. 4992 */ 4993 in = ctxt->input->cur; 4994 do { 4995 if (*in == 0xA) { 4996 do { 4997 ctxt->input->line++; ctxt->input->col = 1; 4998 in++; 4999 } while (*in == 0xA); 5000 } 5001get_more: 5002 ccol = ctxt->input->col; 5003 while (((*in > '-') && (*in <= 0x7F)) || 5004 ((*in >= 0x20) && (*in < '-')) || 5005 (*in == 0x09)) { 5006 in++; 5007 ccol++; 5008 } 5009 ctxt->input->col = ccol; 5010 if (*in == 0xA) { 5011 do { 5012 ctxt->input->line++; ctxt->input->col = 1; 5013 in++; 5014 } while (*in == 0xA); 5015 goto get_more; 5016 } 5017 nbchar = in - ctxt->input->cur; 5018 /* 5019 * save current set of data 5020 */ 5021 if (nbchar > 0) { 5022 if ((ctxt->sax != NULL) && 5023 (ctxt->sax->comment != NULL)) { 5024 if (buf == NULL) { 5025 if ((*in == '-') && (in[1] == '-')) 5026 size = nbchar + 1; 5027 else 5028 size = XML_PARSER_BUFFER_SIZE + nbchar; 5029 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5030 if (buf == NULL) { 5031 xmlErrMemory(ctxt, NULL); 5032 ctxt->instate = state; 5033 return; 5034 } 5035 len = 0; 5036 } else if (len + nbchar + 1 >= size) { 5037 xmlChar *new_buf; 5038 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5039 new_buf = (xmlChar *) xmlRealloc(buf, 5040 size * sizeof(xmlChar)); 5041 if (new_buf == NULL) { 5042 xmlFree (buf); 5043 xmlErrMemory(ctxt, NULL); 5044 ctxt->instate = state; 5045 return; 5046 } 5047 buf = new_buf; 5048 } 5049 memcpy(&buf[len], ctxt->input->cur, nbchar); 5050 len += nbchar; 5051 buf[len] = 0; 5052 } 5053 } 5054 if ((len > XML_MAX_TEXT_LENGTH) && 5055 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5056 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5057 
"Comment too big found", NULL); 5058 xmlFree (buf); 5059 return; 5060 } 5061 ctxt->input->cur = in; 5062 if (*in == 0xA) { 5063 in++; 5064 ctxt->input->line++; ctxt->input->col = 1; 5065 } 5066 if (*in == 0xD) { 5067 in++; 5068 if (*in == 0xA) { 5069 ctxt->input->cur = in; 5070 in++; 5071 ctxt->input->line++; ctxt->input->col = 1; 5072 continue; /* while */ 5073 } 5074 in--; 5075 } 5076 SHRINK; 5077 GROW; 5078 if (ctxt->instate == XML_PARSER_EOF) { 5079 xmlFree(buf); 5080 return; 5081 } 5082 in = ctxt->input->cur; 5083 if (*in == '-') { 5084 if (in[1] == '-') { 5085 if (in[2] == '>') { 5086 if (ctxt->input->id != inputid) { 5087 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5088 "comment doesn't start and stop in the same entity\n"); 5089 } 5090 SKIP(3); 5091 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5092 (!ctxt->disableSAX)) { 5093 if (buf != NULL) 5094 ctxt->sax->comment(ctxt->userData, buf); 5095 else 5096 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5097 } 5098 if (buf != NULL) 5099 xmlFree(buf); 5100 if (ctxt->instate != XML_PARSER_EOF) 5101 ctxt->instate = state; 5102 return; 5103 } 5104 if (buf != NULL) { 5105 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5106 "Double hyphen within comment: " 5107 "<!--%.50s\n", 5108 buf); 5109 } else 5110 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5111 "Double hyphen within comment\n", NULL); 5112 in++; 5113 ctxt->input->col++; 5114 } 5115 in++; 5116 ctxt->input->col++; 5117 goto get_more; 5118 } 5119 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5120 xmlParseCommentComplex(ctxt, buf, len, size); 5121 ctxt->instate = state; 5122 return; 5123} 5124 5125 5126/** 5127 * xmlParsePITarget: 5128 * @ctxt: an XML parser context 5129 * 5130 * parse the name of a PI 5131 * 5132 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5133 * 5134 * Returns the PITarget name or NULL 5135 */ 5136 5137const xmlChar * 5138xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5139 const xmlChar 
*name; 5140 5141 name = xmlParseName(ctxt); 5142 if ((name != NULL) && 5143 ((name[0] == 'x') || (name[0] == 'X')) && 5144 ((name[1] == 'm') || (name[1] == 'M')) && 5145 ((name[2] == 'l') || (name[2] == 'L'))) { 5146 int i; 5147 if ((name[0] == 'x') && (name[1] == 'm') && 5148 (name[2] == 'l') && (name[3] == 0)) { 5149 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5150 "XML declaration allowed only at the start of the document\n"); 5151 return(name); 5152 } else if (name[3] == 0) { 5153 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5154 return(name); 5155 } 5156 for (i = 0;;i++) { 5157 if (xmlW3CPIs[i] == NULL) break; 5158 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5159 return(name); 5160 } 5161 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5162 "xmlParsePITarget: invalid name prefix 'xml'\n", 5163 NULL, NULL); 5164 } 5165 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5166 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5167 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5168 } 5169 return(name); 5170} 5171 5172#ifdef LIBXML_CATALOG_ENABLED 5173/** 5174 * xmlParseCatalogPI: 5175 * @ctxt: an XML parser context 5176 * @catalog: the PI value string 5177 * 5178 * parse an XML Catalog Processing Instruction. 
5179 * 5180 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5181 * 5182 * Occurs only if allowed by the user and if happening in the Misc 5183 * part of the document before any doctype informations 5184 * This will add the given catalog to the parsing context in order 5185 * to be used if there is a resolution need further down in the document 5186 */ 5187 5188static void 5189xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5190 xmlChar *URL = NULL; 5191 const xmlChar *tmp, *base; 5192 xmlChar marker; 5193 5194 tmp = catalog; 5195 while (IS_BLANK_CH(*tmp)) tmp++; 5196 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5197 goto error; 5198 tmp += 7; 5199 while (IS_BLANK_CH(*tmp)) tmp++; 5200 if (*tmp != '=') { 5201 return; 5202 } 5203 tmp++; 5204 while (IS_BLANK_CH(*tmp)) tmp++; 5205 marker = *tmp; 5206 if ((marker != '\'') && (marker != '"')) 5207 goto error; 5208 tmp++; 5209 base = tmp; 5210 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5211 if (*tmp == 0) 5212 goto error; 5213 URL = xmlStrndup(base, tmp - base); 5214 tmp++; 5215 while (IS_BLANK_CH(*tmp)) tmp++; 5216 if (*tmp != 0) 5217 goto error; 5218 5219 if (URL != NULL) { 5220 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5221 xmlFree(URL); 5222 } 5223 return; 5224 5225error: 5226 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5227 "Catalog PI syntax error: %s\n", 5228 catalog, NULL); 5229 if (URL != NULL) 5230 xmlFree(URL); 5231} 5232#endif 5233 5234/** 5235 * xmlParsePI: 5236 * @ctxt: an XML parser context 5237 * 5238 * parse an XML Processing Instruction. 5239 * 5240 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5241 * 5242 * The processing is transfered to SAX once parsed. 
5243 */ 5244 5245void 5246xmlParsePI(xmlParserCtxtPtr ctxt) { 5247 xmlChar *buf = NULL; 5248 size_t len = 0; 5249 size_t size = XML_PARSER_BUFFER_SIZE; 5250 int cur, l; 5251 const xmlChar *target; 5252 xmlParserInputState state; 5253 int count = 0; 5254 5255 if ((RAW == '<') && (NXT(1) == '?')) { 5256 xmlParserInputPtr input = ctxt->input; 5257 state = ctxt->instate; 5258 ctxt->instate = XML_PARSER_PI; 5259 /* 5260 * this is a Processing Instruction. 5261 */ 5262 SKIP(2); 5263 SHRINK; 5264 5265 /* 5266 * Parse the target name and check for special support like 5267 * namespace. 5268 */ 5269 target = xmlParsePITarget(ctxt); 5270 if (target != NULL) { 5271 if ((RAW == '?') && (NXT(1) == '>')) { 5272 if (input != ctxt->input) { 5273 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5274 "PI declaration doesn't start and stop in the same entity\n"); 5275 } 5276 SKIP(2); 5277 5278 /* 5279 * SAX: PI detected. 5280 */ 5281 if ((ctxt->sax) && (!ctxt->disableSAX) && 5282 (ctxt->sax->processingInstruction != NULL)) 5283 ctxt->sax->processingInstruction(ctxt->userData, 5284 target, NULL); 5285 if (ctxt->instate != XML_PARSER_EOF) 5286 ctxt->instate = state; 5287 return; 5288 } 5289 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5290 if (buf == NULL) { 5291 xmlErrMemory(ctxt, NULL); 5292 ctxt->instate = state; 5293 return; 5294 } 5295 cur = CUR; 5296 if (!IS_BLANK(cur)) { 5297 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5298 "ParsePI: PI %s space expected\n", target); 5299 } 5300 SKIP_BLANKS; 5301 cur = CUR_CHAR(l); 5302 while (IS_CHAR(cur) && /* checked */ 5303 ((cur != '?') || (NXT(1) != '>'))) { 5304 if (len + 5 >= size) { 5305 xmlChar *tmp; 5306 size_t new_size = size * 2; 5307 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5308 if (tmp == NULL) { 5309 xmlErrMemory(ctxt, NULL); 5310 xmlFree(buf); 5311 ctxt->instate = state; 5312 return; 5313 } 5314 buf = tmp; 5315 size = new_size; 5316 } 5317 count++; 5318 if (count > 50) { 5319 GROW; 5320 if (ctxt->instate == 
XML_PARSER_EOF) { 5321 xmlFree(buf); 5322 return; 5323 } 5324 count = 0; 5325 if ((len > XML_MAX_TEXT_LENGTH) && 5326 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5327 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5328 "PI %s too big found", target); 5329 xmlFree(buf); 5330 ctxt->instate = state; 5331 return; 5332 } 5333 } 5334 COPY_BUF(l,buf,len,cur); 5335 NEXTL(l); 5336 cur = CUR_CHAR(l); 5337 if (cur == 0) { 5338 SHRINK; 5339 GROW; 5340 cur = CUR_CHAR(l); 5341 } 5342 } 5343 if ((len > XML_MAX_TEXT_LENGTH) && 5344 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5346 "PI %s too big found", target); 5347 xmlFree(buf); 5348 ctxt->instate = state; 5349 return; 5350 } 5351 buf[len] = 0; 5352 if (cur != '?') { 5353 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5354 "ParsePI: PI %s never end ...\n", target); 5355 } else { 5356 if (input != ctxt->input) { 5357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5358 "PI declaration doesn't start and stop in the same entity\n"); 5359 } 5360 SKIP(2); 5361 5362#ifdef LIBXML_CATALOG_ENABLED 5363 if (((state == XML_PARSER_MISC) || 5364 (state == XML_PARSER_START)) && 5365 (xmlStrEqual(target, XML_CATALOG_PI))) { 5366 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5367 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5368 (allow == XML_CATA_ALLOW_ALL)) 5369 xmlParseCatalogPI(ctxt, buf); 5370 } 5371#endif 5372 5373 5374 /* 5375 * SAX: PI detected. 
5376 */ 5377 if ((ctxt->sax) && (!ctxt->disableSAX) && 5378 (ctxt->sax->processingInstruction != NULL)) 5379 ctxt->sax->processingInstruction(ctxt->userData, 5380 target, buf); 5381 } 5382 xmlFree(buf); 5383 } else { 5384 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5385 } 5386 if (ctxt->instate != XML_PARSER_EOF) 5387 ctxt->instate = state; 5388 } 5389} 5390 5391/** 5392 * xmlParseNotationDecl: 5393 * @ctxt: an XML parser context 5394 * 5395 * parse a notation declaration 5396 * 5397 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5398 * 5399 * Hence there is actually 3 choices: 5400 * 'PUBLIC' S PubidLiteral 5401 * 'PUBLIC' S PubidLiteral S SystemLiteral 5402 * and 'SYSTEM' S SystemLiteral 5403 * 5404 * See the NOTE on xmlParseExternalID(). 5405 */ 5406 5407void 5408xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5409 const xmlChar *name; 5410 xmlChar *Pubid; 5411 xmlChar *Systemid; 5412 5413 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5414 xmlParserInputPtr input = ctxt->input; 5415 SHRINK; 5416 SKIP(10); 5417 if (!IS_BLANK_CH(CUR)) { 5418 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5419 "Space required after '<!NOTATION'\n"); 5420 return; 5421 } 5422 SKIP_BLANKS; 5423 5424 name = xmlParseName(ctxt); 5425 if (name == NULL) { 5426 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5427 return; 5428 } 5429 if (!IS_BLANK_CH(CUR)) { 5430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5431 "Space required after the NOTATION name'\n"); 5432 return; 5433 } 5434 if (xmlStrchr(name, ':') != NULL) { 5435 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5436 "colons are forbidden from notation names '%s'\n", 5437 name, NULL, NULL); 5438 } 5439 SKIP_BLANKS; 5440 5441 /* 5442 * Parse the IDs. 
5443 */ 5444 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5445 SKIP_BLANKS; 5446 5447 if (RAW == '>') { 5448 if (input != ctxt->input) { 5449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5450 "Notation declaration doesn't start and stop in the same entity\n"); 5451 } 5452 NEXT; 5453 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5454 (ctxt->sax->notationDecl != NULL)) 5455 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5456 } else { 5457 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5458 } 5459 if (Systemid != NULL) xmlFree(Systemid); 5460 if (Pubid != NULL) xmlFree(Pubid); 5461 } 5462} 5463 5464/** 5465 * xmlParseEntityDecl: 5466 * @ctxt: an XML parser context 5467 * 5468 * parse <!ENTITY declarations 5469 * 5470 * [70] EntityDecl ::= GEDecl | PEDecl 5471 * 5472 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5473 * 5474 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5475 * 5476 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5477 * 5478 * [74] PEDef ::= EntityValue | ExternalID 5479 * 5480 * [76] NDataDecl ::= S 'NDATA' S Name 5481 * 5482 * [ VC: Notation Declared ] 5483 * The Name must match the declared name of a notation. 
5484 */ 5485 5486void 5487xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5488 const xmlChar *name = NULL; 5489 xmlChar *value = NULL; 5490 xmlChar *URI = NULL, *literal = NULL; 5491 const xmlChar *ndata = NULL; 5492 int isParameter = 0; 5493 xmlChar *orig = NULL; 5494 int skipped; 5495 5496 /* GROW; done in the caller */ 5497 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5498 xmlParserInputPtr input = ctxt->input; 5499 SHRINK; 5500 SKIP(8); 5501 skipped = SKIP_BLANKS; 5502 if (skipped == 0) { 5503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5504 "Space required after '<!ENTITY'\n"); 5505 } 5506 5507 if (RAW == '%') { 5508 NEXT; 5509 skipped = SKIP_BLANKS; 5510 if (skipped == 0) { 5511 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5512 "Space required after '%'\n"); 5513 } 5514 isParameter = 1; 5515 } 5516 5517 name = xmlParseName(ctxt); 5518 if (name == NULL) { 5519 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5520 "xmlParseEntityDecl: no name\n"); 5521 return; 5522 } 5523 if (xmlStrchr(name, ':') != NULL) { 5524 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5525 "colons are forbidden from entities names '%s'\n", 5526 name, NULL, NULL); 5527 } 5528 skipped = SKIP_BLANKS; 5529 if (skipped == 0) { 5530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5531 "Space required after the entity name\n"); 5532 } 5533 5534 ctxt->instate = XML_PARSER_ENTITY_DECL; 5535 /* 5536 * handle the various case of definitions... 
5537 */ 5538 if (isParameter) { 5539 if ((RAW == '"') || (RAW == '\'')) { 5540 value = xmlParseEntityValue(ctxt, &orig); 5541 if (value) { 5542 if ((ctxt->sax != NULL) && 5543 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5544 ctxt->sax->entityDecl(ctxt->userData, name, 5545 XML_INTERNAL_PARAMETER_ENTITY, 5546 NULL, NULL, value); 5547 } 5548 } else { 5549 URI = xmlParseExternalID(ctxt, &literal, 1); 5550 if ((URI == NULL) && (literal == NULL)) { 5551 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5552 } 5553 if (URI) { 5554 xmlURIPtr uri; 5555 5556 uri = xmlParseURI((const char *) URI); 5557 if (uri == NULL) { 5558 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5559 "Invalid URI: %s\n", URI); 5560 /* 5561 * This really ought to be a well formedness error 5562 * but the XML Core WG decided otherwise c.f. issue 5563 * E26 of the XML erratas. 5564 */ 5565 } else { 5566 if (uri->fragment != NULL) { 5567 /* 5568 * Okay this is foolish to block those but not 5569 * invalid URIs. 5570 */ 5571 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5572 } else { 5573 if ((ctxt->sax != NULL) && 5574 (!ctxt->disableSAX) && 5575 (ctxt->sax->entityDecl != NULL)) 5576 ctxt->sax->entityDecl(ctxt->userData, name, 5577 XML_EXTERNAL_PARAMETER_ENTITY, 5578 literal, URI, NULL); 5579 } 5580 xmlFreeURI(uri); 5581 } 5582 } 5583 } 5584 } else { 5585 if ((RAW == '"') || (RAW == '\'')) { 5586 value = xmlParseEntityValue(ctxt, &orig); 5587 if ((ctxt->sax != NULL) && 5588 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5589 ctxt->sax->entityDecl(ctxt->userData, name, 5590 XML_INTERNAL_GENERAL_ENTITY, 5591 NULL, NULL, value); 5592 /* 5593 * For expat compatibility in SAX mode. 
5594 */ 5595 if ((ctxt->myDoc == NULL) || 5596 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5597 if (ctxt->myDoc == NULL) { 5598 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5599 if (ctxt->myDoc == NULL) { 5600 xmlErrMemory(ctxt, "New Doc failed"); 5601 return; 5602 } 5603 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5604 } 5605 if (ctxt->myDoc->intSubset == NULL) 5606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5607 BAD_CAST "fake", NULL, NULL); 5608 5609 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5610 NULL, NULL, value); 5611 } 5612 } else { 5613 URI = xmlParseExternalID(ctxt, &literal, 1); 5614 if ((URI == NULL) && (literal == NULL)) { 5615 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5616 } 5617 if (URI) { 5618 xmlURIPtr uri; 5619 5620 uri = xmlParseURI((const char *)URI); 5621 if (uri == NULL) { 5622 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5623 "Invalid URI: %s\n", URI); 5624 /* 5625 * This really ought to be a well formedness error 5626 * but the XML Core WG decided otherwise c.f. issue 5627 * E26 of the XML erratas. 5628 */ 5629 } else { 5630 if (uri->fragment != NULL) { 5631 /* 5632 * Okay this is foolish to block those but not 5633 * invalid URIs. 
5634 */ 5635 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5636 } 5637 xmlFreeURI(uri); 5638 } 5639 } 5640 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5641 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5642 "Space required before 'NDATA'\n"); 5643 } 5644 SKIP_BLANKS; 5645 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5646 SKIP(5); 5647 if (!IS_BLANK_CH(CUR)) { 5648 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5649 "Space required after 'NDATA'\n"); 5650 } 5651 SKIP_BLANKS; 5652 ndata = xmlParseName(ctxt); 5653 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5654 (ctxt->sax->unparsedEntityDecl != NULL)) 5655 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5656 literal, URI, ndata); 5657 } else { 5658 if ((ctxt->sax != NULL) && 5659 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5660 ctxt->sax->entityDecl(ctxt->userData, name, 5661 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5662 literal, URI, NULL); 5663 /* 5664 * For expat compatibility in SAX mode. 5665 * assuming the entity repalcement was asked for 5666 */ 5667 if ((ctxt->replaceEntities != 0) && 5668 ((ctxt->myDoc == NULL) || 5669 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5670 if (ctxt->myDoc == NULL) { 5671 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5672 if (ctxt->myDoc == NULL) { 5673 xmlErrMemory(ctxt, "New Doc failed"); 5674 return; 5675 } 5676 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5677 } 5678 5679 if (ctxt->myDoc->intSubset == NULL) 5680 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5681 BAD_CAST "fake", NULL, NULL); 5682 xmlSAX2EntityDecl(ctxt, name, 5683 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5684 literal, URI, NULL); 5685 } 5686 } 5687 } 5688 } 5689 if (ctxt->instate == XML_PARSER_EOF) 5690 return; 5691 SKIP_BLANKS; 5692 if (RAW != '>') { 5693 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5694 "xmlParseEntityDecl: entity %s not terminated\n", name); 5695 xmlHaltParser(ctxt); 5696 } else { 5697 if (input != ctxt->input) { 5698 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 
5699 "Entity declaration doesn't start and stop in the same entity\n"); 5700 } 5701 NEXT; 5702 } 5703 if (orig != NULL) { 5704 /* 5705 * Ugly mechanism to save the raw entity value. 5706 */ 5707 xmlEntityPtr cur = NULL; 5708 5709 if (isParameter) { 5710 if ((ctxt->sax != NULL) && 5711 (ctxt->sax->getParameterEntity != NULL)) 5712 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5713 } else { 5714 if ((ctxt->sax != NULL) && 5715 (ctxt->sax->getEntity != NULL)) 5716 cur = ctxt->sax->getEntity(ctxt->userData, name); 5717 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5718 cur = xmlSAX2GetEntity(ctxt, name); 5719 } 5720 } 5721 if (cur != NULL) { 5722 if (cur->orig != NULL) 5723 xmlFree(orig); 5724 else 5725 cur->orig = orig; 5726 } else 5727 xmlFree(orig); 5728 } 5729 if (value != NULL) xmlFree(value); 5730 if (URI != NULL) xmlFree(URI); 5731 if (literal != NULL) xmlFree(literal); 5732 } 5733} 5734 5735/** 5736 * xmlParseDefaultDecl: 5737 * @ctxt: an XML parser context 5738 * @value: Receive a possible fixed default value for the attribute 5739 * 5740 * Parse an attribute default declaration 5741 * 5742 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5743 * 5744 * [ VC: Required Attribute ] 5745 * if the default declaration is the keyword #REQUIRED, then the 5746 * attribute must be specified for all elements of the type in the 5747 * attribute-list declaration. 5748 * 5749 * [ VC: Attribute Default Legal ] 5750 * The declared default value must meet the lexical constraints of 5751 * the declared attribute type c.f. xmlValidateAttributeDecl() 5752 * 5753 * [ VC: Fixed Attribute Default ] 5754 * if an attribute has a default value declared with the #FIXED 5755 * keyword, instances of that attribute must match the default value. 5756 * 5757 * [ WFC: No < in Attribute Values ] 5758 * handled in xmlParseAttValue() 5759 * 5760 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5761 * or XML_ATTRIBUTE_FIXED. 
5762 */ 5763 5764int 5765xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5766 int val; 5767 xmlChar *ret; 5768 5769 *value = NULL; 5770 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5771 SKIP(9); 5772 return(XML_ATTRIBUTE_REQUIRED); 5773 } 5774 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5775 SKIP(8); 5776 return(XML_ATTRIBUTE_IMPLIED); 5777 } 5778 val = XML_ATTRIBUTE_NONE; 5779 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5780 SKIP(6); 5781 val = XML_ATTRIBUTE_FIXED; 5782 if (!IS_BLANK_CH(CUR)) { 5783 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5784 "Space required after '#FIXED'\n"); 5785 } 5786 SKIP_BLANKS; 5787 } 5788 ret = xmlParseAttValue(ctxt); 5789 ctxt->instate = XML_PARSER_DTD; 5790 if (ret == NULL) { 5791 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5792 "Attribute default value declaration error\n"); 5793 } else 5794 *value = ret; 5795 return(val); 5796} 5797 5798/** 5799 * xmlParseNotationType: 5800 * @ctxt: an XML parser context 5801 * 5802 * parse an Notation attribute type. 5803 * 5804 * Note: the leading 'NOTATION' S part has already being parsed... 5805 * 5806 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5807 * 5808 * [ VC: Notation Attributes ] 5809 * Values of this type must match one of the notation names included 5810 * in the declaration; all notation names in the declaration must be declared. 
5811 * 5812 * Returns: the notation attribute tree built while parsing 5813 */ 5814 5815xmlEnumerationPtr 5816xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5817 const xmlChar *name; 5818 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5819 5820 if (RAW != '(') { 5821 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5822 return(NULL); 5823 } 5824 SHRINK; 5825 do { 5826 NEXT; 5827 SKIP_BLANKS; 5828 name = xmlParseName(ctxt); 5829 if (name == NULL) { 5830 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5831 "Name expected in NOTATION declaration\n"); 5832 xmlFreeEnumeration(ret); 5833 return(NULL); 5834 } 5835 tmp = ret; 5836 while (tmp != NULL) { 5837 if (xmlStrEqual(name, tmp->name)) { 5838 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5839 "standalone: attribute notation value token %s duplicated\n", 5840 name, NULL); 5841 if (!xmlDictOwns(ctxt->dict, name)) 5842 xmlFree((xmlChar *) name); 5843 break; 5844 } 5845 tmp = tmp->next; 5846 } 5847 if (tmp == NULL) { 5848 cur = xmlCreateEnumeration(name); 5849 if (cur == NULL) { 5850 xmlFreeEnumeration(ret); 5851 return(NULL); 5852 } 5853 if (last == NULL) ret = last = cur; 5854 else { 5855 last->next = cur; 5856 last = cur; 5857 } 5858 } 5859 SKIP_BLANKS; 5860 } while (RAW == '|'); 5861 if (RAW != ')') { 5862 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5863 xmlFreeEnumeration(ret); 5864 return(NULL); 5865 } 5866 NEXT; 5867 return(ret); 5868} 5869 5870/** 5871 * xmlParseEnumerationType: 5872 * @ctxt: an XML parser context 5873 * 5874 * parse an Enumeration attribute type. 5875 * 5876 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5877 * 5878 * [ VC: Enumeration ] 5879 * Values of this type must match one of the Nmtoken tokens in 5880 * the declaration 5881 * 5882 * Returns: the enumeration attribute tree built while parsing 5883 */ 5884 5885xmlEnumerationPtr 5886xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5887 xmlChar *name; 5888 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5889 5890 if (RAW != '(') { 5891 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5892 return(NULL); 5893 } 5894 SHRINK; 5895 do { 5896 NEXT; 5897 SKIP_BLANKS; 5898 name = xmlParseNmtoken(ctxt); 5899 if (name == NULL) { 5900 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5901 return(ret); 5902 } 5903 tmp = ret; 5904 while (tmp != NULL) { 5905 if (xmlStrEqual(name, tmp->name)) { 5906 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5907 "standalone: attribute enumeration value token %s duplicated\n", 5908 name, NULL); 5909 if (!xmlDictOwns(ctxt->dict, name)) 5910 xmlFree(name); 5911 break; 5912 } 5913 tmp = tmp->next; 5914 } 5915 if (tmp == NULL) { 5916 cur = xmlCreateEnumeration(name); 5917 if (!xmlDictOwns(ctxt->dict, name)) 5918 xmlFree(name); 5919 if (cur == NULL) { 5920 xmlFreeEnumeration(ret); 5921 return(NULL); 5922 } 5923 if (last == NULL) ret = last = cur; 5924 else { 5925 last->next = cur; 5926 last = cur; 5927 } 5928 } 5929 SKIP_BLANKS; 5930 } while (RAW == '|'); 5931 if (RAW != ')') { 5932 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5933 return(ret); 5934 } 5935 NEXT; 5936 return(ret); 5937} 5938 5939/** 5940 * xmlParseEnumeratedType: 5941 * @ctxt: an XML parser context 5942 * @tree: the enumeration tree built while parsing 5943 * 5944 * parse an Enumerated attribute type. 5945 * 5946 * [57] EnumeratedType ::= NotationType | Enumeration 5947 * 5948 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5949 * 5950 * 5951 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5952 */ 5953 5954int 5955xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5956 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5957 SKIP(8); 5958 if (!IS_BLANK_CH(CUR)) { 5959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5960 "Space required after 'NOTATION'\n"); 5961 return(0); 5962 } 5963 SKIP_BLANKS; 5964 *tree = xmlParseNotationType(ctxt); 5965 if (*tree == NULL) return(0); 5966 return(XML_ATTRIBUTE_NOTATION); 5967 } 5968 *tree = xmlParseEnumerationType(ctxt); 5969 if (*tree == NULL) return(0); 5970 return(XML_ATTRIBUTE_ENUMERATION); 5971} 5972 5973/** 5974 * xmlParseAttributeType: 5975 * @ctxt: an XML parser context 5976 * @tree: the enumeration tree built while parsing 5977 * 5978 * parse the Attribute list def for an element 5979 * 5980 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5981 * 5982 * [55] StringType ::= 'CDATA' 5983 * 5984 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5985 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5986 * 5987 * Validity constraints for attribute values syntax are checked in 5988 * xmlValidateAttributeValue() 5989 * 5990 * [ VC: ID ] 5991 * Values of type ID must match the Name production. A name must not 5992 * appear more than once in an XML document as a value of this type; 5993 * i.e., ID values must uniquely identify the elements which bear them. 5994 * 5995 * [ VC: One ID per Element Type ] 5996 * No element type may have more than one ID attribute specified. 5997 * 5998 * [ VC: ID Attribute Default ] 5999 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 6000 * 6001 * [ VC: IDREF ] 6002 * Values of type IDREF must match the Name production, and values 6003 * of type IDREFS must match Names; each IDREF Name must match the value 6004 * of an ID attribute on some element in the XML document; i.e. 
IDREF 6005 * values must match the value of some ID attribute. 6006 * 6007 * [ VC: Entity Name ] 6008 * Values of type ENTITY must match the Name production, values 6009 * of type ENTITIES must match Names; each Entity Name must match the 6010 * name of an unparsed entity declared in the DTD. 6011 * 6012 * [ VC: Name Token ] 6013 * Values of type NMTOKEN must match the Nmtoken production; values 6014 * of type NMTOKENS must match Nmtokens. 6015 * 6016 * Returns the attribute type 6017 */ 6018int 6019xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6020 SHRINK; 6021 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 6022 SKIP(5); 6023 return(XML_ATTRIBUTE_CDATA); 6024 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 6025 SKIP(6); 6026 return(XML_ATTRIBUTE_IDREFS); 6027 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 6028 SKIP(5); 6029 return(XML_ATTRIBUTE_IDREF); 6030 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 6031 SKIP(2); 6032 return(XML_ATTRIBUTE_ID); 6033 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 6034 SKIP(6); 6035 return(XML_ATTRIBUTE_ENTITY); 6036 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6037 SKIP(8); 6038 return(XML_ATTRIBUTE_ENTITIES); 6039 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6040 SKIP(8); 6041 return(XML_ATTRIBUTE_NMTOKENS); 6042 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6043 SKIP(7); 6044 return(XML_ATTRIBUTE_NMTOKEN); 6045 } 6046 return(xmlParseEnumeratedType(ctxt, tree)); 6047} 6048 6049/** 6050 * xmlParseAttributeListDecl: 6051 * @ctxt: an XML parser context 6052 * 6053 * : parse the Attribute list def for an element 6054 * 6055 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 6056 * 6057 * [53] AttDef ::= S Name S AttType S DefaultDecl 6058 * 6059 */ 6060void 6061xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 6062 const xmlChar *elemName; 6063 const xmlChar *attrName; 6064 xmlEnumerationPtr tree; 6065 6066 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 6067 xmlParserInputPtr input = ctxt->input; 6068 6069 SKIP(9); 6070 if (!IS_BLANK_CH(CUR)) { 6071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6072 "Space required after '<!ATTLIST'\n"); 6073 } 6074 SKIP_BLANKS; 6075 elemName = xmlParseName(ctxt); 6076 if (elemName == NULL) { 6077 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6078 "ATTLIST: no name for Element\n"); 6079 return; 6080 } 6081 SKIP_BLANKS; 6082 GROW; 6083 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6084 const xmlChar *check = CUR_PTR; 6085 int type; 6086 int def; 6087 xmlChar *defaultValue = NULL; 6088 6089 GROW; 6090 tree = NULL; 6091 attrName = xmlParseName(ctxt); 6092 if (attrName == NULL) { 6093 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6094 "ATTLIST: no name for Attribute\n"); 6095 break; 6096 } 6097 GROW; 6098 if (!IS_BLANK_CH(CUR)) { 6099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6100 "Space required after the attribute name\n"); 6101 break; 6102 } 6103 SKIP_BLANKS; 6104 6105 type = xmlParseAttributeType(ctxt, &tree); 6106 if (type <= 0) { 6107 break; 6108 } 6109 6110 GROW; 6111 if (!IS_BLANK_CH(CUR)) { 6112 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6113 "Space required after the attribute type\n"); 6114 if (tree != NULL) 6115 xmlFreeEnumeration(tree); 6116 break; 6117 } 6118 SKIP_BLANKS; 6119 6120 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6121 if (def <= 0) { 6122 if (defaultValue != NULL) 6123 xmlFree(defaultValue); 6124 if (tree != NULL) 6125 xmlFreeEnumeration(tree); 6126 break; 6127 } 6128 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6129 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6130 6131 GROW; 6132 if (RAW != '>') { 6133 if 
(!IS_BLANK_CH(CUR)) { 6134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6135 "Space required after the attribute default value\n"); 6136 if (defaultValue != NULL) 6137 xmlFree(defaultValue); 6138 if (tree != NULL) 6139 xmlFreeEnumeration(tree); 6140 break; 6141 } 6142 SKIP_BLANKS; 6143 } 6144 if (check == CUR_PTR) { 6145 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6146 "in xmlParseAttributeListDecl\n"); 6147 if (defaultValue != NULL) 6148 xmlFree(defaultValue); 6149 if (tree != NULL) 6150 xmlFreeEnumeration(tree); 6151 break; 6152 } 6153 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6154 (ctxt->sax->attributeDecl != NULL)) 6155 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6156 type, def, defaultValue, tree); 6157 else if (tree != NULL) 6158 xmlFreeEnumeration(tree); 6159 6160 if ((ctxt->sax2) && (defaultValue != NULL) && 6161 (def != XML_ATTRIBUTE_IMPLIED) && 6162 (def != XML_ATTRIBUTE_REQUIRED)) { 6163 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6164 } 6165 if (ctxt->sax2) { 6166 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6167 } 6168 if (defaultValue != NULL) 6169 xmlFree(defaultValue); 6170 GROW; 6171 } 6172 if (RAW == '>') { 6173 if (input != ctxt->input) { 6174 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6175 "Attribute list declaration doesn't start and stop in the same entity\n", 6176 NULL, NULL); 6177 } 6178 NEXT; 6179 } 6180 } 6181} 6182 6183/** 6184 * xmlParseElementMixedContentDecl: 6185 * @ctxt: an XML parser context 6186 * @inputchk: the input used for the current entity, needed for boundary checks 6187 * 6188 * parse the declaration for a Mixed Element content 6189 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6190 * 6191 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6192 * '(' S? '#PCDATA' S? 
')' 6193 * 6194 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6195 * 6196 * [ VC: No Duplicate Types ] 6197 * The same name must not appear more than once in a single 6198 * mixed-content declaration. 6199 * 6200 * returns: the list of the xmlElementContentPtr describing the element choices 6201 */ 6202xmlElementContentPtr 6203xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6204 xmlElementContentPtr ret = NULL, cur = NULL, n; 6205 const xmlChar *elem = NULL; 6206 6207 GROW; 6208 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6209 SKIP(7); 6210 SKIP_BLANKS; 6211 SHRINK; 6212 if (RAW == ')') { 6213 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6214 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6215"Element content declaration doesn't start and stop in the same entity\n", 6216 NULL, NULL); 6217 } 6218 NEXT; 6219 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6220 if (ret == NULL) 6221 return(NULL); 6222 if (RAW == '*') { 6223 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6224 NEXT; 6225 } 6226 return(ret); 6227 } 6228 if ((RAW == '(') || (RAW == '|')) { 6229 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6230 if (ret == NULL) return(NULL); 6231 } 6232 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6233 NEXT; 6234 if (elem == NULL) { 6235 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6236 if (ret == NULL) return(NULL); 6237 ret->c1 = cur; 6238 if (cur != NULL) 6239 cur->parent = ret; 6240 cur = ret; 6241 } else { 6242 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6243 if (n == NULL) return(NULL); 6244 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6245 if (n->c1 != NULL) 6246 n->c1->parent = n; 6247 cur->c2 = n; 6248 if (n != NULL) 6249 n->parent = cur; 6250 cur = n; 6251 } 6252 SKIP_BLANKS; 6253 elem = xmlParseName(ctxt); 6254 if (elem == NULL) { 
6255 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6256 "xmlParseElementMixedContentDecl : Name expected\n"); 6257 xmlFreeDocElementContent(ctxt->myDoc, cur); 6258 return(NULL); 6259 } 6260 SKIP_BLANKS; 6261 GROW; 6262 } 6263 if ((RAW == ')') && (NXT(1) == '*')) { 6264 if (elem != NULL) { 6265 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6266 XML_ELEMENT_CONTENT_ELEMENT); 6267 if (cur->c2 != NULL) 6268 cur->c2->parent = cur; 6269 } 6270 if (ret != NULL) 6271 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6272 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6273 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6274"Element content declaration doesn't start and stop in the same entity\n", 6275 NULL, NULL); 6276 } 6277 SKIP(2); 6278 } else { 6279 xmlFreeDocElementContent(ctxt->myDoc, ret); 6280 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6281 return(NULL); 6282 } 6283 6284 } else { 6285 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6286 } 6287 return(ret); 6288} 6289 6290/** 6291 * xmlParseElementChildrenContentDeclPriv: 6292 * @ctxt: an XML parser context 6293 * @inputchk: the input used for the current entity, needed for boundary checks 6294 * @depth: the level of recursion 6295 * 6296 * parse the declaration for a Mixed Element content 6297 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6298 * 6299 * 6300 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6301 * 6302 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6303 * 6304 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6305 * 6306 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6307 * 6308 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6309 * TODO Parameter-entity replacement text must be properly nested 6310 * with parenthesized groups. 
That is to say, if either of the 6311 * opening or closing parentheses in a choice, seq, or Mixed 6312 * construct is contained in the replacement text for a parameter 6313 * entity, both must be contained in the same replacement text. For 6314 * interoperability, if a parameter-entity reference appears in a 6315 * choice, seq, or Mixed construct, its replacement text should not 6316 * be empty, and neither the first nor last non-blank character of 6317 * the replacement text should be a connector (| or ,). 6318 * 6319 * Returns the tree of xmlElementContentPtr describing the element 6320 * hierarchy. 6321 */ 6322static xmlElementContentPtr 6323xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6324 int depth) { 6325 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6326 const xmlChar *elem; 6327 xmlChar type = 0; 6328 6329 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6330 (depth > 2048)) { 6331 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6332"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6333 depth); 6334 return(NULL); 6335 } 6336 SKIP_BLANKS; 6337 GROW; 6338 if (RAW == '(') { 6339 int inputid = ctxt->input->id; 6340 6341 /* Recurse on first child */ 6342 NEXT; 6343 SKIP_BLANKS; 6344 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6345 depth + 1); 6346 SKIP_BLANKS; 6347 GROW; 6348 } else { 6349 elem = xmlParseName(ctxt); 6350 if (elem == NULL) { 6351 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6352 return(NULL); 6353 } 6354 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6355 if (cur == NULL) { 6356 xmlErrMemory(ctxt, NULL); 6357 return(NULL); 6358 } 6359 GROW; 6360 if (RAW == '?') { 6361 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6362 NEXT; 6363 } else if (RAW == '*') { 6364 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6365 NEXT; 6366 } else if (RAW == '+') { 6367 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6368 NEXT; 6369 } else { 6370 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6371 } 6372 GROW; 6373 } 6374 SKIP_BLANKS; 6375 SHRINK; 6376 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6377 /* 6378 * Each loop we parse one separator and one element. 6379 */ 6380 if (RAW == ',') { 6381 if (type == 0) type = CUR; 6382 6383 /* 6384 * Detect "Name | Name , Name" error 6385 */ 6386 else if (type != CUR) { 6387 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6388 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6389 type); 6390 if ((last != NULL) && (last != ret)) 6391 xmlFreeDocElementContent(ctxt->myDoc, last); 6392 if (ret != NULL) 6393 xmlFreeDocElementContent(ctxt->myDoc, ret); 6394 return(NULL); 6395 } 6396 NEXT; 6397 6398 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6399 if (op == NULL) { 6400 if ((last != NULL) && (last != ret)) 6401 xmlFreeDocElementContent(ctxt->myDoc, last); 6402 xmlFreeDocElementContent(ctxt->myDoc, ret); 6403 return(NULL); 6404 } 6405 if (last == NULL) { 6406 op->c1 = ret; 6407 if (ret != NULL) 6408 ret->parent = op; 6409 ret = cur = op; 6410 } else { 6411 cur->c2 = op; 6412 if (op != NULL) 6413 op->parent = cur; 6414 op->c1 = last; 6415 if (last != NULL) 6416 last->parent = op; 6417 cur =op; 6418 last = NULL; 6419 } 6420 } else if (RAW == '|') { 6421 if (type == 0) type = CUR; 6422 6423 /* 6424 * Detect "Name , Name | Name" error 6425 */ 6426 else if (type != CUR) { 6427 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6428 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6429 type); 6430 if ((last != NULL) && (last != ret)) 6431 xmlFreeDocElementContent(ctxt->myDoc, last); 6432 if (ret != NULL) 6433 xmlFreeDocElementContent(ctxt->myDoc, ret); 6434 return(NULL); 6435 } 6436 NEXT; 6437 6438 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6439 if (op == NULL) { 6440 if ((last != NULL) && (last != ret)) 6441 
xmlFreeDocElementContent(ctxt->myDoc, last); 6442 if (ret != NULL) 6443 xmlFreeDocElementContent(ctxt->myDoc, ret); 6444 return(NULL); 6445 } 6446 if (last == NULL) { 6447 op->c1 = ret; 6448 if (ret != NULL) 6449 ret->parent = op; 6450 ret = cur = op; 6451 } else { 6452 cur->c2 = op; 6453 if (op != NULL) 6454 op->parent = cur; 6455 op->c1 = last; 6456 if (last != NULL) 6457 last->parent = op; 6458 cur =op; 6459 last = NULL; 6460 } 6461 } else { 6462 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6463 if ((last != NULL) && (last != ret)) 6464 xmlFreeDocElementContent(ctxt->myDoc, last); 6465 if (ret != NULL) 6466 xmlFreeDocElementContent(ctxt->myDoc, ret); 6467 return(NULL); 6468 } 6469 GROW; 6470 SKIP_BLANKS; 6471 GROW; 6472 if (RAW == '(') { 6473 int inputid = ctxt->input->id; 6474 /* Recurse on second child */ 6475 NEXT; 6476 SKIP_BLANKS; 6477 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6478 depth + 1); 6479 SKIP_BLANKS; 6480 } else { 6481 elem = xmlParseName(ctxt); 6482 if (elem == NULL) { 6483 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6484 if (ret != NULL) 6485 xmlFreeDocElementContent(ctxt->myDoc, ret); 6486 return(NULL); 6487 } 6488 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6489 if (last == NULL) { 6490 if (ret != NULL) 6491 xmlFreeDocElementContent(ctxt->myDoc, ret); 6492 return(NULL); 6493 } 6494 if (RAW == '?') { 6495 last->ocur = XML_ELEMENT_CONTENT_OPT; 6496 NEXT; 6497 } else if (RAW == '*') { 6498 last->ocur = XML_ELEMENT_CONTENT_MULT; 6499 NEXT; 6500 } else if (RAW == '+') { 6501 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6502 NEXT; 6503 } else { 6504 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6505 } 6506 } 6507 SKIP_BLANKS; 6508 GROW; 6509 } 6510 if ((cur != NULL) && (last != NULL)) { 6511 cur->c2 = last; 6512 if (last != NULL) 6513 last->parent = cur; 6514 } 6515 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6516 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 
6517"Element content declaration doesn't start and stop in the same entity\n", 6518 NULL, NULL); 6519 } 6520 NEXT; 6521 if (RAW == '?') { 6522 if (ret != NULL) { 6523 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6524 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6525 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6526 else 6527 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6528 } 6529 NEXT; 6530 } else if (RAW == '*') { 6531 if (ret != NULL) { 6532 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6533 cur = ret; 6534 /* 6535 * Some normalization: 6536 * (a | b* | c?)* == (a | b | c)* 6537 */ 6538 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6539 if ((cur->c1 != NULL) && 6540 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6541 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6542 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6543 if ((cur->c2 != NULL) && 6544 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6545 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6546 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6547 cur = cur->c2; 6548 } 6549 } 6550 NEXT; 6551 } else if (RAW == '+') { 6552 if (ret != NULL) { 6553 int found = 0; 6554 6555 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6556 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6557 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6558 else 6559 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6560 /* 6561 * Some normalization: 6562 * (a | b*)+ == (a | b)* 6563 * (a | b?)+ == (a | b)* 6564 */ 6565 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6566 if ((cur->c1 != NULL) && 6567 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6568 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6569 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6570 found = 1; 6571 } 6572 if ((cur->c2 != NULL) && 6573 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6574 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6575 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6576 found = 1; 6577 } 6578 cur = cur->c2; 6579 } 6580 if (found) 6581 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6582 } 6583 NEXT; 
6584 } 6585 return(ret); 6586} 6587 6588/** 6589 * xmlParseElementChildrenContentDecl: 6590 * @ctxt: an XML parser context 6591 * @inputchk: the input used for the current entity, needed for boundary checks 6592 * 6593 * parse the declaration for a Mixed Element content 6594 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6595 * 6596 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6597 * 6598 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6599 * 6600 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6601 * 6602 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6603 * 6604 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6605 * TODO Parameter-entity replacement text must be properly nested 6606 * with parenthesized groups. That is to say, if either of the 6607 * opening or closing parentheses in a choice, seq, or Mixed 6608 * construct is contained in the replacement text for a parameter 6609 * entity, both must be contained in the same replacement text. For 6610 * interoperability, if a parameter-entity reference appears in a 6611 * choice, seq, or Mixed construct, its replacement text should not 6612 * be empty, and neither the first nor last non-blank character of 6613 * the replacement text should be a connector (| or ,). 6614 * 6615 * Returns the tree of xmlElementContentPtr describing the element 6616 * hierarchy. 6617 */ 6618xmlElementContentPtr 6619xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6620 /* stub left for API/ABI compat */ 6621 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6622} 6623 6624/** 6625 * xmlParseElementContentDecl: 6626 * @ctxt: an XML parser context 6627 * @name: the name of the element being defined. 
6628 * @result: the Element Content pointer will be stored here if any 6629 * 6630 * parse the declaration for an Element content either Mixed or Children, 6631 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6632 * 6633 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6634 * 6635 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6636 */ 6637 6638int 6639xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6640 xmlElementContentPtr *result) { 6641 6642 xmlElementContentPtr tree = NULL; 6643 int inputid = ctxt->input->id; 6644 int res; 6645 6646 *result = NULL; 6647 6648 if (RAW != '(') { 6649 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6650 "xmlParseElementContentDecl : %s '(' expected\n", name); 6651 return(-1); 6652 } 6653 NEXT; 6654 GROW; 6655 if (ctxt->instate == XML_PARSER_EOF) 6656 return(-1); 6657 SKIP_BLANKS; 6658 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6659 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6660 res = XML_ELEMENT_TYPE_MIXED; 6661 } else { 6662 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6663 res = XML_ELEMENT_TYPE_ELEMENT; 6664 } 6665 SKIP_BLANKS; 6666 *result = tree; 6667 return(res); 6668} 6669 6670/** 6671 * xmlParseElementDecl: 6672 * @ctxt: an XML parser context 6673 * 6674 * parse an Element declaration. 6675 * 6676 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6677 * 6678 * [ VC: Unique Element Type Declaration ] 6679 * No element type may be declared more than once 6680 * 6681 * Returns the type of the element, or -1 in case of error 6682 */ 6683int 6684xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6685 const xmlChar *name; 6686 int ret = -1; 6687 xmlElementContentPtr content = NULL; 6688 6689 /* GROW; done in the caller */ 6690 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6691 xmlParserInputPtr input = ctxt->input; 6692 6693 SKIP(9); 6694 if (!IS_BLANK_CH(CUR)) { 6695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6696 "Space required after 'ELEMENT'\n"); 6697 } 6698 SKIP_BLANKS; 6699 name = xmlParseName(ctxt); 6700 if (name == NULL) { 6701 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6702 "xmlParseElementDecl: no name for Element\n"); 6703 return(-1); 6704 } 6705 while ((RAW == 0) && (ctxt->inputNr > 1)) 6706 xmlPopInput(ctxt); 6707 if (!IS_BLANK_CH(CUR)) { 6708 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6709 "Space required after the element name\n"); 6710 } 6711 SKIP_BLANKS; 6712 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6713 SKIP(5); 6714 /* 6715 * Element must always be empty. 6716 */ 6717 ret = XML_ELEMENT_TYPE_EMPTY; 6718 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6719 (NXT(2) == 'Y')) { 6720 SKIP(3); 6721 /* 6722 * Element is a generic container. 6723 */ 6724 ret = XML_ELEMENT_TYPE_ANY; 6725 } else if (RAW == '(') { 6726 ret = xmlParseElementContentDecl(ctxt, name, &content); 6727 } else { 6728 /* 6729 * [ WFC: PEs in Internal Subset ] error handling. 
6730 */ 6731 if ((RAW == '%') && (ctxt->external == 0) && 6732 (ctxt->inputNr == 1)) { 6733 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6734 "PEReference: forbidden within markup decl in internal subset\n"); 6735 } else { 6736 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6737 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6738 } 6739 return(-1); 6740 } 6741 6742 SKIP_BLANKS; 6743 /* 6744 * Pop-up of finished entities. 6745 */ 6746 while ((RAW == 0) && (ctxt->inputNr > 1)) 6747 xmlPopInput(ctxt); 6748 SKIP_BLANKS; 6749 6750 if (RAW != '>') { 6751 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6752 if (content != NULL) { 6753 xmlFreeDocElementContent(ctxt->myDoc, content); 6754 } 6755 } else { 6756 if (input != ctxt->input) { 6757 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6758 "Element declaration doesn't start and stop in the same entity\n"); 6759 } 6760 6761 NEXT; 6762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6763 (ctxt->sax->elementDecl != NULL)) { 6764 if (content != NULL) 6765 content->parent = NULL; 6766 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6767 content); 6768 if ((content != NULL) && (content->parent == NULL)) { 6769 /* 6770 * this is a trick: if xmlAddElementDecl is called, 6771 * instead of copying the full tree it is plugged directly 6772 * if called from the parser. Avoid duplicating the 6773 * interfaces or change the API/ABI 6774 */ 6775 xmlFreeDocElementContent(ctxt->myDoc, content); 6776 } 6777 } else if (content != NULL) { 6778 xmlFreeDocElementContent(ctxt->myDoc, content); 6779 } 6780 } 6781 } 6782 return(ret); 6783} 6784 6785/** 6786 * xmlParseConditionalSections 6787 * @ctxt: an XML parser context 6788 * 6789 * [61] conditionalSect ::= includeSect | ignoreSect 6790 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6791 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
 *                      '[' ignoreSectContents* ']]>'
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * The function is entered with the input positioned on the '<![' opening
 * the section. An INCLUDE section is parsed declaration by declaration,
 * recursing on nested conditional sections; an IGNORE section is skipped
 * while counting nested '<![' ... ']]>' pairs. A missing '[' or an unknown
 * keyword halts the parser.
 */

static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /* Input holding the opening '<![', used for the entity boundary checks. */
    int id = ctxt->input->id;

    /* Skip the '<![' opening the section. */
    SKIP(3);
    SKIP_BLANKS;
    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
	SKIP(7);
	SKIP_BLANKS;
	if (RAW != '[') {
	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
	    xmlHaltParser(ctxt);
	    return;
	} else {
	    if (ctxt->input->id != id) {
		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
	    "All markup of the conditional section is not in the same entity\n",
				     NULL, NULL);
	    }
	    NEXT;
	}
	if (xmlParserDebugEntities) {
	    if ((ctxt->input != NULL) && (ctxt->input->filename))
		xmlGenericError(xmlGenericErrorContext,
			"%s(%d): ", ctxt->input->filename,
			ctxt->input->line);
	    xmlGenericError(xmlGenericErrorContext,
		    "Entering INCLUDE Conditional Section\n");
	}

	/*
	 * Parse the section content until ']]>', the end of the input or
	 * a parser stop.
	 */
	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
	    /* Position before this iteration, to detect lack of progress. */
	    const xmlChar *check = CUR_PTR;
	    unsigned int cons = ctxt->input->consumed;

	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
		/* Nested conditional section: handled recursively. */
		xmlParseConditionalSections(ctxt);
	    } else if (IS_BLANK_CH(CUR)) {
		NEXT;
	    } else if (RAW == '%') {
		xmlParsePEReference(ctxt);
	    } else
		xmlParseMarkupDecl(ctxt);

	    /*
	     * Pop-up of finished entities.
	     */
	    while ((RAW == 0) && (ctxt->inputNr > 1))
		xmlPopInput(ctxt);

	    /* Nothing was consumed: report the error and stop looping. */
	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
		break;
	    }
	}
	if (xmlParserDebugEntities) {
	    if ((ctxt->input != NULL) && (ctxt->input->filename))
		xmlGenericError(xmlGenericErrorContext,
			"%s(%d): ", ctxt->input->filename,
			ctxt->input->line);
	    xmlGenericError(xmlGenericErrorContext,
		    "Leaving INCLUDE Conditional Section\n");
	}

    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
	/* Saved ctxt->disableSAX and ctxt->instate, restored after the skip. */
	int state;
	xmlParserInputState instate;
	/* Nesting level of '<![' inside the ignored section. */
	int depth = 0;

	SKIP(6);
	SKIP_BLANKS;
	if (RAW != '[') {
	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
	    xmlHaltParser(ctxt);
	    return;
	} else {
	    if (ctxt->input->id != id) {
		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
	    "All markup of the conditional section is not in the same entity\n",
				     NULL, NULL);
	    }
	    NEXT;
	}
	if (xmlParserDebugEntities) {
	    if ((ctxt->input != NULL) && (ctxt->input->filename))
		xmlGenericError(xmlGenericErrorContext,
			"%s(%d): ", ctxt->input->filename,
			ctxt->input->line);
	    xmlGenericError(xmlGenericErrorContext,
		    "Entering IGNORE Conditional Section\n");
	}

	/*
	 * Parse up to the end of the conditional section
	 * But disable SAX event generating DTD building in the meantime
	 */
	state = ctxt->disableSAX;
	instate = ctxt->instate;
	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	ctxt->instate = XML_PARSER_IGNORE;

	/*
	 * depth drops below zero on the ']]>' matching the section being
	 * skipped; that final ']]>' is left in the input and consumed by
	 * the common code at the end of the function.
	 */
	while (((depth >= 0) && (RAW != 0)) &&
               (ctxt->instate != XML_PARSER_EOF)) {
	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
	    depth++;
	    SKIP(3);
	    continue;
	  }
	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
	    if (--depth >= 0) SKIP(3);
	    continue;
	  }
	  NEXT;
	  continue;
	}

	ctxt->disableSAX = state;
	ctxt->instate = instate;

	if (xmlParserDebugEntities) {
	    if ((ctxt->input != NULL) && (ctxt->input->filename))
		xmlGenericError(xmlGenericErrorContext,
			"%s(%d): ", ctxt->input->filename,
			ctxt->input->line);
	    xmlGenericError(xmlGenericErrorContext,
		    "Leaving IGNORE Conditional Section\n");
	}

    } else {
	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
	xmlHaltParser(ctxt);
	return;
    }

    if (RAW == 0)
        SHRINK;

    if (RAW == 0) {
	/* The input ended before the closing ']]>'. */
	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
    } else {
	if (ctxt->input->id != id) {
	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
	"All markup of the conditional section is not in the same entity\n",
				 NULL, NULL);
	}
	/* Skip the closing ']]>' only if three bytes are really available. */
	if ((ctxt-> instate != XML_PARSER_EOF) &&
	    ((ctxt->input->cur + 3) <= ctxt->input->end))
	    SKIP(3);
    }
}

/**
 * xmlParseMarkupDecl:
 * @ctxt:  an XML parser context
 *
 * parse Markup declarations
 *
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
 *                     NotationDecl | PI | Comment
 *
 * [ VC: Proper Declaration/PE Nesting ]
 * Parameter-entity replacement text must be properly nested with
 * markup declarations. That is to say, if either the first character
 * or the last character of a markup declaration (markupdecl above) is
 * contained in the replacement text for a parameter-entity reference,
 * both must be contained in the same replacement text.
 *
 * [ WFC: PEs in Internal Subset ]
 * In the internal DTD subset, parameter-entity references can occur
 * only where markup declarations can occur, not within markup declarations.
 * (This does not apply to references that occur in external parameter
 * entities or to the external subset.)
6968 */ 6969void 6970xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6971 GROW; 6972 if (CUR == '<') { 6973 if (NXT(1) == '!') { 6974 switch (NXT(2)) { 6975 case 'E': 6976 if (NXT(3) == 'L') 6977 xmlParseElementDecl(ctxt); 6978 else if (NXT(3) == 'N') 6979 xmlParseEntityDecl(ctxt); 6980 break; 6981 case 'A': 6982 xmlParseAttributeListDecl(ctxt); 6983 break; 6984 case 'N': 6985 xmlParseNotationDecl(ctxt); 6986 break; 6987 case '-': 6988 xmlParseComment(ctxt); 6989 break; 6990 default: 6991 /* there is an error but it will be detected later */ 6992 break; 6993 } 6994 } else if (NXT(1) == '?') { 6995 xmlParsePI(ctxt); 6996 } 6997 } 6998 6999 /* 7000 * detect requirement to exit there and act accordingly 7001 * and avoid having instate overriden later on 7002 */ 7003 if (ctxt->instate == XML_PARSER_EOF) 7004 return; 7005 7006 /* 7007 * This is only for internal subset. On external entities, 7008 * the replacement is done before parsing stage 7009 */ 7010 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 7011 xmlParsePEReference(ctxt); 7012 7013 /* 7014 * Conditional sections are allowed from entities included 7015 * by PE References in the internal subset. 7016 */ 7017 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 7018 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7019 xmlParseConditionalSections(ctxt); 7020 } 7021 } 7022 7023 ctxt->instate = XML_PARSER_DTD; 7024} 7025 7026/** 7027 * xmlParseTextDecl: 7028 * @ctxt: an XML parser context 7029 * 7030 * parse an XML declaration header for external entities 7031 * 7032 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 7033 */ 7034 7035void 7036xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 7037 xmlChar *version; 7038 const xmlChar *encoding; 7039 7040 /* 7041 * We know that '<?xml' is here. 
7042 */ 7043 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 7044 SKIP(5); 7045 } else { 7046 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 7047 return; 7048 } 7049 7050 if (!IS_BLANK_CH(CUR)) { 7051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7052 "Space needed after '<?xml'\n"); 7053 } 7054 SKIP_BLANKS; 7055 7056 /* 7057 * We may have the VersionInfo here. 7058 */ 7059 version = xmlParseVersionInfo(ctxt); 7060 if (version == NULL) 7061 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7062 else { 7063 if (!IS_BLANK_CH(CUR)) { 7064 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7065 "Space needed here\n"); 7066 } 7067 } 7068 ctxt->input->version = version; 7069 7070 /* 7071 * We must have the encoding declaration 7072 */ 7073 encoding = xmlParseEncodingDecl(ctxt); 7074 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7075 /* 7076 * The XML REC instructs us to stop parsing right here 7077 */ 7078 return; 7079 } 7080 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 7081 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 7082 "Missing encoding in text declaration\n"); 7083 } 7084 7085 SKIP_BLANKS; 7086 if ((RAW == '?') && (NXT(1) == '>')) { 7087 SKIP(2); 7088 } else if (RAW == '>') { 7089 /* Deprecated old WD ... */ 7090 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7091 NEXT; 7092 } else { 7093 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7094 MOVETO_ENDTAG(CUR_PTR); 7095 NEXT; 7096 } 7097} 7098 7099/** 7100 * xmlParseExternalSubset: 7101 * @ctxt: an XML parser context 7102 * @ExternalID: the external identifier 7103 * @SystemID: the system identifier (or URL) 7104 * 7105 * parse Markup declarations from an external subset 7106 * 7107 * [30] extSubset ::= textDecl? 
extSubsetDecl 7108 * 7109 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7110 */ 7111void 7112xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7113 const xmlChar *SystemID) { 7114 xmlDetectSAX2(ctxt); 7115 GROW; 7116 7117 if ((ctxt->encoding == NULL) && 7118 (ctxt->input->end - ctxt->input->cur >= 4)) { 7119 xmlChar start[4]; 7120 xmlCharEncoding enc; 7121 7122 start[0] = RAW; 7123 start[1] = NXT(1); 7124 start[2] = NXT(2); 7125 start[3] = NXT(3); 7126 enc = xmlDetectCharEncoding(start, 4); 7127 if (enc != XML_CHAR_ENCODING_NONE) 7128 xmlSwitchEncoding(ctxt, enc); 7129 } 7130 7131 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7132 xmlParseTextDecl(ctxt); 7133 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7134 /* 7135 * The XML REC instructs us to stop parsing right here 7136 */ 7137 xmlHaltParser(ctxt); 7138 return; 7139 } 7140 } 7141 if (ctxt->myDoc == NULL) { 7142 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7143 if (ctxt->myDoc == NULL) { 7144 xmlErrMemory(ctxt, "New Doc failed"); 7145 return; 7146 } 7147 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7148 } 7149 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7150 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7151 7152 ctxt->instate = XML_PARSER_DTD; 7153 ctxt->external = 1; 7154 while (((RAW == '<') && (NXT(1) == '?')) || 7155 ((RAW == '<') && (NXT(1) == '!')) || 7156 (RAW == '%') || IS_BLANK_CH(CUR)) { 7157 const xmlChar *check = CUR_PTR; 7158 unsigned int cons = ctxt->input->consumed; 7159 7160 GROW; 7161 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7162 xmlParseConditionalSections(ctxt); 7163 } else if (IS_BLANK_CH(CUR)) { 7164 NEXT; 7165 } else if (RAW == '%') { 7166 xmlParsePEReference(ctxt); 7167 } else 7168 xmlParseMarkupDecl(ctxt); 7169 7170 /* 7171 * Pop-up of finished entities. 
7172 */ 7173 while ((RAW == 0) && (ctxt->inputNr > 1)) 7174 xmlPopInput(ctxt); 7175 7176 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7177 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7178 break; 7179 } 7180 } 7181 7182 if (RAW != 0) { 7183 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7184 } 7185 7186} 7187 7188/** 7189 * xmlParseReference: 7190 * @ctxt: an XML parser context 7191 * 7192 * parse and handle entity references in content, depending on the SAX 7193 * interface, this may end-up in a call to character() if this is a 7194 * CharRef, a predefined entity, if there is no reference() callback. 7195 * or if the parser was asked to switch to that mode. 7196 * 7197 * [67] Reference ::= EntityRef | CharRef 7198 */ 7199void 7200xmlParseReference(xmlParserCtxtPtr ctxt) { 7201 xmlEntityPtr ent; 7202 xmlChar *val; 7203 int was_checked; 7204 xmlNodePtr list = NULL; 7205 xmlParserErrors ret = XML_ERR_OK; 7206 7207 7208 if (RAW != '&') 7209 return; 7210 7211 /* 7212 * Simple case of a CharRef 7213 */ 7214 if (NXT(1) == '#') { 7215 int i = 0; 7216 xmlChar out[10]; 7217 int hex = NXT(2); 7218 int value = xmlParseCharRef(ctxt); 7219 7220 if (value == 0) 7221 return; 7222 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7223 /* 7224 * So we are using non-UTF-8 buffers 7225 * Check that the char fit on 8bits, if not 7226 * generate a CharRef. 
7227 */ 7228 if (value <= 0xFF) { 7229 out[0] = value; 7230 out[1] = 0; 7231 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7232 (!ctxt->disableSAX)) 7233 ctxt->sax->characters(ctxt->userData, out, 1); 7234 } else { 7235 if ((hex == 'x') || (hex == 'X')) 7236 snprintf((char *)out, sizeof(out), "#x%X", value); 7237 else 7238 snprintf((char *)out, sizeof(out), "#%d", value); 7239 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7240 (!ctxt->disableSAX)) 7241 ctxt->sax->reference(ctxt->userData, out); 7242 } 7243 } else { 7244 /* 7245 * Just encode the value in UTF-8 7246 */ 7247 COPY_BUF(0 ,out, i, value); 7248 out[i] = 0; 7249 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7250 (!ctxt->disableSAX)) 7251 ctxt->sax->characters(ctxt->userData, out, i); 7252 } 7253 return; 7254 } 7255 7256 /* 7257 * We are seeing an entity reference 7258 */ 7259 ent = xmlParseEntityRef(ctxt); 7260 if (ent == NULL) return; 7261 if (!ctxt->wellFormed) 7262 return; 7263 was_checked = ent->checked; 7264 7265 /* special case of predefined entities */ 7266 if ((ent->name == NULL) || 7267 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7268 val = ent->content; 7269 if (val == NULL) return; 7270 /* 7271 * inline the entity. 7272 */ 7273 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7274 (!ctxt->disableSAX)) 7275 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7276 return; 7277 } 7278 7279 /* 7280 * The first reference to the entity trigger a parsing phase 7281 * where the ent->children is filled with the result from 7282 * the parsing. 7283 * Note: external parsed entities will not be loaded, it is not 7284 * required for a non-validating parser, unless the parsing option 7285 * of validating, or substituting entities were given. Doing so is 7286 * far more secure as the parser will only process data coming from 7287 * the document entity by default. 
7288 */ 7289 if (((ent->checked == 0) || 7290 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7291 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7292 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7293 unsigned long oldnbent = ctxt->nbentities; 7294 7295 /* 7296 * This is a bit hackish but this seems the best 7297 * way to make sure both SAX and DOM entity support 7298 * behaves okay. 7299 */ 7300 void *user_data; 7301 if (ctxt->userData == ctxt) 7302 user_data = NULL; 7303 else 7304 user_data = ctxt->userData; 7305 7306 /* 7307 * Check that this entity is well formed 7308 * 4.3.2: An internal general parsed entity is well-formed 7309 * if its replacement text matches the production labeled 7310 * content. 7311 */ 7312 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7313 ctxt->depth++; 7314 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7315 user_data, &list); 7316 ctxt->depth--; 7317 7318 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7319 ctxt->depth++; 7320 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7321 user_data, ctxt->depth, ent->URI, 7322 ent->ExternalID, &list); 7323 ctxt->depth--; 7324 } else { 7325 ret = XML_ERR_ENTITY_PE_INTERNAL; 7326 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7327 "invalid entity type found\n", NULL); 7328 } 7329 7330 /* 7331 * Store the number of entities needing parsing for this entity 7332 * content and do checkings 7333 */ 7334 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7335 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7336 ent->checked |= 1; 7337 if (ret == XML_ERR_ENTITY_LOOP) { 7338 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7339 xmlFreeNodeList(list); 7340 return; 7341 } 7342 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7343 xmlFreeNodeList(list); 7344 return; 7345 } 7346 7347 if ((ret == XML_ERR_OK) && (list != NULL)) { 7348 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7349 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7350 (ent->children == NULL)) { 7351 ent->children = list; 7352 if (ctxt->replaceEntities) { 7353 /* 7354 * Prune it directly in the generated document 7355 * except for single text nodes. 7356 */ 7357 if (((list->type == XML_TEXT_NODE) && 7358 (list->next == NULL)) || 7359 (ctxt->parseMode == XML_PARSE_READER)) { 7360 list->parent = (xmlNodePtr) ent; 7361 list = NULL; 7362 ent->owner = 1; 7363 } else { 7364 ent->owner = 0; 7365 while (list != NULL) { 7366 list->parent = (xmlNodePtr) ctxt->node; 7367 list->doc = ctxt->myDoc; 7368 if (list->next == NULL) 7369 ent->last = list; 7370 list = list->next; 7371 } 7372 list = ent->children; 7373#ifdef LIBXML_LEGACY_ENABLED 7374 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7375 xmlAddEntityReference(ent, list, NULL); 7376#endif /* LIBXML_LEGACY_ENABLED */ 7377 } 7378 } else { 7379 ent->owner = 1; 7380 while (list != NULL) { 7381 list->parent = (xmlNodePtr) ent; 7382 xmlSetTreeDoc(list, ent->doc); 7383 if (list->next == NULL) 7384 ent->last = list; 7385 list = list->next; 7386 } 7387 } 7388 } else { 7389 xmlFreeNodeList(list); 7390 list = NULL; 7391 } 7392 } else if ((ret != XML_ERR_OK) && 7393 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7394 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7395 "Entity '%s' failed to parse\n", ent->name); 7396 xmlParserEntityCheck(ctxt, 0, ent, 0); 7397 } else if (list != NULL) { 7398 xmlFreeNodeList(list); 7399 list = NULL; 7400 } 7401 if (ent->checked == 0) 7402 ent->checked = 2; 7403 } else if (ent->checked != 1) { 7404 ctxt->nbentities += ent->checked / 2; 7405 } 7406 7407 /* 7408 * Now that the entity content has been gathered 7409 * provide it to the application, this can take different forms based 7410 * on the parsing modes. 7411 */ 7412 if (ent->children == NULL) { 7413 /* 7414 * Probably running in SAX mode and the callbacks don't 7415 * build the entity content. 
So unless we already went 7416 * though parsing for first checking go though the entity 7417 * content to generate callbacks associated to the entity 7418 */ 7419 if (was_checked != 0) { 7420 void *user_data; 7421 /* 7422 * This is a bit hackish but this seems the best 7423 * way to make sure both SAX and DOM entity support 7424 * behaves okay. 7425 */ 7426 if (ctxt->userData == ctxt) 7427 user_data = NULL; 7428 else 7429 user_data = ctxt->userData; 7430 7431 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7432 ctxt->depth++; 7433 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7434 ent->content, user_data, NULL); 7435 ctxt->depth--; 7436 } else if (ent->etype == 7437 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7438 ctxt->depth++; 7439 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7440 ctxt->sax, user_data, ctxt->depth, 7441 ent->URI, ent->ExternalID, NULL); 7442 ctxt->depth--; 7443 } else { 7444 ret = XML_ERR_ENTITY_PE_INTERNAL; 7445 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7446 "invalid entity type found\n", NULL); 7447 } 7448 if (ret == XML_ERR_ENTITY_LOOP) { 7449 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7450 return; 7451 } 7452 } 7453 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7454 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7455 /* 7456 * Entity reference callback comes second, it's somewhat 7457 * superfluous but a compatibility to historical behaviour 7458 */ 7459 ctxt->sax->reference(ctxt->userData, ent->name); 7460 } 7461 return; 7462 } 7463 7464 /* 7465 * If we didn't get any children for the entity being built 7466 */ 7467 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7468 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7469 /* 7470 * Create a node. 
7471 */ 7472 ctxt->sax->reference(ctxt->userData, ent->name); 7473 return; 7474 } 7475 7476 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7477 /* 7478 * There is a problem on the handling of _private for entities 7479 * (bug 155816): Should we copy the content of the field from 7480 * the entity (possibly overwriting some value set by the user 7481 * when a copy is created), should we leave it alone, or should 7482 * we try to take care of different situations? The problem 7483 * is exacerbated by the usage of this field by the xmlReader. 7484 * To fix this bug, we look at _private on the created node 7485 * and, if it's NULL, we copy in whatever was in the entity. 7486 * If it's not NULL we leave it alone. This is somewhat of a 7487 * hack - maybe we should have further tests to determine 7488 * what to do. 7489 */ 7490 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7491 /* 7492 * Seems we are generating the DOM content, do 7493 * a simple tree copy for all references except the first 7494 * In the first occurrence list contains the replacement. 7495 */ 7496 if (((list == NULL) && (ent->owner == 0)) || 7497 (ctxt->parseMode == XML_PARSE_READER)) { 7498 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7499 7500 /* 7501 * We are copying here, make sure there is no abuse 7502 */ 7503 ctxt->sizeentcopy += ent->length + 5; 7504 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7505 return; 7506 7507 /* 7508 * when operating on a reader, the entities definitions 7509 * are always owning the entities subtree. 
7510 if (ctxt->parseMode == XML_PARSE_READER) 7511 ent->owner = 1; 7512 */ 7513 7514 cur = ent->children; 7515 while (cur != NULL) { 7516 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7517 if (nw != NULL) { 7518 if (nw->_private == NULL) 7519 nw->_private = cur->_private; 7520 if (firstChild == NULL){ 7521 firstChild = nw; 7522 } 7523 nw = xmlAddChild(ctxt->node, nw); 7524 } 7525 if (cur == ent->last) { 7526 /* 7527 * needed to detect some strange empty 7528 * node cases in the reader tests 7529 */ 7530 if ((ctxt->parseMode == XML_PARSE_READER) && 7531 (nw != NULL) && 7532 (nw->type == XML_ELEMENT_NODE) && 7533 (nw->children == NULL)) 7534 nw->extra = 1; 7535 7536 break; 7537 } 7538 cur = cur->next; 7539 } 7540#ifdef LIBXML_LEGACY_ENABLED 7541 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7542 xmlAddEntityReference(ent, firstChild, nw); 7543#endif /* LIBXML_LEGACY_ENABLED */ 7544 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7545 xmlNodePtr nw = NULL, cur, next, last, 7546 firstChild = NULL; 7547 7548 /* 7549 * We are copying here, make sure there is no abuse 7550 */ 7551 ctxt->sizeentcopy += ent->length + 5; 7552 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7553 return; 7554 7555 /* 7556 * Copy the entity child list and make it the new 7557 * entity child list. The goal is to make sure any 7558 * ID or REF referenced will be the one from the 7559 * document content and not the entity copy. 
7560 */ 7561 cur = ent->children; 7562 ent->children = NULL; 7563 last = ent->last; 7564 ent->last = NULL; 7565 while (cur != NULL) { 7566 next = cur->next; 7567 cur->next = NULL; 7568 cur->parent = NULL; 7569 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7570 if (nw != NULL) { 7571 if (nw->_private == NULL) 7572 nw->_private = cur->_private; 7573 if (firstChild == NULL){ 7574 firstChild = cur; 7575 } 7576 xmlAddChild((xmlNodePtr) ent, nw); 7577 xmlAddChild(ctxt->node, cur); 7578 } 7579 if (cur == last) 7580 break; 7581 cur = next; 7582 } 7583 if (ent->owner == 0) 7584 ent->owner = 1; 7585#ifdef LIBXML_LEGACY_ENABLED 7586 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7587 xmlAddEntityReference(ent, firstChild, nw); 7588#endif /* LIBXML_LEGACY_ENABLED */ 7589 } else { 7590 const xmlChar *nbktext; 7591 7592 /* 7593 * the name change is to avoid coalescing of the 7594 * node with a possible previous text one which 7595 * would make ent->children a dangling pointer 7596 */ 7597 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7598 -1); 7599 if (ent->children->type == XML_TEXT_NODE) 7600 ent->children->name = nbktext; 7601 if ((ent->last != ent->children) && 7602 (ent->last->type == XML_TEXT_NODE)) 7603 ent->last->name = nbktext; 7604 xmlAddChildList(ctxt->node, ent->children); 7605 } 7606 7607 /* 7608 * This is to avoid a nasty side effect, see 7609 * characters() in SAX.c 7610 */ 7611 ctxt->nodemem = 0; 7612 ctxt->nodelen = 0; 7613 return; 7614 } 7615 } 7616} 7617 7618/** 7619 * xmlParseEntityRef: 7620 * @ctxt: an XML parser context 7621 * 7622 * parse ENTITY references declarations 7623 * 7624 * [68] EntityRef ::= '&' Name ';' 7625 * 7626 * [ WFC: Entity Declared ] 7627 * In a document without any DTD, a document with only an internal DTD 7628 * subset which contains no parameter entity references, or a document 7629 * with "standalone='yes'", the Name given in the entity reference 7630 * must match that in an entity declaration, except that well-formed 
7631 * documents need not declare any of the following entities: amp, lt, 7632 * gt, apos, quot. The declaration of a parameter entity must precede 7633 * any reference to it. Similarly, the declaration of a general entity 7634 * must precede any reference to it which appears in a default value in an 7635 * attribute-list declaration. Note that if entities are declared in the 7636 * external subset or in external parameter entities, a non-validating 7637 * processor is not obligated to read and process their declarations; 7638 * for such documents, the rule that an entity must be declared is a 7639 * well-formedness constraint only if standalone='yes'. 7640 * 7641 * [ WFC: Parsed Entity ] 7642 * An entity reference must not contain the name of an unparsed entity 7643 * 7644 * Returns the xmlEntityPtr if found, or NULL otherwise. 7645 */ 7646xmlEntityPtr 7647xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7648 const xmlChar *name; 7649 xmlEntityPtr ent = NULL; 7650 7651 GROW; 7652 if (ctxt->instate == XML_PARSER_EOF) 7653 return(NULL); 7654 7655 if (RAW != '&') 7656 return(NULL); 7657 NEXT; 7658 name = xmlParseName(ctxt); 7659 if (name == NULL) { 7660 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7661 "xmlParseEntityRef: no name\n"); 7662 return(NULL); 7663 } 7664 if (RAW != ';') { 7665 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7666 return(NULL); 7667 } 7668 NEXT; 7669 7670 /* 7671 * Predefined entities override any extra definition 7672 */ 7673 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7674 ent = xmlGetPredefinedEntity(name); 7675 if (ent != NULL) 7676 return(ent); 7677 } 7678 7679 /* 7680 * Increase the number of entity references parsed 7681 */ 7682 ctxt->nbentities++; 7683 7684 /* 7685 * Ask first SAX for entity resolution, otherwise try the 7686 * entities which may have stored in the parser context. 
7687 */ 7688 if (ctxt->sax != NULL) { 7689 if (ctxt->sax->getEntity != NULL) 7690 ent = ctxt->sax->getEntity(ctxt->userData, name); 7691 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7692 (ctxt->options & XML_PARSE_OLDSAX)) 7693 ent = xmlGetPredefinedEntity(name); 7694 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7695 (ctxt->userData==ctxt)) { 7696 ent = xmlSAX2GetEntity(ctxt, name); 7697 } 7698 } 7699 if (ctxt->instate == XML_PARSER_EOF) 7700 return(NULL); 7701 /* 7702 * [ WFC: Entity Declared ] 7703 * In a document without any DTD, a document with only an 7704 * internal DTD subset which contains no parameter entity 7705 * references, or a document with "standalone='yes'", the 7706 * Name given in the entity reference must match that in an 7707 * entity declaration, except that well-formed documents 7708 * need not declare any of the following entities: amp, lt, 7709 * gt, apos, quot. 7710 * The declaration of a parameter entity must precede any 7711 * reference to it. 7712 * Similarly, the declaration of a general entity must 7713 * precede any reference to it which appears in a default 7714 * value in an attribute-list declaration. Note that if 7715 * entities are declared in the external subset or in 7716 * external parameter entities, a non-validating processor 7717 * is not obligated to read and process their declarations; 7718 * for such documents, the rule that an entity must be 7719 * declared is a well-formedness constraint only if 7720 * standalone='yes'. 
7721 */ 7722 if (ent == NULL) { 7723 if ((ctxt->standalone == 1) || 7724 ((ctxt->hasExternalSubset == 0) && 7725 (ctxt->hasPErefs == 0))) { 7726 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7727 "Entity '%s' not defined\n", name); 7728 } else { 7729 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7730 "Entity '%s' not defined\n", name); 7731 if ((ctxt->inSubset == 0) && 7732 (ctxt->sax != NULL) && 7733 (ctxt->sax->reference != NULL)) { 7734 ctxt->sax->reference(ctxt->userData, name); 7735 } 7736 } 7737 xmlParserEntityCheck(ctxt, 0, ent, 0); 7738 ctxt->valid = 0; 7739 } 7740 7741 /* 7742 * [ WFC: Parsed Entity ] 7743 * An entity reference must not contain the name of an 7744 * unparsed entity 7745 */ 7746 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7747 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7748 "Entity reference to unparsed entity %s\n", name); 7749 } 7750 7751 /* 7752 * [ WFC: No External Entity References ] 7753 * Attribute values cannot contain direct or indirect 7754 * entity references to external entities. 7755 */ 7756 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7757 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7758 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7759 "Attribute references external entity '%s'\n", name); 7760 } 7761 /* 7762 * [ WFC: No < in Attribute Values ] 7763 * The replacement text of any entity referred to directly or 7764 * indirectly in an attribute value (other than "<") must 7765 * not contain a <. 7766 */ 7767 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7768 (ent != NULL) && 7769 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7770 if (((ent->checked & 1) || (ent->checked == 0)) && 7771 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7772 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7773 "'<' in entity '%s' is not allowed in attributes values\n", name); 7774 } 7775 } 7776 7777 /* 7778 * Internal check, no parameter entities here ... 
7779 */ 7780 else { 7781 switch (ent->etype) { 7782 case XML_INTERNAL_PARAMETER_ENTITY: 7783 case XML_EXTERNAL_PARAMETER_ENTITY: 7784 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7785 "Attempt to reference the parameter entity '%s'\n", 7786 name); 7787 break; 7788 default: 7789 break; 7790 } 7791 } 7792 7793 /* 7794 * [ WFC: No Recursion ] 7795 * A parsed entity must not contain a recursive reference 7796 * to itself, either directly or indirectly. 7797 * Done somewhere else 7798 */ 7799 return(ent); 7800} 7801 7802/** 7803 * xmlParseStringEntityRef: 7804 * @ctxt: an XML parser context 7805 * @str: a pointer to an index in the string 7806 * 7807 * parse ENTITY references declarations, but this version parses it from 7808 * a string value. 7809 * 7810 * [68] EntityRef ::= '&' Name ';' 7811 * 7812 * [ WFC: Entity Declared ] 7813 * In a document without any DTD, a document with only an internal DTD 7814 * subset which contains no parameter entity references, or a document 7815 * with "standalone='yes'", the Name given in the entity reference 7816 * must match that in an entity declaration, except that well-formed 7817 * documents need not declare any of the following entities: amp, lt, 7818 * gt, apos, quot. The declaration of a parameter entity must precede 7819 * any reference to it. Similarly, the declaration of a general entity 7820 * must precede any reference to it which appears in a default value in an 7821 * attribute-list declaration. Note that if entities are declared in the 7822 * external subset or in external parameter entities, a non-validating 7823 * processor is not obligated to read and process their declarations; 7824 * for such documents, the rule that an entity must be declared is a 7825 * well-formedness constraint only if standalone='yes'. 7826 * 7827 * [ WFC: Parsed Entity ] 7828 * An entity reference must not contain the name of an unparsed entity 7829 * 7830 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7831 * is updated to the current location in the string. 7832 */ 7833static xmlEntityPtr 7834xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7835 xmlChar *name; 7836 const xmlChar *ptr; 7837 xmlChar cur; 7838 xmlEntityPtr ent = NULL; 7839 7840 if ((str == NULL) || (*str == NULL)) 7841 return(NULL); 7842 ptr = *str; 7843 cur = *ptr; 7844 if (cur != '&') 7845 return(NULL); 7846 7847 ptr++; 7848 name = xmlParseStringName(ctxt, &ptr); 7849 if (name == NULL) { 7850 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7851 "xmlParseStringEntityRef: no name\n"); 7852 *str = ptr; 7853 return(NULL); 7854 } 7855 if (*ptr != ';') { 7856 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7857 xmlFree(name); 7858 *str = ptr; 7859 return(NULL); 7860 } 7861 ptr++; 7862 7863 7864 /* 7865 * Predefined entities override any extra definition 7866 */ 7867 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7868 ent = xmlGetPredefinedEntity(name); 7869 if (ent != NULL) { 7870 xmlFree(name); 7871 *str = ptr; 7872 return(ent); 7873 } 7874 } 7875 7876 /* 7877 * Increate the number of entity references parsed 7878 */ 7879 ctxt->nbentities++; 7880 7881 /* 7882 * Ask first SAX for entity resolution, otherwise try the 7883 * entities which may have stored in the parser context. 
7884 */ 7885 if (ctxt->sax != NULL) { 7886 if (ctxt->sax->getEntity != NULL) 7887 ent = ctxt->sax->getEntity(ctxt->userData, name); 7888 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7889 ent = xmlGetPredefinedEntity(name); 7890 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7891 ent = xmlSAX2GetEntity(ctxt, name); 7892 } 7893 } 7894 if (ctxt->instate == XML_PARSER_EOF) { 7895 xmlFree(name); 7896 return(NULL); 7897 } 7898 7899 /* 7900 * [ WFC: Entity Declared ] 7901 * In a document without any DTD, a document with only an 7902 * internal DTD subset which contains no parameter entity 7903 * references, or a document with "standalone='yes'", the 7904 * Name given in the entity reference must match that in an 7905 * entity declaration, except that well-formed documents 7906 * need not declare any of the following entities: amp, lt, 7907 * gt, apos, quot. 7908 * The declaration of a parameter entity must precede any 7909 * reference to it. 7910 * Similarly, the declaration of a general entity must 7911 * precede any reference to it which appears in a default 7912 * value in an attribute-list declaration. Note that if 7913 * entities are declared in the external subset or in 7914 * external parameter entities, a non-validating processor 7915 * is not obligated to read and process their declarations; 7916 * for such documents, the rule that an entity must be 7917 * declared is a well-formedness constraint only if 7918 * standalone='yes'. 7919 */ 7920 if (ent == NULL) { 7921 if ((ctxt->standalone == 1) || 7922 ((ctxt->hasExternalSubset == 0) && 7923 (ctxt->hasPErefs == 0))) { 7924 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7925 "Entity '%s' not defined\n", name); 7926 } else { 7927 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7928 "Entity '%s' not defined\n", 7929 name); 7930 } 7931 xmlParserEntityCheck(ctxt, 0, ent, 0); 7932 /* TODO ? 
check regressions ctxt->valid = 0; */ 7933 } 7934 7935 /* 7936 * [ WFC: Parsed Entity ] 7937 * An entity reference must not contain the name of an 7938 * unparsed entity 7939 */ 7940 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7941 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7942 "Entity reference to unparsed entity %s\n", name); 7943 } 7944 7945 /* 7946 * [ WFC: No External Entity References ] 7947 * Attribute values cannot contain direct or indirect 7948 * entity references to external entities. 7949 */ 7950 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7951 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7952 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7953 "Attribute references external entity '%s'\n", name); 7954 } 7955 /* 7956 * [ WFC: No < in Attribute Values ] 7957 * The replacement text of any entity referred to directly or 7958 * indirectly in an attribute value (other than "<") must 7959 * not contain a <. 7960 */ 7961 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7962 (ent != NULL) && (ent->content != NULL) && 7963 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7964 (xmlStrchr(ent->content, '<'))) { 7965 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7966 "'<' in entity '%s' is not allowed in attributes values\n", 7967 name); 7968 } 7969 7970 /* 7971 * Internal check, no parameter entities here ... 7972 */ 7973 else { 7974 switch (ent->etype) { 7975 case XML_INTERNAL_PARAMETER_ENTITY: 7976 case XML_EXTERNAL_PARAMETER_ENTITY: 7977 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7978 "Attempt to reference the parameter entity '%s'\n", 7979 name); 7980 break; 7981 default: 7982 break; 7983 } 7984 } 7985 7986 /* 7987 * [ WFC: No Recursion ] 7988 * A parsed entity must not contain a recursive reference 7989 * to itself, either directly or indirectly. 
7990 * Done somewhere else 7991 */ 7992 7993 xmlFree(name); 7994 *str = ptr; 7995 return(ent); 7996} 7997 7998/** 7999 * xmlParsePEReference: 8000 * @ctxt: an XML parser context 8001 * 8002 * parse PEReference declarations 8003 * The entity content is handled directly by pushing it's content as 8004 * a new input stream. 8005 * 8006 * [69] PEReference ::= '%' Name ';' 8007 * 8008 * [ WFC: No Recursion ] 8009 * A parsed entity must not contain a recursive 8010 * reference to itself, either directly or indirectly. 8011 * 8012 * [ WFC: Entity Declared ] 8013 * In a document without any DTD, a document with only an internal DTD 8014 * subset which contains no parameter entity references, or a document 8015 * with "standalone='yes'", ... ... The declaration of a parameter 8016 * entity must precede any reference to it... 8017 * 8018 * [ VC: Entity Declared ] 8019 * In a document with an external subset or external parameter entities 8020 * with "standalone='no'", ... ... The declaration of a parameter entity 8021 * must precede any reference to it... 8022 * 8023 * [ WFC: In DTD ] 8024 * Parameter-entity references may only appear in the DTD. 8025 * NOTE: misleading but this is handled. 
8026 */ 8027void 8028xmlParsePEReference(xmlParserCtxtPtr ctxt) 8029{ 8030 const xmlChar *name; 8031 xmlEntityPtr entity = NULL; 8032 xmlParserInputPtr input; 8033 8034 if (RAW != '%') 8035 return; 8036 NEXT; 8037 name = xmlParseName(ctxt); 8038 if (name == NULL) { 8039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8040 "xmlParsePEReference: no name\n"); 8041 return; 8042 } 8043 if (RAW != ';') { 8044 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8045 return; 8046 } 8047 8048 NEXT; 8049 8050 /* 8051 * Increate the number of entity references parsed 8052 */ 8053 ctxt->nbentities++; 8054 8055 /* 8056 * Request the entity from SAX 8057 */ 8058 if ((ctxt->sax != NULL) && 8059 (ctxt->sax->getParameterEntity != NULL)) 8060 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8061 if (ctxt->instate == XML_PARSER_EOF) 8062 return; 8063 if (entity == NULL) { 8064 /* 8065 * [ WFC: Entity Declared ] 8066 * In a document without any DTD, a document with only an 8067 * internal DTD subset which contains no parameter entity 8068 * references, or a document with "standalone='yes'", ... 8069 * ... The declaration of a parameter entity must precede 8070 * any reference to it... 8071 */ 8072 if ((ctxt->standalone == 1) || 8073 ((ctxt->hasExternalSubset == 0) && 8074 (ctxt->hasPErefs == 0))) { 8075 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8076 "PEReference: %%%s; not found\n", 8077 name); 8078 } else { 8079 /* 8080 * [ VC: Entity Declared ] 8081 * In a document with an external subset or external 8082 * parameter entities with "standalone='no'", ... 8083 * ... The declaration of a parameter entity must 8084 * precede any reference to it... 
8085 */ 8086 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8087 "PEReference: %%%s; not found\n", 8088 name, NULL); 8089 ctxt->valid = 0; 8090 } 8091 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8092 } else { 8093 /* 8094 * Internal checking in case the entity quest barfed 8095 */ 8096 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8097 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8098 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8099 "Internal: %%%s; is not a parameter entity\n", 8100 name, NULL); 8101 } else if (ctxt->input->free != deallocblankswrapper) { 8102 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8103 if (xmlPushInput(ctxt, input) < 0) 8104 return; 8105 } else { 8106 /* 8107 * TODO !!! 8108 * handle the extra spaces added before and after 8109 * c.f. http://www.w3.org/TR/REC-xml#as-PE 8110 */ 8111 input = xmlNewEntityInputStream(ctxt, entity); 8112 if (xmlPushInput(ctxt, input) < 0) 8113 return; 8114 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8115 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8116 (IS_BLANK_CH(NXT(5)))) { 8117 xmlParseTextDecl(ctxt); 8118 if (ctxt->errNo == 8119 XML_ERR_UNSUPPORTED_ENCODING) { 8120 /* 8121 * The XML REC instructs us to stop parsing 8122 * right here 8123 */ 8124 xmlHaltParser(ctxt); 8125 return; 8126 } 8127 } 8128 } 8129 } 8130 ctxt->hasPErefs = 1; 8131} 8132 8133/** 8134 * xmlLoadEntityContent: 8135 * @ctxt: an XML parser context 8136 * @entity: an unloaded system entity 8137 * 8138 * Load the original content of the given system entity from the 8139 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8140 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8141 * 8142 * Returns 0 in case of success and -1 in case of failure 8143 */ 8144static int 8145xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8146 xmlParserInputPtr input; 8147 xmlBufferPtr buf; 8148 int l, c; 8149 int count = 0; 8150 8151 if ((ctxt == NULL) || (entity == NULL) || 8152 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8153 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8154 (entity->content != NULL)) { 8155 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8156 "xmlLoadEntityContent parameter error"); 8157 return(-1); 8158 } 8159 8160 if (xmlParserDebugEntities) 8161 xmlGenericError(xmlGenericErrorContext, 8162 "Reading %s entity content input\n", entity->name); 8163 8164 buf = xmlBufferCreate(); 8165 if (buf == NULL) { 8166 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8167 "xmlLoadEntityContent parameter error"); 8168 return(-1); 8169 } 8170 8171 input = xmlNewEntityInputStream(ctxt, entity); 8172 if (input == NULL) { 8173 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8174 "xmlLoadEntityContent input error"); 8175 xmlBufferFree(buf); 8176 return(-1); 8177 } 8178 8179 /* 8180 * Push the entity as the current input, read char by char 8181 * saving to the buffer until the end of the entity or an error 8182 */ 8183 if (xmlPushInput(ctxt, input) < 0) { 8184 xmlBufferFree(buf); 8185 return(-1); 8186 } 8187 8188 GROW; 8189 c = CUR_CHAR(l); 8190 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8191 (IS_CHAR(c))) { 8192 xmlBufferAdd(buf, ctxt->input->cur, l); 8193 if (count++ > XML_PARSER_CHUNK_SIZE) { 8194 count = 0; 8195 GROW; 8196 if (ctxt->instate == XML_PARSER_EOF) { 8197 xmlBufferFree(buf); 8198 return(-1); 8199 } 8200 } 8201 NEXTL(l); 8202 c = CUR_CHAR(l); 8203 if (c == 0) { 8204 count = 0; 8205 GROW; 8206 if (ctxt->instate == XML_PARSER_EOF) { 8207 xmlBufferFree(buf); 8208 
return(-1); 8209 } 8210 c = CUR_CHAR(l); 8211 } 8212 } 8213 8214 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8215 xmlPopInput(ctxt); 8216 } else if (!IS_CHAR(c)) { 8217 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8218 "xmlLoadEntityContent: invalid char value %d\n", 8219 c); 8220 xmlBufferFree(buf); 8221 return(-1); 8222 } 8223 entity->content = buf->content; 8224 buf->content = NULL; 8225 xmlBufferFree(buf); 8226 8227 return(0); 8228} 8229 8230/** 8231 * xmlParseStringPEReference: 8232 * @ctxt: an XML parser context 8233 * @str: a pointer to an index in the string 8234 * 8235 * parse PEReference declarations 8236 * 8237 * [69] PEReference ::= '%' Name ';' 8238 * 8239 * [ WFC: No Recursion ] 8240 * A parsed entity must not contain a recursive 8241 * reference to itself, either directly or indirectly. 8242 * 8243 * [ WFC: Entity Declared ] 8244 * In a document without any DTD, a document with only an internal DTD 8245 * subset which contains no parameter entity references, or a document 8246 * with "standalone='yes'", ... ... The declaration of a parameter 8247 * entity must precede any reference to it... 8248 * 8249 * [ VC: Entity Declared ] 8250 * In a document with an external subset or external parameter entities 8251 * with "standalone='no'", ... ... The declaration of a parameter entity 8252 * must precede any reference to it... 8253 * 8254 * [ WFC: In DTD ] 8255 * Parameter-entity references may only appear in the DTD. 8256 * NOTE: misleading but this is handled. 8257 * 8258 * Returns the string of the entity content. 
8259 * str is updated to the current value of the index 8260 */ 8261static xmlEntityPtr 8262xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8263 const xmlChar *ptr; 8264 xmlChar cur; 8265 xmlChar *name; 8266 xmlEntityPtr entity = NULL; 8267 8268 if ((str == NULL) || (*str == NULL)) return(NULL); 8269 ptr = *str; 8270 cur = *ptr; 8271 if (cur != '%') 8272 return(NULL); 8273 ptr++; 8274 name = xmlParseStringName(ctxt, &ptr); 8275 if (name == NULL) { 8276 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8277 "xmlParseStringPEReference: no name\n"); 8278 *str = ptr; 8279 return(NULL); 8280 } 8281 cur = *ptr; 8282 if (cur != ';') { 8283 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8284 xmlFree(name); 8285 *str = ptr; 8286 return(NULL); 8287 } 8288 ptr++; 8289 8290 /* 8291 * Increate the number of entity references parsed 8292 */ 8293 ctxt->nbentities++; 8294 8295 /* 8296 * Request the entity from SAX 8297 */ 8298 if ((ctxt->sax != NULL) && 8299 (ctxt->sax->getParameterEntity != NULL)) 8300 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8301 if (ctxt->instate == XML_PARSER_EOF) { 8302 xmlFree(name); 8303 return(NULL); 8304 } 8305 if (entity == NULL) { 8306 /* 8307 * [ WFC: Entity Declared ] 8308 * In a document without any DTD, a document with only an 8309 * internal DTD subset which contains no parameter entity 8310 * references, or a document with "standalone='yes'", ... 8311 * ... The declaration of a parameter entity must precede 8312 * any reference to it... 8313 */ 8314 if ((ctxt->standalone == 1) || 8315 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8316 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8317 "PEReference: %%%s; not found\n", name); 8318 } else { 8319 /* 8320 * [ VC: Entity Declared ] 8321 * In a document with an external subset or external 8322 * parameter entities with "standalone='no'", ... 8323 * ... The declaration of a parameter entity must 8324 * precede any reference to it... 
8325 */ 8326 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8327 "PEReference: %%%s; not found\n", 8328 name, NULL); 8329 ctxt->valid = 0; 8330 } 8331 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8332 } else { 8333 /* 8334 * Internal checking in case the entity quest barfed 8335 */ 8336 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8337 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8338 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8339 "%%%s; is not a parameter entity\n", 8340 name, NULL); 8341 } 8342 } 8343 ctxt->hasPErefs = 1; 8344 xmlFree(name); 8345 *str = ptr; 8346 return(entity); 8347} 8348 8349/** 8350 * xmlParseDocTypeDecl: 8351 * @ctxt: an XML parser context 8352 * 8353 * parse a DOCTYPE declaration 8354 * 8355 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8356 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8357 * 8358 * [ VC: Root Element Type ] 8359 * The Name in the document type declaration must match the element 8360 * type of the root element. 8361 */ 8362 8363void 8364xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8365 const xmlChar *name = NULL; 8366 xmlChar *ExternalID = NULL; 8367 xmlChar *URI = NULL; 8368 8369 /* 8370 * We know that '<!DOCTYPE' has been detected. 8371 */ 8372 SKIP(9); 8373 8374 SKIP_BLANKS; 8375 8376 /* 8377 * Parse the DOCTYPE name. 8378 */ 8379 name = xmlParseName(ctxt); 8380 if (name == NULL) { 8381 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8382 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8383 } 8384 ctxt->intSubName = name; 8385 8386 SKIP_BLANKS; 8387 8388 /* 8389 * Check for SystemID and ExternalID 8390 */ 8391 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8392 8393 if ((URI != NULL) || (ExternalID != NULL)) { 8394 ctxt->hasExternalSubset = 1; 8395 } 8396 ctxt->extSubURI = URI; 8397 ctxt->extSubSystem = ExternalID; 8398 8399 SKIP_BLANKS; 8400 8401 /* 8402 * Create and update the internal subset. 
8403 */ 8404 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8405 (!ctxt->disableSAX)) 8406 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8407 if (ctxt->instate == XML_PARSER_EOF) 8408 return; 8409 8410 /* 8411 * Is there any internal subset declarations ? 8412 * they are handled separately in xmlParseInternalSubset() 8413 */ 8414 if (RAW == '[') 8415 return; 8416 8417 /* 8418 * We should be at the end of the DOCTYPE declaration. 8419 */ 8420 if (RAW != '>') { 8421 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8422 } 8423 NEXT; 8424} 8425 8426/** 8427 * xmlParseInternalSubset: 8428 * @ctxt: an XML parser context 8429 * 8430 * parse the internal subset declaration 8431 * 8432 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8433 */ 8434 8435static void 8436xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8437 /* 8438 * Is there any DTD definition ? 8439 */ 8440 if (RAW == '[') { 8441 ctxt->instate = XML_PARSER_DTD; 8442 NEXT; 8443 /* 8444 * Parse the succession of Markup declarations and 8445 * PEReferences. 8446 * Subsequence (markupdecl | PEReference | S)* 8447 */ 8448 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8449 const xmlChar *check = CUR_PTR; 8450 unsigned int cons = ctxt->input->consumed; 8451 8452 SKIP_BLANKS; 8453 xmlParseMarkupDecl(ctxt); 8454 xmlParsePEReference(ctxt); 8455 8456 /* 8457 * Pop-up of finished entities. 8458 */ 8459 while ((RAW == 0) && (ctxt->inputNr > 1)) 8460 xmlPopInput(ctxt); 8461 8462 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8463 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8464 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8465 break; 8466 } 8467 } 8468 if (RAW == ']') { 8469 NEXT; 8470 SKIP_BLANKS; 8471 } 8472 } 8473 8474 /* 8475 * We should be at the end of the DOCTYPE declaration. 
8476 */ 8477 if (RAW != '>') { 8478 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8479 return; 8480 } 8481 NEXT; 8482} 8483 8484#ifdef LIBXML_SAX1_ENABLED 8485/** 8486 * xmlParseAttribute: 8487 * @ctxt: an XML parser context 8488 * @value: a xmlChar ** used to store the value of the attribute 8489 * 8490 * parse an attribute 8491 * 8492 * [41] Attribute ::= Name Eq AttValue 8493 * 8494 * [ WFC: No External Entity References ] 8495 * Attribute values cannot contain direct or indirect entity references 8496 * to external entities. 8497 * 8498 * [ WFC: No < in Attribute Values ] 8499 * The replacement text of any entity referred to directly or indirectly in 8500 * an attribute value (other than "<") must not contain a <. 8501 * 8502 * [ VC: Attribute Value Type ] 8503 * The attribute must have been declared; the value must be of the type 8504 * declared for it. 8505 * 8506 * [25] Eq ::= S? '=' S? 8507 * 8508 * With namespace: 8509 * 8510 * [NS 11] Attribute ::= QName Eq AttValue 8511 * 8512 * Also the case QName == xmlns:??? is handled independently as a namespace 8513 * definition. 8514 * 8515 * Returns the attribute name, and the value in *value. 
8516 */ 8517 8518const xmlChar * 8519xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8520 const xmlChar *name; 8521 xmlChar *val; 8522 8523 *value = NULL; 8524 GROW; 8525 name = xmlParseName(ctxt); 8526 if (name == NULL) { 8527 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8528 "error parsing attribute name\n"); 8529 return(NULL); 8530 } 8531 8532 /* 8533 * read the value 8534 */ 8535 SKIP_BLANKS; 8536 if (RAW == '=') { 8537 NEXT; 8538 SKIP_BLANKS; 8539 val = xmlParseAttValue(ctxt); 8540 ctxt->instate = XML_PARSER_CONTENT; 8541 } else { 8542 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8543 "Specification mandate value for attribute %s\n", name); 8544 return(NULL); 8545 } 8546 8547 /* 8548 * Check that xml:lang conforms to the specification 8549 * No more registered as an error, just generate a warning now 8550 * since this was deprecated in XML second edition 8551 */ 8552 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8553 if (!xmlCheckLanguageID(val)) { 8554 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8555 "Malformed value for xml:lang : %s\n", 8556 val, NULL); 8557 } 8558 } 8559 8560 /* 8561 * Check that xml:space conforms to the specification 8562 */ 8563 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8564 if (xmlStrEqual(val, BAD_CAST "default")) 8565 *(ctxt->space) = 0; 8566 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8567 *(ctxt->space) = 1; 8568 else { 8569 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8570"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8571 val, NULL); 8572 } 8573 } 8574 8575 *value = val; 8576 return(name); 8577} 8578 8579/** 8580 * xmlParseStartTag: 8581 * @ctxt: an XML parser context 8582 * 8583 * parse a start of tag either for rule element or 8584 * EmptyElement. In both case we don't parse the tag closing chars. 8585 * 8586 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8587 * 8588 * [ WFC: Unique Att Spec ] 8589 * No attribute name may appear more than once in the same start-tag or 8590 * empty-element tag. 8591 * 8592 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8593 * 8594 * [ WFC: Unique Att Spec ] 8595 * No attribute name may appear more than once in the same start-tag or 8596 * empty-element tag. 8597 * 8598 * With namespace: 8599 * 8600 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8601 * 8602 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8603 * 8604 * Returns the element name parsed 8605 */ 8606 8607const xmlChar * 8608xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8609 const xmlChar *name; 8610 const xmlChar *attname; 8611 xmlChar *attvalue; 8612 const xmlChar **atts = ctxt->atts; 8613 int nbatts = 0; 8614 int maxatts = ctxt->maxatts; 8615 int i; 8616 8617 if (RAW != '<') return(NULL); 8618 NEXT1; 8619 8620 name = xmlParseName(ctxt); 8621 if (name == NULL) { 8622 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8623 "xmlParseStartTag: invalid element name\n"); 8624 return(NULL); 8625 } 8626 8627 /* 8628 * Now parse the attributes, it ends up with the ending 8629 * 8630 * (S Attribute)* S? 8631 */ 8632 SKIP_BLANKS; 8633 GROW; 8634 8635 while (((RAW != '>') && 8636 ((RAW != '/') || (NXT(1) != '>')) && 8637 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8638 const xmlChar *q = CUR_PTR; 8639 unsigned int cons = ctxt->input->consumed; 8640 8641 attname = xmlParseAttribute(ctxt, &attvalue); 8642 if ((attname != NULL) && (attvalue != NULL)) { 8643 /* 8644 * [ WFC: Unique Att Spec ] 8645 * No attribute name may appear more than once in the same 8646 * start-tag or empty-element tag. 
8647 */ 8648 for (i = 0; i < nbatts;i += 2) { 8649 if (xmlStrEqual(atts[i], attname)) { 8650 xmlErrAttributeDup(ctxt, NULL, attname); 8651 xmlFree(attvalue); 8652 goto failed; 8653 } 8654 } 8655 /* 8656 * Add the pair to atts 8657 */ 8658 if (atts == NULL) { 8659 maxatts = 22; /* allow for 10 attrs by default */ 8660 atts = (const xmlChar **) 8661 xmlMalloc(maxatts * sizeof(xmlChar *)); 8662 if (atts == NULL) { 8663 xmlErrMemory(ctxt, NULL); 8664 if (attvalue != NULL) 8665 xmlFree(attvalue); 8666 goto failed; 8667 } 8668 ctxt->atts = atts; 8669 ctxt->maxatts = maxatts; 8670 } else if (nbatts + 4 > maxatts) { 8671 const xmlChar **n; 8672 8673 maxatts *= 2; 8674 n = (const xmlChar **) xmlRealloc((void *) atts, 8675 maxatts * sizeof(const xmlChar *)); 8676 if (n == NULL) { 8677 xmlErrMemory(ctxt, NULL); 8678 if (attvalue != NULL) 8679 xmlFree(attvalue); 8680 goto failed; 8681 } 8682 atts = n; 8683 ctxt->atts = atts; 8684 ctxt->maxatts = maxatts; 8685 } 8686 atts[nbatts++] = attname; 8687 atts[nbatts++] = attvalue; 8688 atts[nbatts] = NULL; 8689 atts[nbatts + 1] = NULL; 8690 } else { 8691 if (attvalue != NULL) 8692 xmlFree(attvalue); 8693 } 8694 8695failed: 8696 8697 GROW 8698 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8699 break; 8700 if (!IS_BLANK_CH(RAW)) { 8701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8702 "attributes construct error\n"); 8703 } 8704 SKIP_BLANKS; 8705 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8706 (attname == NULL) && (attvalue == NULL)) { 8707 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8708 "xmlParseStartTag: problem parsing attributes\n"); 8709 break; 8710 } 8711 SHRINK; 8712 GROW; 8713 } 8714 8715 /* 8716 * SAX: Start of Element ! 
8717 */ 8718 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8719 (!ctxt->disableSAX)) { 8720 if (nbatts > 0) 8721 ctxt->sax->startElement(ctxt->userData, name, atts); 8722 else 8723 ctxt->sax->startElement(ctxt->userData, name, NULL); 8724 } 8725 8726 if (atts != NULL) { 8727 /* Free only the content strings */ 8728 for (i = 1;i < nbatts;i+=2) 8729 if (atts[i] != NULL) 8730 xmlFree((xmlChar *) atts[i]); 8731 } 8732 return(name); 8733} 8734 8735/** 8736 * xmlParseEndTag1: 8737 * @ctxt: an XML parser context 8738 * @line: line of the start tag 8739 * @nsNr: number of namespaces on the start tag 8740 * 8741 * parse an end of tag 8742 * 8743 * [42] ETag ::= '</' Name S? '>' 8744 * 8745 * With namespace 8746 * 8747 * [NS 9] ETag ::= '</' QName S? '>' 8748 */ 8749 8750static void 8751xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8752 const xmlChar *name; 8753 8754 GROW; 8755 if ((RAW != '<') || (NXT(1) != '/')) { 8756 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8757 "xmlParseEndTag: '</' not found\n"); 8758 return; 8759 } 8760 SKIP(2); 8761 8762 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8763 8764 /* 8765 * We should definitely be at the ending "S? '>'" part 8766 */ 8767 GROW; 8768 SKIP_BLANKS; 8769 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8770 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8771 } else 8772 NEXT1; 8773 8774 /* 8775 * [ WFC: Element Type Match ] 8776 * The Name in an element's end-tag must match the element type in the 8777 * start-tag. 
8778 * 8779 */ 8780 if (name != (xmlChar*)1) { 8781 if (name == NULL) name = BAD_CAST "unparseable"; 8782 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8783 "Opening and ending tag mismatch: %s line %d and %s\n", 8784 ctxt->name, line, name); 8785 } 8786 8787 /* 8788 * SAX: End of Tag 8789 */ 8790 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8791 (!ctxt->disableSAX)) 8792 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8793 8794 namePop(ctxt); 8795 spacePop(ctxt); 8796 return; 8797} 8798 8799/** 8800 * xmlParseEndTag: 8801 * @ctxt: an XML parser context 8802 * 8803 * parse an end of tag 8804 * 8805 * [42] ETag ::= '</' Name S? '>' 8806 * 8807 * With namespace 8808 * 8809 * [NS 9] ETag ::= '</' QName S? '>' 8810 */ 8811 8812void 8813xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8814 xmlParseEndTag1(ctxt, 0); 8815} 8816#endif /* LIBXML_SAX1_ENABLED */ 8817 8818/************************************************************************ 8819 * * 8820 * SAX 2 specific operations * 8821 * * 8822 ************************************************************************/ 8823 8824/* 8825 * xmlGetNamespace: 8826 * @ctxt: an XML parser context 8827 * @prefix: the prefix to lookup 8828 * 8829 * Lookup the namespace name for the @prefix (which ca be NULL) 8830 * The prefix must come from the @ctxt->dict dictionary 8831 * 8832 * Returns the namespace name or NULL if not bound 8833 */ 8834static const xmlChar * 8835xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8836 int i; 8837 8838 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8839 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8840 if (ctxt->nsTab[i] == prefix) { 8841 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8842 return(NULL); 8843 return(ctxt->nsTab[i + 1]); 8844 } 8845 return(NULL); 8846} 8847 8848/** 8849 * xmlParseQName: 8850 * @ctxt: an XML parser context 8851 * @prefix: pointer to store the prefix part 8852 * 8853 * parse an XML Namespace QName 8854 * 8855 * [6] QName ::= 
(Prefix ':')? LocalPart 8856 * [7] Prefix ::= NCName 8857 * [8] LocalPart ::= NCName 8858 * 8859 * Returns the Name parsed or NULL 8860 */ 8861 8862static const xmlChar * 8863xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8864 const xmlChar *l, *p; 8865 8866 GROW; 8867 8868 l = xmlParseNCName(ctxt); 8869 if (l == NULL) { 8870 if (CUR == ':') { 8871 l = xmlParseName(ctxt); 8872 if (l != NULL) { 8873 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8874 "Failed to parse QName '%s'\n", l, NULL, NULL); 8875 *prefix = NULL; 8876 return(l); 8877 } 8878 } 8879 return(NULL); 8880 } 8881 if (CUR == ':') { 8882 NEXT; 8883 p = l; 8884 l = xmlParseNCName(ctxt); 8885 if (l == NULL) { 8886 xmlChar *tmp; 8887 8888 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8889 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8890 l = xmlParseNmtoken(ctxt); 8891 if (l == NULL) 8892 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8893 else { 8894 tmp = xmlBuildQName(l, p, NULL, 0); 8895 xmlFree((char *)l); 8896 } 8897 p = xmlDictLookup(ctxt->dict, tmp, -1); 8898 if (tmp != NULL) xmlFree(tmp); 8899 *prefix = NULL; 8900 return(p); 8901 } 8902 if (CUR == ':') { 8903 xmlChar *tmp; 8904 8905 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8906 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8907 NEXT; 8908 tmp = (xmlChar *) xmlParseName(ctxt); 8909 if (tmp != NULL) { 8910 tmp = xmlBuildQName(tmp, l, NULL, 0); 8911 l = xmlDictLookup(ctxt->dict, tmp, -1); 8912 if (tmp != NULL) xmlFree(tmp); 8913 *prefix = p; 8914 return(l); 8915 } 8916 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8917 l = xmlDictLookup(ctxt->dict, tmp, -1); 8918 if (tmp != NULL) xmlFree(tmp); 8919 *prefix = p; 8920 return(l); 8921 } 8922 *prefix = p; 8923 } else 8924 *prefix = NULL; 8925 return(l); 8926} 8927 8928/** 8929 * xmlParseQNameAndCompare: 8930 * @ctxt: an XML parser context 8931 * @name: the localname 8932 * @prefix: the prefix, if any. 
8933 * 8934 * parse an XML name and compares for match 8935 * (specialized for endtag parsing) 8936 * 8937 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8938 * and the name for mismatch 8939 */ 8940 8941static const xmlChar * 8942xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8943 xmlChar const *prefix) { 8944 const xmlChar *cmp; 8945 const xmlChar *in; 8946 const xmlChar *ret; 8947 const xmlChar *prefix2; 8948 8949 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8950 8951 GROW; 8952 in = ctxt->input->cur; 8953 8954 cmp = prefix; 8955 while (*in != 0 && *in == *cmp) { 8956 ++in; 8957 ++cmp; 8958 } 8959 if ((*cmp == 0) && (*in == ':')) { 8960 in++; 8961 cmp = name; 8962 while (*in != 0 && *in == *cmp) { 8963 ++in; 8964 ++cmp; 8965 } 8966 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8967 /* success */ 8968 ctxt->input->cur = in; 8969 return((const xmlChar*) 1); 8970 } 8971 } 8972 /* 8973 * all strings coms from the dictionary, equality can be done directly 8974 */ 8975 ret = xmlParseQName (ctxt, &prefix2); 8976 if ((ret == name) && (prefix == prefix2)) 8977 return((const xmlChar*) 1); 8978 return ret; 8979} 8980 8981/** 8982 * xmlParseAttValueInternal: 8983 * @ctxt: an XML parser context 8984 * @len: attribute len result 8985 * @alloc: whether the attribute was reallocated as a new string 8986 * @normalize: if 1 then further non-CDATA normalization must be done 8987 * 8988 * parse a value for an attribute. 8989 * NOTE: if no normalization is needed, the routine will return pointers 8990 * directly from the data buffer. 
8991 * 8992 * 3.3.3 Attribute-Value Normalization: 8993 * Before the value of an attribute is passed to the application or 8994 * checked for validity, the XML processor must normalize it as follows: 8995 * - a character reference is processed by appending the referenced 8996 * character to the attribute value 8997 * - an entity reference is processed by recursively processing the 8998 * replacement text of the entity 8999 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 9000 * appending #x20 to the normalized value, except that only a single 9001 * #x20 is appended for a "#xD#xA" sequence that is part of an external 9002 * parsed entity or the literal entity value of an internal parsed entity 9003 * - other characters are processed by appending them to the normalized value 9004 * If the declared value is not CDATA, then the XML processor must further 9005 * process the normalized attribute value by discarding any leading and 9006 * trailing space (#x20) characters, and by replacing sequences of space 9007 * (#x20) characters by a single space (#x20) character. 9008 * All attributes for which no declaration has been read should be treated 9009 * by a non-validating parser as if declared CDATA. 9010 * 9011 * Returns the AttValue parsed or NULL. The value has to be freed by the 9012 * caller if it was copied, this can be detected by val[*len] == 0. 
9013 */ 9014 9015static xmlChar * 9016xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 9017 int normalize) 9018{ 9019 xmlChar limit = 0; 9020 const xmlChar *in = NULL, *start, *end, *last; 9021 xmlChar *ret = NULL; 9022 int line, col; 9023 9024 GROW; 9025 in = (xmlChar *) CUR_PTR; 9026 line = ctxt->input->line; 9027 col = ctxt->input->col; 9028 if (*in != '"' && *in != '\'') { 9029 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 9030 return (NULL); 9031 } 9032 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 9033 9034 /* 9035 * try to handle in this routine the most common case where no 9036 * allocation of a new string is required and where content is 9037 * pure ASCII. 9038 */ 9039 limit = *in++; 9040 col++; 9041 end = ctxt->input->end; 9042 start = in; 9043 if (in >= end) { 9044 const xmlChar *oldbase = ctxt->input->base; 9045 GROW; 9046 if (oldbase != ctxt->input->base) { 9047 long delta = ctxt->input->base - oldbase; 9048 start = start + delta; 9049 in = in + delta; 9050 } 9051 end = ctxt->input->end; 9052 } 9053 if (normalize) { 9054 /* 9055 * Skip any leading spaces 9056 */ 9057 while ((in < end) && (*in != limit) && 9058 ((*in == 0x20) || (*in == 0x9) || 9059 (*in == 0xA) || (*in == 0xD))) { 9060 if (*in == 0xA) { 9061 line++; col = 1; 9062 } else { 9063 col++; 9064 } 9065 in++; 9066 start = in; 9067 if (in >= end) { 9068 const xmlChar *oldbase = ctxt->input->base; 9069 GROW; 9070 if (ctxt->instate == XML_PARSER_EOF) 9071 return(NULL); 9072 if (oldbase != ctxt->input->base) { 9073 long delta = ctxt->input->base - oldbase; 9074 start = start + delta; 9075 in = in + delta; 9076 } 9077 end = ctxt->input->end; 9078 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9079 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9080 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9081 "AttValue length too long\n"); 9082 return(NULL); 9083 } 9084 } 9085 } 9086 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9087 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 9088 col++; 9089 if ((*in++ == 0x20) && (*in == 0x20)) break; 9090 if (in >= end) { 9091 const xmlChar *oldbase = ctxt->input->base; 9092 GROW; 9093 if (ctxt->instate == XML_PARSER_EOF) 9094 return(NULL); 9095 if (oldbase != ctxt->input->base) { 9096 long delta = ctxt->input->base - oldbase; 9097 start = start + delta; 9098 in = in + delta; 9099 } 9100 end = ctxt->input->end; 9101 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9102 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9103 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9104 "AttValue length too long\n"); 9105 return(NULL); 9106 } 9107 } 9108 } 9109 last = in; 9110 /* 9111 * skip the trailing blanks 9112 */ 9113 while ((last[-1] == 0x20) && (last > start)) last--; 9114 while ((in < end) && (*in != limit) && 9115 ((*in == 0x20) || (*in == 0x9) || 9116 (*in == 0xA) || (*in == 0xD))) { 9117 if (*in == 0xA) { 9118 line++, col = 1; 9119 } else { 9120 col++; 9121 } 9122 in++; 9123 if (in >= end) { 9124 const xmlChar *oldbase = ctxt->input->base; 9125 GROW; 9126 if (ctxt->instate == XML_PARSER_EOF) 9127 return(NULL); 9128 if (oldbase != ctxt->input->base) { 9129 long delta = ctxt->input->base - oldbase; 9130 start = start + delta; 9131 in = in + delta; 9132 last = last + delta; 9133 } 9134 end = ctxt->input->end; 9135 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9136 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9137 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9138 "AttValue length too long\n"); 9139 return(NULL); 9140 } 9141 } 9142 } 9143 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9144 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9145 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9146 "AttValue length too long\n"); 9147 return(NULL); 9148 } 9149 if (*in != limit) goto need_complex; 9150 } else { 9151 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9152 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9153 in++; 9154 col++; 9155 if (in >= end) { 9156 const xmlChar *oldbase = 
ctxt->input->base; 9157 GROW; 9158 if (ctxt->instate == XML_PARSER_EOF) 9159 return(NULL); 9160 if (oldbase != ctxt->input->base) { 9161 long delta = ctxt->input->base - oldbase; 9162 start = start + delta; 9163 in = in + delta; 9164 } 9165 end = ctxt->input->end; 9166 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9167 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9169 "AttValue length too long\n"); 9170 return(NULL); 9171 } 9172 } 9173 } 9174 last = in; 9175 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9176 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9177 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9178 "AttValue length too long\n"); 9179 return(NULL); 9180 } 9181 if (*in != limit) goto need_complex; 9182 } 9183 in++; 9184 col++; 9185 if (len != NULL) { 9186 *len = last - start; 9187 ret = (xmlChar *) start; 9188 } else { 9189 if (alloc) *alloc = 1; 9190 ret = xmlStrndup(start, last - start); 9191 } 9192 CUR_PTR = in; 9193 ctxt->input->line = line; 9194 ctxt->input->col = col; 9195 if (alloc) *alloc = 0; 9196 return ret; 9197need_complex: 9198 if (alloc) *alloc = 1; 9199 return xmlParseAttValueComplex(ctxt, len, normalize); 9200} 9201 9202/** 9203 * xmlParseAttribute2: 9204 * @ctxt: an XML parser context 9205 * @pref: the element prefix 9206 * @elem: the element name 9207 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9208 * @value: a xmlChar ** used to store the value of the attribute 9209 * @len: an int * to save the length of the attribute 9210 * @alloc: an int * to indicate if the attribute was allocated 9211 * 9212 * parse an attribute in the new SAX2 framework. 9213 * 9214 * Returns the attribute name, and the value in *value, . 
9215 */ 9216 9217static const xmlChar * 9218xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9219 const xmlChar * pref, const xmlChar * elem, 9220 const xmlChar ** prefix, xmlChar ** value, 9221 int *len, int *alloc) 9222{ 9223 const xmlChar *name; 9224 xmlChar *val, *internal_val = NULL; 9225 int normalize = 0; 9226 9227 *value = NULL; 9228 GROW; 9229 name = xmlParseQName(ctxt, prefix); 9230 if (name == NULL) { 9231 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9232 "error parsing attribute name\n"); 9233 return (NULL); 9234 } 9235 9236 /* 9237 * get the type if needed 9238 */ 9239 if (ctxt->attsSpecial != NULL) { 9240 int type; 9241 9242 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9243 pref, elem, *prefix, name); 9244 if (type != 0) 9245 normalize = 1; 9246 } 9247 9248 /* 9249 * read the value 9250 */ 9251 SKIP_BLANKS; 9252 if (RAW == '=') { 9253 NEXT; 9254 SKIP_BLANKS; 9255 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9256 if (normalize) { 9257 /* 9258 * Sometimes a second normalisation pass for spaces is needed 9259 * but that only happens if charrefs or entities refernces 9260 * have been used in the attribute value, i.e. the attribute 9261 * value have been extracted in an allocated string already. 
9262 */ 9263 if (*alloc) { 9264 const xmlChar *val2; 9265 9266 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9267 if ((val2 != NULL) && (val2 != val)) { 9268 xmlFree(val); 9269 val = (xmlChar *) val2; 9270 } 9271 } 9272 } 9273 ctxt->instate = XML_PARSER_CONTENT; 9274 } else { 9275 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9276 "Specification mandate value for attribute %s\n", 9277 name); 9278 return (NULL); 9279 } 9280 9281 if (*prefix == ctxt->str_xml) { 9282 /* 9283 * Check that xml:lang conforms to the specification 9284 * No more registered as an error, just generate a warning now 9285 * since this was deprecated in XML second edition 9286 */ 9287 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9288 internal_val = xmlStrndup(val, *len); 9289 if (!xmlCheckLanguageID(internal_val)) { 9290 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9291 "Malformed value for xml:lang : %s\n", 9292 internal_val, NULL); 9293 } 9294 } 9295 9296 /* 9297 * Check that xml:space conforms to the specification 9298 */ 9299 if (xmlStrEqual(name, BAD_CAST "space")) { 9300 internal_val = xmlStrndup(val, *len); 9301 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9302 *(ctxt->space) = 0; 9303 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9304 *(ctxt->space) = 1; 9305 else { 9306 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9307 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9308 internal_val, NULL); 9309 } 9310 } 9311 if (internal_val) { 9312 xmlFree(internal_val); 9313 } 9314 } 9315 9316 *value = val; 9317 return (name); 9318} 9319/** 9320 * xmlParseStartTag2: 9321 * @ctxt: an XML parser context 9322 * 9323 * parse a start of tag either for rule element or 9324 * EmptyElement. In both case we don't parse the tag closing chars. 9325 * This routine is called when running SAX2 parsing 9326 * 9327 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9328 * 9329 * [ WFC: Unique Att Spec ] 9330 * No attribute name may appear more than once in the same start-tag or 9331 * empty-element tag. 9332 * 9333 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9334 * 9335 * [ WFC: Unique Att Spec ] 9336 * No attribute name may appear more than once in the same start-tag or 9337 * empty-element tag. 9338 * 9339 * With namespace: 9340 * 9341 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9342 * 9343 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9344 * 9345 * Returns the element name parsed 9346 */ 9347 9348static const xmlChar * 9349xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9350 const xmlChar **URI, int *tlen) { 9351 const xmlChar *localname; 9352 const xmlChar *prefix; 9353 const xmlChar *attname; 9354 const xmlChar *aprefix; 9355 const xmlChar *nsname; 9356 xmlChar *attvalue; 9357 const xmlChar **atts = ctxt->atts; 9358 int maxatts = ctxt->maxatts; 9359 int nratts, nbatts, nbdef; 9360 int i, j, nbNs, attval, oldline, oldcol, inputNr; 9361 const xmlChar *base; 9362 unsigned long cur; 9363 int nsNr = ctxt->nsNr; 9364 9365 if (RAW != '<') return(NULL); 9366 NEXT1; 9367 9368 /* 9369 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9370 * point since the attribute values may be stored as pointers to 9371 * the buffer and calling SHRINK would destroy them ! 9372 * The Shrinking is only possible once the full set of attribute 9373 * callbacks have been done. 9374 */ 9375reparse: 9376 SHRINK; 9377 base = ctxt->input->base; 9378 cur = ctxt->input->cur - ctxt->input->base; 9379 inputNr = ctxt->inputNr; 9380 oldline = ctxt->input->line; 9381 oldcol = ctxt->input->col; 9382 nbatts = 0; 9383 nratts = 0; 9384 nbdef = 0; 9385 nbNs = 0; 9386 attval = 0; 9387 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9388 ctxt->nsNr = nsNr; 9389 9390 localname = xmlParseQName(ctxt, &prefix); 9391 if (localname == NULL) { 9392 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9393 "StartTag: invalid element name\n"); 9394 return(NULL); 9395 } 9396 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9397 9398 /* 9399 * Now parse the attributes, it ends up with the ending 9400 * 9401 * (S Attribute)* S? 9402 */ 9403 SKIP_BLANKS; 9404 GROW; 9405 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9406 goto base_changed; 9407 9408 while (((RAW != '>') && 9409 ((RAW != '/') || (NXT(1) != '>')) && 9410 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9411 const xmlChar *q = CUR_PTR; 9412 unsigned int cons = ctxt->input->consumed; 9413 int len = -1, alloc = 0; 9414 9415 attname = xmlParseAttribute2(ctxt, prefix, localname, 9416 &aprefix, &attvalue, &len, &alloc); 9417 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) { 9418 if ((attvalue != NULL) && (alloc != 0)) 9419 xmlFree(attvalue); 9420 attvalue = NULL; 9421 goto base_changed; 9422 } 9423 if ((attname != NULL) && (attvalue != NULL)) { 9424 if (len < 0) len = xmlStrlen(attvalue); 9425 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9426 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9427 xmlURIPtr uri; 9428 9429 if (URL == NULL) { 9430 xmlErrMemory(ctxt, "dictionary allocation failure"); 9431 if ((attvalue != NULL) && (alloc != 0)) 9432 xmlFree(attvalue); 9433 return(NULL); 9434 } 9435 if (*URL != 0) { 9436 uri = xmlParseURI((const char *) URL); 9437 if (uri == NULL) { 9438 xmlNsErr(ctxt, XML_WAR_NS_URI, 9439 "xmlns: '%s' is not a valid URI\n", 9440 URL, NULL, NULL); 9441 } else { 9442 if (uri->scheme == NULL) { 9443 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9444 "xmlns: URI %s is not absolute\n", 9445 URL, NULL, NULL); 9446 } 9447 xmlFreeURI(uri); 9448 } 9449 if (URL == ctxt->str_xml_ns) { 9450 if (attname != ctxt->str_xml) { 9451 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9452 "xml namespace URI cannot be the default namespace\n", 9453 NULL, NULL, NULL); 9454 } 9455 goto skip_default_ns; 9456 } 9457 if ((len == 29) && 9458 (xmlStrEqual(URL, 9459 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9460 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9461 "reuse of the xmlns namespace name is forbidden\n", 9462 NULL, NULL, NULL); 9463 goto skip_default_ns; 9464 } 9465 } 9466 /* 9467 * check that it's not a defined namespace 9468 */ 9469 for (j = 1;j <= nbNs;j++) 9470 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9471 break; 9472 if (j <= nbNs) 9473 xmlErrAttributeDup(ctxt, NULL, attname); 9474 else 9475 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9476skip_default_ns: 9477 if (alloc != 0) xmlFree(attvalue); 9478 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9479 break; 9480 if (!IS_BLANK_CH(RAW)) { 9481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9482 "attributes construct error\n"); 9483 break; 9484 } 9485 SKIP_BLANKS; 9486 continue; 9487 } 9488 if (aprefix == ctxt->str_xmlns) { 9489 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9490 xmlURIPtr uri; 9491 9492 if (attname == ctxt->str_xml) { 9493 if (URL != ctxt->str_xml_ns) { 9494 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9495 "xml namespace prefix mapped to wrong URI\n", 9496 NULL, NULL, NULL); 9497 } 9498 /* 9499 * Do not keep a namespace definition node 9500 */ 9501 goto skip_ns; 9502 } 9503 if (URL == ctxt->str_xml_ns) { 9504 if (attname != ctxt->str_xml) { 9505 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9506 "xml namespace URI mapped to wrong prefix\n", 9507 NULL, NULL, NULL); 9508 } 9509 goto skip_ns; 9510 } 9511 if (attname == ctxt->str_xmlns) { 9512 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9513 "redefinition of the xmlns prefix is forbidden\n", 9514 NULL, NULL, NULL); 9515 goto skip_ns; 9516 } 9517 if ((len == 29) && 9518 (xmlStrEqual(URL, 9519 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9520 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9521 "reuse of the xmlns namespace 
name is forbidden\n", 9522 NULL, NULL, NULL); 9523 goto skip_ns; 9524 } 9525 if ((URL == NULL) || (URL[0] == 0)) { 9526 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9527 "xmlns:%s: Empty XML namespace is not allowed\n", 9528 attname, NULL, NULL); 9529 goto skip_ns; 9530 } else { 9531 uri = xmlParseURI((const char *) URL); 9532 if (uri == NULL) { 9533 xmlNsErr(ctxt, XML_WAR_NS_URI, 9534 "xmlns:%s: '%s' is not a valid URI\n", 9535 attname, URL, NULL); 9536 } else { 9537 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9538 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9539 "xmlns:%s: URI %s is not absolute\n", 9540 attname, URL, NULL); 9541 } 9542 xmlFreeURI(uri); 9543 } 9544 } 9545 9546 /* 9547 * check that it's not a defined namespace 9548 */ 9549 for (j = 1;j <= nbNs;j++) 9550 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9551 break; 9552 if (j <= nbNs) 9553 xmlErrAttributeDup(ctxt, aprefix, attname); 9554 else 9555 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9556skip_ns: 9557 if (alloc != 0) xmlFree(attvalue); 9558 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9559 break; 9560 if (!IS_BLANK_CH(RAW)) { 9561 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9562 "attributes construct error\n"); 9563 break; 9564 } 9565 SKIP_BLANKS; 9566 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9567 goto base_changed; 9568 continue; 9569 } 9570 9571 /* 9572 * Add the pair to atts 9573 */ 9574 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9575 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9576 if (attvalue[len] == 0) 9577 xmlFree(attvalue); 9578 goto failed; 9579 } 9580 maxatts = ctxt->maxatts; 9581 atts = ctxt->atts; 9582 } 9583 ctxt->attallocs[nratts++] = alloc; 9584 atts[nbatts++] = attname; 9585 atts[nbatts++] = aprefix; 9586 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9587 atts[nbatts++] = attvalue; 9588 attvalue += len; 9589 atts[nbatts++] = attvalue; 9590 /* 9591 * tag if some deallocation is needed 9592 */ 9593 if (alloc != 0) attval = 1; 
9594 } else { 9595 if ((attvalue != NULL) && (attvalue[len] == 0)) 9596 xmlFree(attvalue); 9597 } 9598 9599failed: 9600 9601 GROW 9602 if (ctxt->instate == XML_PARSER_EOF) 9603 break; 9604 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9605 goto base_changed; 9606 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9607 break; 9608 if (!IS_BLANK_CH(RAW)) { 9609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9610 "attributes construct error\n"); 9611 break; 9612 } 9613 SKIP_BLANKS; 9614 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9615 (attname == NULL) && (attvalue == NULL)) { 9616 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9617 "xmlParseStartTag: problem parsing attributes\n"); 9618 break; 9619 } 9620 GROW; 9621 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9622 goto base_changed; 9623 } 9624 9625 /* 9626 * The attributes defaulting 9627 */ 9628 if (ctxt->attsDefault != NULL) { 9629 xmlDefAttrsPtr defaults; 9630 9631 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9632 if (defaults != NULL) { 9633 for (i = 0;i < defaults->nbAttrs;i++) { 9634 attname = defaults->values[5 * i]; 9635 aprefix = defaults->values[5 * i + 1]; 9636 9637 /* 9638 * special work for namespaces defaulted defs 9639 */ 9640 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9641 /* 9642 * check that it's not a defined namespace 9643 */ 9644 for (j = 1;j <= nbNs;j++) 9645 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9646 break; 9647 if (j <= nbNs) continue; 9648 9649 nsname = xmlGetNamespace(ctxt, NULL); 9650 if (nsname != defaults->values[5 * i + 2]) { 9651 if (nsPush(ctxt, NULL, 9652 defaults->values[5 * i + 2]) > 0) 9653 nbNs++; 9654 } 9655 } else if (aprefix == ctxt->str_xmlns) { 9656 /* 9657 * check that it's not a defined namespace 9658 */ 9659 for (j = 1;j <= nbNs;j++) 9660 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9661 break; 9662 if (j <= nbNs) continue; 9663 9664 nsname = xmlGetNamespace(ctxt, attname); 9665 if 
(nsname != defaults->values[2]) { 9666 if (nsPush(ctxt, attname, 9667 defaults->values[5 * i + 2]) > 0) 9668 nbNs++; 9669 } 9670 } else { 9671 /* 9672 * check that it's not a defined attribute 9673 */ 9674 for (j = 0;j < nbatts;j+=5) { 9675 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9676 break; 9677 } 9678 if (j < nbatts) continue; 9679 9680 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9681 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9682 return(NULL); 9683 } 9684 maxatts = ctxt->maxatts; 9685 atts = ctxt->atts; 9686 } 9687 atts[nbatts++] = attname; 9688 atts[nbatts++] = aprefix; 9689 if (aprefix == NULL) 9690 atts[nbatts++] = NULL; 9691 else 9692 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9693 atts[nbatts++] = defaults->values[5 * i + 2]; 9694 atts[nbatts++] = defaults->values[5 * i + 3]; 9695 if ((ctxt->standalone == 1) && 9696 (defaults->values[5 * i + 4] != NULL)) { 9697 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9698 "standalone: attribute %s on %s defaulted from external subset\n", 9699 attname, localname); 9700 } 9701 nbdef++; 9702 } 9703 } 9704 } 9705 } 9706 9707 /* 9708 * The attributes checkings 9709 */ 9710 for (i = 0; i < nbatts;i += 5) { 9711 /* 9712 * The default namespace does not apply to attribute names. 9713 */ 9714 if (atts[i + 1] != NULL) { 9715 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9716 if (nsname == NULL) { 9717 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9718 "Namespace prefix %s for %s on %s is not defined\n", 9719 atts[i + 1], atts[i], localname); 9720 } 9721 atts[i + 2] = nsname; 9722 } else 9723 nsname = NULL; 9724 /* 9725 * [ WFC: Unique Att Spec ] 9726 * No attribute name may appear more than once in the same 9727 * start-tag or empty-element tag. 9728 * As extended by the Namespace in XML REC. 
9729 */ 9730 for (j = 0; j < i;j += 5) { 9731 if (atts[i] == atts[j]) { 9732 if (atts[i+1] == atts[j+1]) { 9733 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9734 break; 9735 } 9736 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9737 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9738 "Namespaced Attribute %s in '%s' redefined\n", 9739 atts[i], nsname, NULL); 9740 break; 9741 } 9742 } 9743 } 9744 } 9745 9746 nsname = xmlGetNamespace(ctxt, prefix); 9747 if ((prefix != NULL) && (nsname == NULL)) { 9748 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9749 "Namespace prefix %s on %s is not defined\n", 9750 prefix, localname, NULL); 9751 } 9752 *pref = prefix; 9753 *URI = nsname; 9754 9755 /* 9756 * SAX: Start of Element ! 9757 */ 9758 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9759 (!ctxt->disableSAX)) { 9760 if (nbNs > 0) 9761 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9762 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9763 nbatts / 5, nbdef, atts); 9764 else 9765 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9766 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9767 } 9768 9769 /* 9770 * Free up attribute allocated strings if needed 9771 */ 9772 if (attval != 0) { 9773 for (i = 3,j = 0; j < nratts;i += 5,j++) 9774 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9775 xmlFree((xmlChar *) atts[i]); 9776 } 9777 9778 return(localname); 9779 9780base_changed: 9781 /* 9782 * the attribute strings are valid iif the base didn't changed 9783 */ 9784 if (attval != 0) { 9785 for (i = 3,j = 0; j < nratts;i += 5,j++) 9786 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9787 xmlFree((xmlChar *) atts[i]); 9788 } 9789 9790 /* 9791 * We can't switch from one entity to another in the middle 9792 * of a start tag 9793 */ 9794 if (inputNr != ctxt->inputNr) { 9795 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 9796 "Start tag doesn't start and stop in the same entity\n"); 9797 return(NULL); 9798 } 9799 9800 ctxt->input->cur 
= ctxt->input->base + cur; 9801 ctxt->input->line = oldline; 9802 ctxt->input->col = oldcol; 9803 if (ctxt->wellFormed == 1) { 9804 goto reparse; 9805 } 9806 return(NULL); 9807} 9808 9809/** 9810 * xmlParseEndTag2: 9811 * @ctxt: an XML parser context 9812 * @line: line of the start tag 9813 * @nsNr: number of namespaces on the start tag 9814 * 9815 * parse an end of tag 9816 * 9817 * [42] ETag ::= '</' Name S? '>' 9818 * 9819 * With namespace 9820 * 9821 * [NS 9] ETag ::= '</' QName S? '>' 9822 */ 9823 9824static void 9825xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9826 const xmlChar *URI, int line, int nsNr, int tlen) { 9827 const xmlChar *name; 9828 size_t curLength; 9829 9830 GROW; 9831 if ((RAW != '<') || (NXT(1) != '/')) { 9832 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9833 return; 9834 } 9835 SKIP(2); 9836 9837 curLength = ctxt->input->end - ctxt->input->cur; 9838 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9839 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9840 if ((curLength >= (size_t)(tlen + 1)) && 9841 (ctxt->input->cur[tlen] == '>')) { 9842 ctxt->input->cur += tlen + 1; 9843 ctxt->input->col += tlen + 1; 9844 goto done; 9845 } 9846 ctxt->input->cur += tlen; 9847 ctxt->input->col += tlen; 9848 name = (xmlChar*)1; 9849 } else { 9850 if (prefix == NULL) 9851 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9852 else 9853 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9854 } 9855 9856 /* 9857 * We should definitely be at the ending "S? '>'" part 9858 */ 9859 GROW; 9860 if (ctxt->instate == XML_PARSER_EOF) 9861 return; 9862 SKIP_BLANKS; 9863 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9864 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9865 } else 9866 NEXT1; 9867 9868 /* 9869 * [ WFC: Element Type Match ] 9870 * The Name in an element's end-tag must match the element type in the 9871 * start-tag. 
9872 * 9873 */ 9874 if (name != (xmlChar*)1) { 9875 if (name == NULL) name = BAD_CAST "unparseable"; 9876 if ((line == 0) && (ctxt->node != NULL)) 9877 line = ctxt->node->line; 9878 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9879 "Opening and ending tag mismatch: %s line %d and %s\n", 9880 ctxt->name, line, name); 9881 } 9882 9883 /* 9884 * SAX: End of Tag 9885 */ 9886done: 9887 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9888 (!ctxt->disableSAX)) 9889 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9890 9891 spacePop(ctxt); 9892 if (nsNr != 0) 9893 nsPop(ctxt, nsNr); 9894 return; 9895} 9896 9897/** 9898 * xmlParseCDSect: 9899 * @ctxt: an XML parser context 9900 * 9901 * Parse escaped pure raw content. 9902 * 9903 * [18] CDSect ::= CDStart CData CDEnd 9904 * 9905 * [19] CDStart ::= '<![CDATA[' 9906 * 9907 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9908 * 9909 * [21] CDEnd ::= ']]>' 9910 */ 9911void 9912xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9913 xmlChar *buf = NULL; 9914 int len = 0; 9915 int size = XML_PARSER_BUFFER_SIZE; 9916 int r, rl; 9917 int s, sl; 9918 int cur, l; 9919 int count = 0; 9920 9921 /* Check 2.6.0 was NXT(0) not RAW */ 9922 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9923 SKIP(9); 9924 } else 9925 return; 9926 9927 ctxt->instate = XML_PARSER_CDATA_SECTION; 9928 r = CUR_CHAR(rl); 9929 if (!IS_CHAR(r)) { 9930 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9931 ctxt->instate = XML_PARSER_CONTENT; 9932 return; 9933 } 9934 NEXTL(rl); 9935 s = CUR_CHAR(sl); 9936 if (!IS_CHAR(s)) { 9937 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9938 ctxt->instate = XML_PARSER_CONTENT; 9939 return; 9940 } 9941 NEXTL(sl); 9942 cur = CUR_CHAR(l); 9943 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9944 if (buf == NULL) { 9945 xmlErrMemory(ctxt, NULL); 9946 return; 9947 } 9948 while (IS_CHAR(cur) && 9949 ((r != ']') || (s != ']') || (cur != '>'))) { 9950 if (len + 5 >= 
size) { 9951 xmlChar *tmp; 9952 9953 if ((size > XML_MAX_TEXT_LENGTH) && 9954 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9955 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9956 "CData section too big found", NULL); 9957 xmlFree (buf); 9958 return; 9959 } 9960 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9961 if (tmp == NULL) { 9962 xmlFree(buf); 9963 xmlErrMemory(ctxt, NULL); 9964 return; 9965 } 9966 buf = tmp; 9967 size *= 2; 9968 } 9969 COPY_BUF(rl,buf,len,r); 9970 r = s; 9971 rl = sl; 9972 s = cur; 9973 sl = l; 9974 count++; 9975 if (count > 50) { 9976 GROW; 9977 if (ctxt->instate == XML_PARSER_EOF) { 9978 xmlFree(buf); 9979 return; 9980 } 9981 count = 0; 9982 } 9983 NEXTL(l); 9984 cur = CUR_CHAR(l); 9985 } 9986 buf[len] = 0; 9987 ctxt->instate = XML_PARSER_CONTENT; 9988 if (cur != '>') { 9989 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9990 "CData section not finished\n%.50s\n", buf); 9991 xmlFree(buf); 9992 return; 9993 } 9994 NEXTL(l); 9995 9996 /* 9997 * OK the buffer is to be consumed as cdata. 9998 */ 9999 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10000 if (ctxt->sax->cdataBlock != NULL) 10001 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 10002 else if (ctxt->sax->characters != NULL) 10003 ctxt->sax->characters(ctxt->userData, buf, len); 10004 } 10005 xmlFree(buf); 10006} 10007 10008/** 10009 * xmlParseContent: 10010 * @ctxt: an XML parser context 10011 * 10012 * Parse a content: 10013 * 10014 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10015 */ 10016 10017void 10018xmlParseContent(xmlParserCtxtPtr ctxt) { 10019 GROW; 10020 while ((RAW != 0) && 10021 ((RAW != '<') || (NXT(1) != '/')) && 10022 (ctxt->instate != XML_PARSER_EOF)) { 10023 const xmlChar *test = CUR_PTR; 10024 unsigned int cons = ctxt->input->consumed; 10025 const xmlChar *cur = ctxt->input->cur; 10026 10027 /* 10028 * First case : a Processing Instruction. 
10029 */ 10030 if ((*cur == '<') && (cur[1] == '?')) { 10031 xmlParsePI(ctxt); 10032 } 10033 10034 /* 10035 * Second case : a CDSection 10036 */ 10037 /* 2.6.0 test was *cur not RAW */ 10038 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10039 xmlParseCDSect(ctxt); 10040 } 10041 10042 /* 10043 * Third case : a comment 10044 */ 10045 else if ((*cur == '<') && (NXT(1) == '!') && 10046 (NXT(2) == '-') && (NXT(3) == '-')) { 10047 xmlParseComment(ctxt); 10048 ctxt->instate = XML_PARSER_CONTENT; 10049 } 10050 10051 /* 10052 * Fourth case : a sub-element. 10053 */ 10054 else if (*cur == '<') { 10055 xmlParseElement(ctxt); 10056 } 10057 10058 /* 10059 * Fifth case : a reference. If if has not been resolved, 10060 * parsing returns it's Name, create the node 10061 */ 10062 10063 else if (*cur == '&') { 10064 xmlParseReference(ctxt); 10065 } 10066 10067 /* 10068 * Last case, text. Note that References are handled directly. 10069 */ 10070 else { 10071 xmlParseCharData(ctxt, 0); 10072 } 10073 10074 GROW; 10075 /* 10076 * Pop-up of finished entities. 10077 */ 10078 while ((RAW == 0) && (ctxt->inputNr > 1)) 10079 xmlPopInput(ctxt); 10080 SHRINK; 10081 10082 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10083 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10084 "detected an error in element content\n"); 10085 xmlHaltParser(ctxt); 10086 break; 10087 } 10088 } 10089} 10090 10091/** 10092 * xmlParseElement: 10093 * @ctxt: an XML parser context 10094 * 10095 * parse an XML element, this is highly recursive 10096 * 10097 * [39] element ::= EmptyElemTag | STag content ETag 10098 * 10099 * [ WFC: Element Type Match ] 10100 * The Name in an element's end-tag must match the element type in the 10101 * start-tag. 
10102 * 10103 */ 10104 10105void 10106xmlParseElement(xmlParserCtxtPtr ctxt) { 10107 const xmlChar *name; 10108 const xmlChar *prefix = NULL; 10109 const xmlChar *URI = NULL; 10110 xmlParserNodeInfo node_info; 10111 int line, tlen = 0; 10112 xmlNodePtr ret; 10113 int nsNr = ctxt->nsNr; 10114 10115 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10116 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10117 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10118 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10119 xmlParserMaxDepth); 10120 xmlHaltParser(ctxt); 10121 return; 10122 } 10123 10124 /* Capture start position */ 10125 if (ctxt->record_info) { 10126 node_info.begin_pos = ctxt->input->consumed + 10127 (CUR_PTR - ctxt->input->base); 10128 node_info.begin_line = ctxt->input->line; 10129 } 10130 10131 if (ctxt->spaceNr == 0) 10132 spacePush(ctxt, -1); 10133 else if (*ctxt->space == -2) 10134 spacePush(ctxt, -1); 10135 else 10136 spacePush(ctxt, *ctxt->space); 10137 10138 line = ctxt->input->line; 10139#ifdef LIBXML_SAX1_ENABLED 10140 if (ctxt->sax2) 10141#endif /* LIBXML_SAX1_ENABLED */ 10142 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10143#ifdef LIBXML_SAX1_ENABLED 10144 else 10145 name = xmlParseStartTag(ctxt); 10146#endif /* LIBXML_SAX1_ENABLED */ 10147 if (ctxt->instate == XML_PARSER_EOF) 10148 return; 10149 if (name == NULL) { 10150 spacePop(ctxt); 10151 return; 10152 } 10153 namePush(ctxt, name); 10154 ret = ctxt->node; 10155 10156#ifdef LIBXML_VALID_ENABLED 10157 /* 10158 * [ VC: Root Element Type ] 10159 * The Name in the document type declaration must match the element 10160 * type of the root element. 10161 */ 10162 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10163 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10164 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10165#endif /* LIBXML_VALID_ENABLED */ 10166 10167 /* 10168 * Check for an Empty Element. 
10169 */ 10170 if ((RAW == '/') && (NXT(1) == '>')) { 10171 SKIP(2); 10172 if (ctxt->sax2) { 10173 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10174 (!ctxt->disableSAX)) 10175 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10176#ifdef LIBXML_SAX1_ENABLED 10177 } else { 10178 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10179 (!ctxt->disableSAX)) 10180 ctxt->sax->endElement(ctxt->userData, name); 10181#endif /* LIBXML_SAX1_ENABLED */ 10182 } 10183 namePop(ctxt); 10184 spacePop(ctxt); 10185 if (nsNr != ctxt->nsNr) 10186 nsPop(ctxt, ctxt->nsNr - nsNr); 10187 if ( ret != NULL && ctxt->record_info ) { 10188 node_info.end_pos = ctxt->input->consumed + 10189 (CUR_PTR - ctxt->input->base); 10190 node_info.end_line = ctxt->input->line; 10191 node_info.node = ret; 10192 xmlParserAddNodeInfo(ctxt, &node_info); 10193 } 10194 return; 10195 } 10196 if (RAW == '>') { 10197 NEXT1; 10198 } else { 10199 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10200 "Couldn't find end of Start Tag %s line %d\n", 10201 name, line, NULL); 10202 10203 /* 10204 * end of parsing of this node. 10205 */ 10206 nodePop(ctxt); 10207 namePop(ctxt); 10208 spacePop(ctxt); 10209 if (nsNr != ctxt->nsNr) 10210 nsPop(ctxt, ctxt->nsNr - nsNr); 10211 10212 /* 10213 * Capture end position and add node 10214 */ 10215 if ( ret != NULL && ctxt->record_info ) { 10216 node_info.end_pos = ctxt->input->consumed + 10217 (CUR_PTR - ctxt->input->base); 10218 node_info.end_line = ctxt->input->line; 10219 node_info.node = ret; 10220 xmlParserAddNodeInfo(ctxt, &node_info); 10221 } 10222 return; 10223 } 10224 10225 /* 10226 * Parse the content of the element: 10227 */ 10228 xmlParseContent(ctxt); 10229 if (ctxt->instate == XML_PARSER_EOF) 10230 return; 10231 if (!IS_BYTE_CHAR(RAW)) { 10232 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10233 "Premature end of data in tag %s line %d\n", 10234 name, line, NULL); 10235 10236 /* 10237 * end of parsing of this node. 
10238 */ 10239 nodePop(ctxt); 10240 namePop(ctxt); 10241 spacePop(ctxt); 10242 if (nsNr != ctxt->nsNr) 10243 nsPop(ctxt, ctxt->nsNr - nsNr); 10244 return; 10245 } 10246 10247 /* 10248 * parse the end of tag: '</' should be here. 10249 */ 10250 if (ctxt->sax2) { 10251 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10252 namePop(ctxt); 10253 } 10254#ifdef LIBXML_SAX1_ENABLED 10255 else 10256 xmlParseEndTag1(ctxt, line); 10257#endif /* LIBXML_SAX1_ENABLED */ 10258 10259 /* 10260 * Capture end position and add node 10261 */ 10262 if ( ret != NULL && ctxt->record_info ) { 10263 node_info.end_pos = ctxt->input->consumed + 10264 (CUR_PTR - ctxt->input->base); 10265 node_info.end_line = ctxt->input->line; 10266 node_info.node = ret; 10267 xmlParserAddNodeInfo(ctxt, &node_info); 10268 } 10269} 10270 10271/** 10272 * xmlParseVersionNum: 10273 * @ctxt: an XML parser context 10274 * 10275 * parse the XML version value. 10276 * 10277 * [26] VersionNum ::= '1.' [0-9]+ 10278 * 10279 * In practice allow [0-9].[0-9]+ at that level 10280 * 10281 * Returns the string giving the XML version number, or NULL 10282 */ 10283xmlChar * 10284xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10285 xmlChar *buf = NULL; 10286 int len = 0; 10287 int size = 10; 10288 xmlChar cur; 10289 10290 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10291 if (buf == NULL) { 10292 xmlErrMemory(ctxt, NULL); 10293 return(NULL); 10294 } 10295 cur = CUR; 10296 if (!((cur >= '0') && (cur <= '9'))) { 10297 xmlFree(buf); 10298 return(NULL); 10299 } 10300 buf[len++] = cur; 10301 NEXT; 10302 cur=CUR; 10303 if (cur != '.') { 10304 xmlFree(buf); 10305 return(NULL); 10306 } 10307 buf[len++] = cur; 10308 NEXT; 10309 cur=CUR; 10310 while ((cur >= '0') && (cur <= '9')) { 10311 if (len + 1 >= size) { 10312 xmlChar *tmp; 10313 10314 size *= 2; 10315 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10316 if (tmp == NULL) { 10317 xmlFree(buf); 10318 xmlErrMemory(ctxt, NULL); 10319 
return(NULL); 10320 } 10321 buf = tmp; 10322 } 10323 buf[len++] = cur; 10324 NEXT; 10325 cur=CUR; 10326 } 10327 buf[len] = 0; 10328 return(buf); 10329} 10330 10331/** 10332 * xmlParseVersionInfo: 10333 * @ctxt: an XML parser context 10334 * 10335 * parse the XML version. 10336 * 10337 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10338 * 10339 * [25] Eq ::= S? '=' S? 10340 * 10341 * Returns the version string, e.g. "1.0" 10342 */ 10343 10344xmlChar * 10345xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10346 xmlChar *version = NULL; 10347 10348 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10349 SKIP(7); 10350 SKIP_BLANKS; 10351 if (RAW != '=') { 10352 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10353 return(NULL); 10354 } 10355 NEXT; 10356 SKIP_BLANKS; 10357 if (RAW == '"') { 10358 NEXT; 10359 version = xmlParseVersionNum(ctxt); 10360 if (RAW != '"') { 10361 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10362 } else 10363 NEXT; 10364 } else if (RAW == '\''){ 10365 NEXT; 10366 version = xmlParseVersionNum(ctxt); 10367 if (RAW != '\'') { 10368 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10369 } else 10370 NEXT; 10371 } else { 10372 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10373 } 10374 } 10375 return(version); 10376} 10377 10378/** 10379 * xmlParseEncName: 10380 * @ctxt: an XML parser context 10381 * 10382 * parse the XML encoding name 10383 * 10384 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10385 * 10386 * Returns the encoding name value or NULL 10387 */ 10388xmlChar * 10389xmlParseEncName(xmlParserCtxtPtr ctxt) { 10390 xmlChar *buf = NULL; 10391 int len = 0; 10392 int size = 10; 10393 xmlChar cur; 10394 10395 cur = CUR; 10396 if (((cur >= 'a') && (cur <= 'z')) || 10397 ((cur >= 'A') && (cur <= 'Z'))) { 10398 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10399 if (buf == NULL) { 10400 xmlErrMemory(ctxt, NULL); 10401 return(NULL); 10402 } 10403 10404 buf[len++] = cur; 10405 NEXT; 
10406 cur = CUR; 10407 while (((cur >= 'a') && (cur <= 'z')) || 10408 ((cur >= 'A') && (cur <= 'Z')) || 10409 ((cur >= '0') && (cur <= '9')) || 10410 (cur == '.') || (cur == '_') || 10411 (cur == '-')) { 10412 if (len + 1 >= size) { 10413 xmlChar *tmp; 10414 10415 size *= 2; 10416 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10417 if (tmp == NULL) { 10418 xmlErrMemory(ctxt, NULL); 10419 xmlFree(buf); 10420 return(NULL); 10421 } 10422 buf = tmp; 10423 } 10424 buf[len++] = cur; 10425 NEXT; 10426 cur = CUR; 10427 if (cur == 0) { 10428 SHRINK; 10429 GROW; 10430 cur = CUR; 10431 } 10432 } 10433 buf[len] = 0; 10434 } else { 10435 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10436 } 10437 return(buf); 10438} 10439 10440/** 10441 * xmlParseEncodingDecl: 10442 * @ctxt: an XML parser context 10443 * 10444 * parse the XML encoding declaration 10445 * 10446 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10447 * 10448 * this setups the conversion filters. 
10449 * 10450 * Returns the encoding value or NULL 10451 */ 10452 10453const xmlChar * 10454xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10455 xmlChar *encoding = NULL; 10456 10457 SKIP_BLANKS; 10458 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10459 SKIP(8); 10460 SKIP_BLANKS; 10461 if (RAW != '=') { 10462 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10463 return(NULL); 10464 } 10465 NEXT; 10466 SKIP_BLANKS; 10467 if (RAW == '"') { 10468 NEXT; 10469 encoding = xmlParseEncName(ctxt); 10470 if (RAW != '"') { 10471 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10472 xmlFree((xmlChar *) encoding); 10473 return(NULL); 10474 } else 10475 NEXT; 10476 } else if (RAW == '\''){ 10477 NEXT; 10478 encoding = xmlParseEncName(ctxt); 10479 if (RAW != '\'') { 10480 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10481 xmlFree((xmlChar *) encoding); 10482 return(NULL); 10483 } else 10484 NEXT; 10485 } else { 10486 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10487 } 10488 10489 /* 10490 * Non standard parsing, allowing the user to ignore encoding 10491 */ 10492 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10493 xmlFree((xmlChar *) encoding); 10494 return(NULL); 10495 } 10496 10497 /* 10498 * UTF-16 encoding stwich has already taken place at this stage, 10499 * more over the little-endian/big-endian selection is already done 10500 */ 10501 if ((encoding != NULL) && 10502 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10503 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10504 /* 10505 * If no encoding was passed to the parser, that we are 10506 * using UTF-16 and no decoder is present i.e. 
the 10507 * document is apparently UTF-8 compatible, then raise an 10508 * encoding mismatch fatal error 10509 */ 10510 if ((ctxt->encoding == NULL) && 10511 (ctxt->input->buf != NULL) && 10512 (ctxt->input->buf->encoder == NULL)) { 10513 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10514 "Document labelled UTF-16 but has UTF-8 content\n"); 10515 } 10516 if (ctxt->encoding != NULL) 10517 xmlFree((xmlChar *) ctxt->encoding); 10518 ctxt->encoding = encoding; 10519 } 10520 /* 10521 * UTF-8 encoding is handled natively 10522 */ 10523 else if ((encoding != NULL) && 10524 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10525 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10526 if (ctxt->encoding != NULL) 10527 xmlFree((xmlChar *) ctxt->encoding); 10528 ctxt->encoding = encoding; 10529 } 10530 else if (encoding != NULL) { 10531 xmlCharEncodingHandlerPtr handler; 10532 10533 if (ctxt->input->encoding != NULL) 10534 xmlFree((xmlChar *) ctxt->input->encoding); 10535 ctxt->input->encoding = encoding; 10536 10537 handler = xmlFindCharEncodingHandler((const char *) encoding); 10538 if (handler != NULL) { 10539 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10540 /* failed to convert */ 10541 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10542 return(NULL); 10543 } 10544 } else { 10545 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10546 "Unsupported encoding %s\n", encoding); 10547 return(NULL); 10548 } 10549 } 10550 } 10551 return(encoding); 10552} 10553 10554/** 10555 * xmlParseSDDecl: 10556 * @ctxt: an XML parser context 10557 * 10558 * parse the XML standalone declaration 10559 * 10560 * [32] SDDecl ::= S 'standalone' Eq 10561 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10562 * 10563 * [ VC: Standalone Document Declaration ] 10564 * TODO The standalone document declaration must have the value "no" 10565 * if any external markup declarations contain declarations of: 10566 * - attributes with default values, if elements to which these 10567 * attributes 
apply appear in the document without specifications 10568 * of values for these attributes, or 10569 * - entities (other than amp, lt, gt, apos, quot), if references 10570 * to those entities appear in the document, or 10571 * - attributes with values subject to normalization, where the 10572 * attribute appears in the document with a value which will change 10573 * as a result of normalization, or 10574 * - element types with element content, if white space occurs directly 10575 * within any instance of those types. 10576 * 10577 * Returns: 10578 * 1 if standalone="yes" 10579 * 0 if standalone="no" 10580 * -2 if standalone attribute is missing or invalid 10581 * (A standalone value of -2 means that the XML declaration was found, 10582 * but no value was specified for the standalone attribute). 10583 */ 10584 10585int 10586xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10587 int standalone = -2; 10588 10589 SKIP_BLANKS; 10590 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10591 SKIP(10); 10592 SKIP_BLANKS; 10593 if (RAW != '=') { 10594 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10595 return(standalone); 10596 } 10597 NEXT; 10598 SKIP_BLANKS; 10599 if (RAW == '\''){ 10600 NEXT; 10601 if ((RAW == 'n') && (NXT(1) == 'o')) { 10602 standalone = 0; 10603 SKIP(2); 10604 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10605 (NXT(2) == 's')) { 10606 standalone = 1; 10607 SKIP(3); 10608 } else { 10609 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10610 } 10611 if (RAW != '\'') { 10612 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10613 } else 10614 NEXT; 10615 } else if (RAW == '"'){ 10616 NEXT; 10617 if ((RAW == 'n') && (NXT(1) == 'o')) { 10618 standalone = 0; 10619 SKIP(2); 10620 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10621 (NXT(2) == 's')) { 10622 standalone = 1; 10623 SKIP(3); 10624 } else { 10625 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10626 } 10627 if (RAW != '"') { 10628 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10629 } else 10630 NEXT; 10631 } else { 10632 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10633 } 10634 } 10635 return(standalone); 10636} 10637 10638/** 10639 * xmlParseXMLDecl: 10640 * @ctxt: an XML parser context 10641 * 10642 * parse an XML declaration header 10643 * 10644 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10645 */ 10646 10647void 10648xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10649 xmlChar *version; 10650 10651 /* 10652 * This value for standalone indicates that the document has an 10653 * XML declaration but it does not have a standalone attribute. 10654 * It will be overwritten later if a standalone attribute is found. 10655 */ 10656 ctxt->input->standalone = -2; 10657 10658 /* 10659 * We know that '<?xml' is here. 10660 */ 10661 SKIP(5); 10662 10663 if (!IS_BLANK_CH(RAW)) { 10664 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10665 "Blank needed after '<?xml'\n"); 10666 } 10667 SKIP_BLANKS; 10668 10669 /* 10670 * We must have the VersionInfo here. 
10671 */ 10672 version = xmlParseVersionInfo(ctxt); 10673 if (version == NULL) { 10674 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10675 } else { 10676 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10677 /* 10678 * Changed here for XML-1.0 5th edition 10679 */ 10680 if (ctxt->options & XML_PARSE_OLD10) { 10681 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10682 "Unsupported version '%s'\n", 10683 version); 10684 } else { 10685 if ((version[0] == '1') && ((version[1] == '.'))) { 10686 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10687 "Unsupported version '%s'\n", 10688 version, NULL); 10689 } else { 10690 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10691 "Unsupported version '%s'\n", 10692 version); 10693 } 10694 } 10695 } 10696 if (ctxt->version != NULL) 10697 xmlFree((void *) ctxt->version); 10698 ctxt->version = version; 10699 } 10700 10701 /* 10702 * We may have the encoding declaration 10703 */ 10704 if (!IS_BLANK_CH(RAW)) { 10705 if ((RAW == '?') && (NXT(1) == '>')) { 10706 SKIP(2); 10707 return; 10708 } 10709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10710 } 10711 xmlParseEncodingDecl(ctxt); 10712 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10713 (ctxt->instate == XML_PARSER_EOF)) { 10714 /* 10715 * The XML REC instructs us to stop parsing right here 10716 */ 10717 return; 10718 } 10719 10720 /* 10721 * We may have the standalone status. 
10722 */ 10723 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10724 if ((RAW == '?') && (NXT(1) == '>')) { 10725 SKIP(2); 10726 return; 10727 } 10728 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10729 } 10730 10731 /* 10732 * We can grow the input buffer freely at that point 10733 */ 10734 GROW; 10735 10736 SKIP_BLANKS; 10737 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10738 10739 SKIP_BLANKS; 10740 if ((RAW == '?') && (NXT(1) == '>')) { 10741 SKIP(2); 10742 } else if (RAW == '>') { 10743 /* Deprecated old WD ... */ 10744 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10745 NEXT; 10746 } else { 10747 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10748 MOVETO_ENDTAG(CUR_PTR); 10749 NEXT; 10750 } 10751} 10752 10753/** 10754 * xmlParseMisc: 10755 * @ctxt: an XML parser context 10756 * 10757 * parse an XML Misc* optional field. 10758 * 10759 * [27] Misc ::= Comment | PI | S 10760 */ 10761 10762void 10763xmlParseMisc(xmlParserCtxtPtr ctxt) { 10764 while ((ctxt->instate != XML_PARSER_EOF) && 10765 (((RAW == '<') && (NXT(1) == '?')) || 10766 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10767 IS_BLANK_CH(CUR))) { 10768 if ((RAW == '<') && (NXT(1) == '?')) { 10769 xmlParsePI(ctxt); 10770 } else if (IS_BLANK_CH(CUR)) { 10771 NEXT; 10772 } else 10773 xmlParseComment(ctxt); 10774 } 10775} 10776 10777/** 10778 * xmlParseDocument: 10779 * @ctxt: an XML parser context 10780 * 10781 * parse an XML document (and build a tree if using the standard SAX 10782 * interface). 10783 * 10784 * [1] document ::= prolog element Misc* 10785 * 10786 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10787 * 10788 * Returns 0, -1 in case of error. the parser context is augmented 10789 * as a result of the parsing. 
10790 */ 10791 10792int 10793xmlParseDocument(xmlParserCtxtPtr ctxt) { 10794 xmlChar start[4]; 10795 xmlCharEncoding enc; 10796 10797 xmlInitParser(); 10798 10799 if ((ctxt == NULL) || (ctxt->input == NULL)) 10800 return(-1); 10801 10802 GROW; 10803 10804 /* 10805 * SAX: detecting the level. 10806 */ 10807 xmlDetectSAX2(ctxt); 10808 10809 /* 10810 * SAX: beginning of the document processing. 10811 */ 10812 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10813 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10814 if (ctxt->instate == XML_PARSER_EOF) 10815 return(-1); 10816 10817 if ((ctxt->encoding == NULL) && 10818 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10819 /* 10820 * Get the 4 first bytes and decode the charset 10821 * if enc != XML_CHAR_ENCODING_NONE 10822 * plug some encoding conversion routines. 10823 */ 10824 start[0] = RAW; 10825 start[1] = NXT(1); 10826 start[2] = NXT(2); 10827 start[3] = NXT(3); 10828 enc = xmlDetectCharEncoding(&start[0], 4); 10829 if (enc != XML_CHAR_ENCODING_NONE) { 10830 xmlSwitchEncoding(ctxt, enc); 10831 } 10832 } 10833 10834 10835 if (CUR == 0) { 10836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10837 return(-1); 10838 } 10839 10840 /* 10841 * Check for the XMLDecl in the Prolog. 10842 * do not GROW here to avoid the detected encoder to decode more 10843 * than just the first line, unless the amount of data is really 10844 * too small to hold "<?xml version="1.0" encoding="foo" 10845 */ 10846 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10847 GROW; 10848 } 10849 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10850 10851 /* 10852 * Note that we will switch encoding on the fly. 
10853 */ 10854 xmlParseXMLDecl(ctxt); 10855 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10856 (ctxt->instate == XML_PARSER_EOF)) { 10857 /* 10858 * The XML REC instructs us to stop parsing right here 10859 */ 10860 return(-1); 10861 } 10862 ctxt->standalone = ctxt->input->standalone; 10863 SKIP_BLANKS; 10864 } else { 10865 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10866 } 10867 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10868 ctxt->sax->startDocument(ctxt->userData); 10869 if (ctxt->instate == XML_PARSER_EOF) 10870 return(-1); 10871 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10872 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10873 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10874 } 10875 10876 /* 10877 * The Misc part of the Prolog 10878 */ 10879 GROW; 10880 xmlParseMisc(ctxt); 10881 10882 /* 10883 * Then possibly doc type declaration(s) and more Misc 10884 * (doctypedecl Misc*)? 10885 */ 10886 GROW; 10887 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10888 10889 ctxt->inSubset = 1; 10890 xmlParseDocTypeDecl(ctxt); 10891 if (RAW == '[') { 10892 ctxt->instate = XML_PARSER_DTD; 10893 xmlParseInternalSubset(ctxt); 10894 if (ctxt->instate == XML_PARSER_EOF) 10895 return(-1); 10896 } 10897 10898 /* 10899 * Create and update the external subset. 
10900 */ 10901 ctxt->inSubset = 2; 10902 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10903 (!ctxt->disableSAX)) 10904 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10905 ctxt->extSubSystem, ctxt->extSubURI); 10906 if (ctxt->instate == XML_PARSER_EOF) 10907 return(-1); 10908 ctxt->inSubset = 0; 10909 10910 xmlCleanSpecialAttr(ctxt); 10911 10912 ctxt->instate = XML_PARSER_PROLOG; 10913 xmlParseMisc(ctxt); 10914 } 10915 10916 /* 10917 * Time to start parsing the tree itself 10918 */ 10919 GROW; 10920 if (RAW != '<') { 10921 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10922 "Start tag expected, '<' not found\n"); 10923 } else { 10924 ctxt->instate = XML_PARSER_CONTENT; 10925 xmlParseElement(ctxt); 10926 ctxt->instate = XML_PARSER_EPILOG; 10927 10928 10929 /* 10930 * The Misc part at the end 10931 */ 10932 xmlParseMisc(ctxt); 10933 10934 if (RAW != 0) { 10935 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10936 } 10937 ctxt->instate = XML_PARSER_EOF; 10938 } 10939 10940 /* 10941 * SAX: end of the document processing. 10942 */ 10943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10944 ctxt->sax->endDocument(ctxt->userData); 10945 10946 /* 10947 * Remove locally kept entity definitions if the tree was not built 10948 */ 10949 if ((ctxt->myDoc != NULL) && 10950 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10951 xmlFreeDoc(ctxt->myDoc); 10952 ctxt->myDoc = NULL; 10953 } 10954 10955 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10956 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10957 if (ctxt->valid) 10958 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10959 if (ctxt->nsWellFormed) 10960 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10961 if (ctxt->options & XML_PARSE_OLD10) 10962 ctxt->myDoc->properties |= XML_DOC_OLD10; 10963 } 10964 if (! 
ctxt->wellFormed) { 10965 ctxt->valid = 0; 10966 return(-1); 10967 } 10968 return(0); 10969} 10970 10971/** 10972 * xmlParseExtParsedEnt: 10973 * @ctxt: an XML parser context 10974 * 10975 * parse a general parsed entity 10976 * An external general parsed entity is well-formed if it matches the 10977 * production labeled extParsedEnt. 10978 * 10979 * [78] extParsedEnt ::= TextDecl? content 10980 * 10981 * Returns 0, -1 in case of error. the parser context is augmented 10982 * as a result of the parsing. 10983 */ 10984 10985int 10986xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10987 xmlChar start[4]; 10988 xmlCharEncoding enc; 10989 10990 if ((ctxt == NULL) || (ctxt->input == NULL)) 10991 return(-1); 10992 10993 xmlDefaultSAXHandlerInit(); 10994 10995 xmlDetectSAX2(ctxt); 10996 10997 GROW; 10998 10999 /* 11000 * SAX: beginning of the document processing. 11001 */ 11002 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11003 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11004 11005 /* 11006 * Get the 4 first bytes and decode the charset 11007 * if enc != XML_CHAR_ENCODING_NONE 11008 * plug some encoding conversion routines. 11009 */ 11010 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11011 start[0] = RAW; 11012 start[1] = NXT(1); 11013 start[2] = NXT(2); 11014 start[3] = NXT(3); 11015 enc = xmlDetectCharEncoding(start, 4); 11016 if (enc != XML_CHAR_ENCODING_NONE) { 11017 xmlSwitchEncoding(ctxt, enc); 11018 } 11019 } 11020 11021 11022 if (CUR == 0) { 11023 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11024 } 11025 11026 /* 11027 * Check for the XMLDecl in the Prolog. 11028 */ 11029 GROW; 11030 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11031 11032 /* 11033 * Note that we will switch encoding on the fly. 
11034 */ 11035 xmlParseXMLDecl(ctxt); 11036 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11037 /* 11038 * The XML REC instructs us to stop parsing right here 11039 */ 11040 return(-1); 11041 } 11042 SKIP_BLANKS; 11043 } else { 11044 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11045 } 11046 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11047 ctxt->sax->startDocument(ctxt->userData); 11048 if (ctxt->instate == XML_PARSER_EOF) 11049 return(-1); 11050 11051 /* 11052 * Doing validity checking on chunk doesn't make sense 11053 */ 11054 ctxt->instate = XML_PARSER_CONTENT; 11055 ctxt->validate = 0; 11056 ctxt->loadsubset = 0; 11057 ctxt->depth = 0; 11058 11059 xmlParseContent(ctxt); 11060 if (ctxt->instate == XML_PARSER_EOF) 11061 return(-1); 11062 11063 if ((RAW == '<') && (NXT(1) == '/')) { 11064 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11065 } else if (RAW != 0) { 11066 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11067 } 11068 11069 /* 11070 * SAX: end of the document processing. 11071 */ 11072 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11073 ctxt->sax->endDocument(ctxt->userData); 11074 11075 if (! ctxt->wellFormed) return(-1); 11076 return(0); 11077} 11078 11079#ifdef LIBXML_PUSH_ENABLED 11080/************************************************************************ 11081 * * 11082 * Progressive parsing interfaces * 11083 * * 11084 ************************************************************************/ 11085 11086/** 11087 * xmlParseLookupSequence: 11088 * @ctxt: an XML parser context 11089 * @first: the first char to lookup 11090 * @next: the next char to lookup or zero 11091 * @third: the next char to lookup or zero 11092 * 11093 * Try to find if a sequence (first, next, third) or just (first next) or 11094 * (first) is available in the input stream. 
11095 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 11096 * to avoid rescanning sequences of bytes, it DOES change the state of the 11097 * parser, do not use liberally. 11098 * 11099 * Returns the index to the current parsing point if the full sequence 11100 * is available, -1 otherwise. 11101 */ 11102static int 11103xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11104 xmlChar next, xmlChar third) { 11105 int base, len; 11106 xmlParserInputPtr in; 11107 const xmlChar *buf; 11108 11109 in = ctxt->input; 11110 if (in == NULL) return(-1); 11111 base = in->cur - in->base; 11112 if (base < 0) return(-1); 11113 if (ctxt->checkIndex > base) 11114 base = ctxt->checkIndex; 11115 if (in->buf == NULL) { 11116 buf = in->base; 11117 len = in->length; 11118 } else { 11119 buf = xmlBufContent(in->buf->buffer); 11120 len = xmlBufUse(in->buf->buffer); 11121 } 11122 /* take into account the sequence length */ 11123 if (third) len -= 2; 11124 else if (next) len --; 11125 for (;base < len;base++) { 11126 if (buf[base] == first) { 11127 if (third != 0) { 11128 if ((buf[base + 1] != next) || 11129 (buf[base + 2] != third)) continue; 11130 } else if (next != 0) { 11131 if (buf[base + 1] != next) continue; 11132 } 11133 ctxt->checkIndex = 0; 11134#ifdef DEBUG_PUSH 11135 if (next == 0) 11136 xmlGenericError(xmlGenericErrorContext, 11137 "PP: lookup '%c' found at %d\n", 11138 first, base); 11139 else if (third == 0) 11140 xmlGenericError(xmlGenericErrorContext, 11141 "PP: lookup '%c%c' found at %d\n", 11142 first, next, base); 11143 else 11144 xmlGenericError(xmlGenericErrorContext, 11145 "PP: lookup '%c%c%c' found at %d\n", 11146 first, next, third, base); 11147#endif 11148 return(base - (in->cur - in->base)); 11149 } 11150 } 11151 ctxt->checkIndex = base; 11152#ifdef DEBUG_PUSH 11153 if (next == 0) 11154 xmlGenericError(xmlGenericErrorContext, 11155 "PP: lookup '%c' failed\n", first); 11156 else if (third == 0) 11157 
xmlGenericError(xmlGenericErrorContext, 11158 "PP: lookup '%c%c' failed\n", first, next); 11159 else 11160 xmlGenericError(xmlGenericErrorContext, 11161 "PP: lookup '%c%c%c' failed\n", first, next, third); 11162#endif 11163 return(-1); 11164} 11165 11166/** 11167 * xmlParseGetLasts: 11168 * @ctxt: an XML parser context 11169 * @lastlt: pointer to store the last '<' from the input 11170 * @lastgt: pointer to store the last '>' from the input 11171 * 11172 * Lookup the last < and > in the current chunk 11173 */ 11174static void 11175xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11176 const xmlChar **lastgt) { 11177 const xmlChar *tmp; 11178 11179 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11180 xmlGenericError(xmlGenericErrorContext, 11181 "Internal error: xmlParseGetLasts\n"); 11182 return; 11183 } 11184 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11185 tmp = ctxt->input->end; 11186 tmp--; 11187 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11188 if (tmp < ctxt->input->base) { 11189 *lastlt = NULL; 11190 *lastgt = NULL; 11191 } else { 11192 *lastlt = tmp; 11193 tmp++; 11194 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11195 if (*tmp == '\'') { 11196 tmp++; 11197 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11198 if (tmp < ctxt->input->end) tmp++; 11199 } else if (*tmp == '"') { 11200 tmp++; 11201 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11202 if (tmp < ctxt->input->end) tmp++; 11203 } else 11204 tmp++; 11205 } 11206 if (tmp < ctxt->input->end) 11207 *lastgt = tmp; 11208 else { 11209 tmp = *lastlt; 11210 tmp--; 11211 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11212 if (tmp >= ctxt->input->base) 11213 *lastgt = tmp; 11214 else 11215 *lastgt = NULL; 11216 } 11217 } 11218 } else { 11219 *lastlt = NULL; 11220 *lastgt = NULL; 11221 } 11222} 11223/** 11224 * xmlCheckCdataPush: 11225 * @cur: pointer to the block of characters 11226 * @len: length of the block in 
bytes 11227 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11228 * 11229 * Check that the block of characters is okay as SCdata content [20] 11230 * 11231 * Returns the number of bytes to pass if okay, a negative index where an 11232 * UTF-8 error occured otherwise 11233 */ 11234static int 11235xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11236 int ix; 11237 unsigned char c; 11238 int codepoint; 11239 11240 if ((utf == NULL) || (len <= 0)) 11241 return(0); 11242 11243 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11244 c = utf[ix]; 11245 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11246 if (c >= 0x20) 11247 ix++; 11248 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11249 ix++; 11250 else 11251 return(-ix); 11252 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11253 if (ix + 2 > len) return(complete ? -ix : ix); 11254 if ((utf[ix+1] & 0xc0 ) != 0x80) 11255 return(-ix); 11256 codepoint = (utf[ix] & 0x1f) << 6; 11257 codepoint |= utf[ix+1] & 0x3f; 11258 if (!xmlIsCharQ(codepoint)) 11259 return(-ix); 11260 ix += 2; 11261 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11262 if (ix + 3 > len) return(complete ? -ix : ix); 11263 if (((utf[ix+1] & 0xc0) != 0x80) || 11264 ((utf[ix+2] & 0xc0) != 0x80)) 11265 return(-ix); 11266 codepoint = (utf[ix] & 0xf) << 12; 11267 codepoint |= (utf[ix+1] & 0x3f) << 6; 11268 codepoint |= utf[ix+2] & 0x3f; 11269 if (!xmlIsCharQ(codepoint)) 11270 return(-ix); 11271 ix += 3; 11272 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11273 if (ix + 4 > len) return(complete ? 
-ix : ix); 11274 if (((utf[ix+1] & 0xc0) != 0x80) || 11275 ((utf[ix+2] & 0xc0) != 0x80) || 11276 ((utf[ix+3] & 0xc0) != 0x80)) 11277 return(-ix); 11278 codepoint = (utf[ix] & 0x7) << 18; 11279 codepoint |= (utf[ix+1] & 0x3f) << 12; 11280 codepoint |= (utf[ix+2] & 0x3f) << 6; 11281 codepoint |= utf[ix+3] & 0x3f; 11282 if (!xmlIsCharQ(codepoint)) 11283 return(-ix); 11284 ix += 4; 11285 } else /* unknown encoding */ 11286 return(-ix); 11287 } 11288 return(ix); 11289} 11290 11291/** 11292 * xmlParseTryOrFinish: 11293 * @ctxt: an XML parser context 11294 * @terminate: last chunk indicator 11295 * 11296 * Try to progress on parsing 11297 * 11298 * Returns zero if no parsing was possible 11299 */ 11300static int 11301xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11302 int ret = 0; 11303 int avail, tlen; 11304 xmlChar cur, next; 11305 const xmlChar *lastlt, *lastgt; 11306 11307 if (ctxt->input == NULL) 11308 return(0); 11309 11310#ifdef DEBUG_PUSH 11311 switch (ctxt->instate) { 11312 case XML_PARSER_EOF: 11313 xmlGenericError(xmlGenericErrorContext, 11314 "PP: try EOF\n"); break; 11315 case XML_PARSER_START: 11316 xmlGenericError(xmlGenericErrorContext, 11317 "PP: try START\n"); break; 11318 case XML_PARSER_MISC: 11319 xmlGenericError(xmlGenericErrorContext, 11320 "PP: try MISC\n");break; 11321 case XML_PARSER_COMMENT: 11322 xmlGenericError(xmlGenericErrorContext, 11323 "PP: try COMMENT\n");break; 11324 case XML_PARSER_PROLOG: 11325 xmlGenericError(xmlGenericErrorContext, 11326 "PP: try PROLOG\n");break; 11327 case XML_PARSER_START_TAG: 11328 xmlGenericError(xmlGenericErrorContext, 11329 "PP: try START_TAG\n");break; 11330 case XML_PARSER_CONTENT: 11331 xmlGenericError(xmlGenericErrorContext, 11332 "PP: try CONTENT\n");break; 11333 case XML_PARSER_CDATA_SECTION: 11334 xmlGenericError(xmlGenericErrorContext, 11335 "PP: try CDATA_SECTION\n");break; 11336 case XML_PARSER_END_TAG: 11337 xmlGenericError(xmlGenericErrorContext, 11338 "PP: try END_TAG\n");break; 
11339 case XML_PARSER_ENTITY_DECL: 11340 xmlGenericError(xmlGenericErrorContext, 11341 "PP: try ENTITY_DECL\n");break; 11342 case XML_PARSER_ENTITY_VALUE: 11343 xmlGenericError(xmlGenericErrorContext, 11344 "PP: try ENTITY_VALUE\n");break; 11345 case XML_PARSER_ATTRIBUTE_VALUE: 11346 xmlGenericError(xmlGenericErrorContext, 11347 "PP: try ATTRIBUTE_VALUE\n");break; 11348 case XML_PARSER_DTD: 11349 xmlGenericError(xmlGenericErrorContext, 11350 "PP: try DTD\n");break; 11351 case XML_PARSER_EPILOG: 11352 xmlGenericError(xmlGenericErrorContext, 11353 "PP: try EPILOG\n");break; 11354 case XML_PARSER_PI: 11355 xmlGenericError(xmlGenericErrorContext, 11356 "PP: try PI\n");break; 11357 case XML_PARSER_IGNORE: 11358 xmlGenericError(xmlGenericErrorContext, 11359 "PP: try IGNORE\n");break; 11360 } 11361#endif 11362 11363 if ((ctxt->input != NULL) && 11364 (ctxt->input->cur - ctxt->input->base > 4096)) { 11365 xmlSHRINK(ctxt); 11366 ctxt->checkIndex = 0; 11367 } 11368 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11369 11370 while (ctxt->instate != XML_PARSER_EOF) { 11371 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11372 return(0); 11373 11374 11375 /* 11376 * Pop-up of finished entities. 11377 */ 11378 while ((RAW == 0) && (ctxt->inputNr > 1)) 11379 xmlPopInput(ctxt); 11380 11381 if (ctxt->input == NULL) break; 11382 if (ctxt->input->buf == NULL) 11383 avail = ctxt->input->length - 11384 (ctxt->input->cur - ctxt->input->base); 11385 else { 11386 /* 11387 * If we are operating on converted input, try to flush 11388 * remainng chars to avoid them stalling in the non-converted 11389 * buffer. But do not do this in document start where 11390 * encoding="..." may not have been read and we work on a 11391 * guessed encoding. 
11392 */ 11393 if ((ctxt->instate != XML_PARSER_START) && 11394 (ctxt->input->buf->raw != NULL) && 11395 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11396 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11397 ctxt->input); 11398 size_t current = ctxt->input->cur - ctxt->input->base; 11399 11400 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11401 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11402 base, current); 11403 } 11404 avail = xmlBufUse(ctxt->input->buf->buffer) - 11405 (ctxt->input->cur - ctxt->input->base); 11406 } 11407 if (avail < 1) 11408 goto done; 11409 switch (ctxt->instate) { 11410 case XML_PARSER_EOF: 11411 /* 11412 * Document parsing is done ! 11413 */ 11414 goto done; 11415 case XML_PARSER_START: 11416 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11417 xmlChar start[4]; 11418 xmlCharEncoding enc; 11419 11420 /* 11421 * Very first chars read from the document flow. 11422 */ 11423 if (avail < 4) 11424 goto done; 11425 11426 /* 11427 * Get the 4 first bytes and decode the charset 11428 * if enc != XML_CHAR_ENCODING_NONE 11429 * plug some encoding conversion routines, 11430 * else xmlSwitchEncoding will set to (default) 11431 * UTF8. 
11432 */ 11433 start[0] = RAW; 11434 start[1] = NXT(1); 11435 start[2] = NXT(2); 11436 start[3] = NXT(3); 11437 enc = xmlDetectCharEncoding(start, 4); 11438 xmlSwitchEncoding(ctxt, enc); 11439 break; 11440 } 11441 11442 if (avail < 2) 11443 goto done; 11444 cur = ctxt->input->cur[0]; 11445 next = ctxt->input->cur[1]; 11446 if (cur == 0) { 11447 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11448 ctxt->sax->setDocumentLocator(ctxt->userData, 11449 &xmlDefaultSAXLocator); 11450 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11451 xmlHaltParser(ctxt); 11452#ifdef DEBUG_PUSH 11453 xmlGenericError(xmlGenericErrorContext, 11454 "PP: entering EOF\n"); 11455#endif 11456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11457 ctxt->sax->endDocument(ctxt->userData); 11458 goto done; 11459 } 11460 if ((cur == '<') && (next == '?')) { 11461 /* PI or XML decl */ 11462 if (avail < 5) return(ret); 11463 if ((!terminate) && 11464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11465 return(ret); 11466 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11467 ctxt->sax->setDocumentLocator(ctxt->userData, 11468 &xmlDefaultSAXLocator); 11469 if ((ctxt->input->cur[2] == 'x') && 11470 (ctxt->input->cur[3] == 'm') && 11471 (ctxt->input->cur[4] == 'l') && 11472 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11473 ret += 5; 11474#ifdef DEBUG_PUSH 11475 xmlGenericError(xmlGenericErrorContext, 11476 "PP: Parsing XML Decl\n"); 11477#endif 11478 xmlParseXMLDecl(ctxt); 11479 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11480 /* 11481 * The XML REC instructs us to stop parsing right 11482 * here 11483 */ 11484 xmlHaltParser(ctxt); 11485 return(0); 11486 } 11487 ctxt->standalone = ctxt->input->standalone; 11488 if ((ctxt->encoding == NULL) && 11489 (ctxt->input->encoding != NULL)) 11490 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11491 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11492 (!ctxt->disableSAX)) 11493 ctxt->sax->startDocument(ctxt->userData); 11494 ctxt->instate = 
XML_PARSER_MISC; 11495#ifdef DEBUG_PUSH 11496 xmlGenericError(xmlGenericErrorContext, 11497 "PP: entering MISC\n"); 11498#endif 11499 } else { 11500 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11501 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11502 (!ctxt->disableSAX)) 11503 ctxt->sax->startDocument(ctxt->userData); 11504 ctxt->instate = XML_PARSER_MISC; 11505#ifdef DEBUG_PUSH 11506 xmlGenericError(xmlGenericErrorContext, 11507 "PP: entering MISC\n"); 11508#endif 11509 } 11510 } else { 11511 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11512 ctxt->sax->setDocumentLocator(ctxt->userData, 11513 &xmlDefaultSAXLocator); 11514 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11515 if (ctxt->version == NULL) { 11516 xmlErrMemory(ctxt, NULL); 11517 break; 11518 } 11519 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11520 (!ctxt->disableSAX)) 11521 ctxt->sax->startDocument(ctxt->userData); 11522 ctxt->instate = XML_PARSER_MISC; 11523#ifdef DEBUG_PUSH 11524 xmlGenericError(xmlGenericErrorContext, 11525 "PP: entering MISC\n"); 11526#endif 11527 } 11528 break; 11529 case XML_PARSER_START_TAG: { 11530 const xmlChar *name; 11531 const xmlChar *prefix = NULL; 11532 const xmlChar *URI = NULL; 11533 int nsNr = ctxt->nsNr; 11534 11535 if ((avail < 2) && (ctxt->inputNr == 1)) 11536 goto done; 11537 cur = ctxt->input->cur[0]; 11538 if (cur != '<') { 11539 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11540 xmlHaltParser(ctxt); 11541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11542 ctxt->sax->endDocument(ctxt->userData); 11543 goto done; 11544 } 11545 if (!terminate) { 11546 if (ctxt->progressive) { 11547 /* > can be found unescaped in attribute values */ 11548 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11549 goto done; 11550 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11551 goto done; 11552 } 11553 } 11554 if (ctxt->spaceNr == 0) 11555 spacePush(ctxt, -1); 11556 else if (*ctxt->space == -2) 11557 spacePush(ctxt, -1); 
11558 else 11559 spacePush(ctxt, *ctxt->space); 11560#ifdef LIBXML_SAX1_ENABLED 11561 if (ctxt->sax2) 11562#endif /* LIBXML_SAX1_ENABLED */ 11563 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11564#ifdef LIBXML_SAX1_ENABLED 11565 else 11566 name = xmlParseStartTag(ctxt); 11567#endif /* LIBXML_SAX1_ENABLED */ 11568 if (ctxt->instate == XML_PARSER_EOF) 11569 goto done; 11570 if (name == NULL) { 11571 spacePop(ctxt); 11572 xmlHaltParser(ctxt); 11573 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11574 ctxt->sax->endDocument(ctxt->userData); 11575 goto done; 11576 } 11577#ifdef LIBXML_VALID_ENABLED 11578 /* 11579 * [ VC: Root Element Type ] 11580 * The Name in the document type declaration must match 11581 * the element type of the root element. 11582 */ 11583 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11584 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11585 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11586#endif /* LIBXML_VALID_ENABLED */ 11587 11588 /* 11589 * Check for an Empty Element. 
11590 */ 11591 if ((RAW == '/') && (NXT(1) == '>')) { 11592 SKIP(2); 11593 11594 if (ctxt->sax2) { 11595 if ((ctxt->sax != NULL) && 11596 (ctxt->sax->endElementNs != NULL) && 11597 (!ctxt->disableSAX)) 11598 ctxt->sax->endElementNs(ctxt->userData, name, 11599 prefix, URI); 11600 if (ctxt->nsNr - nsNr > 0) 11601 nsPop(ctxt, ctxt->nsNr - nsNr); 11602#ifdef LIBXML_SAX1_ENABLED 11603 } else { 11604 if ((ctxt->sax != NULL) && 11605 (ctxt->sax->endElement != NULL) && 11606 (!ctxt->disableSAX)) 11607 ctxt->sax->endElement(ctxt->userData, name); 11608#endif /* LIBXML_SAX1_ENABLED */ 11609 } 11610 if (ctxt->instate == XML_PARSER_EOF) 11611 goto done; 11612 spacePop(ctxt); 11613 if (ctxt->nameNr == 0) { 11614 ctxt->instate = XML_PARSER_EPILOG; 11615 } else { 11616 ctxt->instate = XML_PARSER_CONTENT; 11617 } 11618 ctxt->progressive = 1; 11619 break; 11620 } 11621 if (RAW == '>') { 11622 NEXT; 11623 } else { 11624 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11625 "Couldn't find end of Start Tag %s\n", 11626 name); 11627 nodePop(ctxt); 11628 spacePop(ctxt); 11629 } 11630 if (ctxt->sax2) 11631 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11632#ifdef LIBXML_SAX1_ENABLED 11633 else 11634 namePush(ctxt, name); 11635#endif /* LIBXML_SAX1_ENABLED */ 11636 11637 ctxt->instate = XML_PARSER_CONTENT; 11638 ctxt->progressive = 1; 11639 break; 11640 } 11641 case XML_PARSER_CONTENT: { 11642 const xmlChar *test; 11643 unsigned int cons; 11644 if ((avail < 2) && (ctxt->inputNr == 1)) 11645 goto done; 11646 cur = ctxt->input->cur[0]; 11647 next = ctxt->input->cur[1]; 11648 11649 test = CUR_PTR; 11650 cons = ctxt->input->consumed; 11651 if ((cur == '<') && (next == '/')) { 11652 ctxt->instate = XML_PARSER_END_TAG; 11653 break; 11654 } else if ((cur == '<') && (next == '?')) { 11655 if ((!terminate) && 11656 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11657 ctxt->progressive = XML_PARSER_PI; 11658 goto done; 11659 } 11660 xmlParsePI(ctxt); 11661 ctxt->instate = 
XML_PARSER_CONTENT; 11662 ctxt->progressive = 1; 11663 } else if ((cur == '<') && (next != '!')) { 11664 ctxt->instate = XML_PARSER_START_TAG; 11665 break; 11666 } else if ((cur == '<') && (next == '!') && 11667 (ctxt->input->cur[2] == '-') && 11668 (ctxt->input->cur[3] == '-')) { 11669 int term; 11670 11671 if (avail < 4) 11672 goto done; 11673 ctxt->input->cur += 4; 11674 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11675 ctxt->input->cur -= 4; 11676 if ((!terminate) && (term < 0)) { 11677 ctxt->progressive = XML_PARSER_COMMENT; 11678 goto done; 11679 } 11680 xmlParseComment(ctxt); 11681 ctxt->instate = XML_PARSER_CONTENT; 11682 ctxt->progressive = 1; 11683 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11684 (ctxt->input->cur[2] == '[') && 11685 (ctxt->input->cur[3] == 'C') && 11686 (ctxt->input->cur[4] == 'D') && 11687 (ctxt->input->cur[5] == 'A') && 11688 (ctxt->input->cur[6] == 'T') && 11689 (ctxt->input->cur[7] == 'A') && 11690 (ctxt->input->cur[8] == '[')) { 11691 SKIP(9); 11692 ctxt->instate = XML_PARSER_CDATA_SECTION; 11693 break; 11694 } else if ((cur == '<') && (next == '!') && 11695 (avail < 9)) { 11696 goto done; 11697 } else if (cur == '&') { 11698 if ((!terminate) && 11699 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11700 goto done; 11701 xmlParseReference(ctxt); 11702 } else { 11703 /* TODO Avoid the extra copy, handle directly !!! */ 11704 /* 11705 * Goal of the following test is: 11706 * - minimize calls to the SAX 'character' callback 11707 * when they are mergeable 11708 * - handle an problem for isBlank when we only parse 11709 * a sequence of blank chars and the next one is 11710 * not available to check against '<' presence. 11711 * - tries to homogenize the differences in SAX 11712 * callbacks between the push and pull versions 11713 * of the parser. 
11714 */ 11715 if ((ctxt->inputNr == 1) && 11716 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11717 if (!terminate) { 11718 if (ctxt->progressive) { 11719 if ((lastlt == NULL) || 11720 (ctxt->input->cur > lastlt)) 11721 goto done; 11722 } else if (xmlParseLookupSequence(ctxt, 11723 '<', 0, 0) < 0) { 11724 goto done; 11725 } 11726 } 11727 } 11728 ctxt->checkIndex = 0; 11729 xmlParseCharData(ctxt, 0); 11730 } 11731 /* 11732 * Pop-up of finished entities. 11733 */ 11734 while ((RAW == 0) && (ctxt->inputNr > 1)) 11735 xmlPopInput(ctxt); 11736 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11737 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11738 "detected an error in element content\n"); 11739 xmlHaltParser(ctxt); 11740 break; 11741 } 11742 break; 11743 } 11744 case XML_PARSER_END_TAG: 11745 if (avail < 2) 11746 goto done; 11747 if (!terminate) { 11748 if (ctxt->progressive) { 11749 /* > can be found unescaped in attribute values */ 11750 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11751 goto done; 11752 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11753 goto done; 11754 } 11755 } 11756 if (ctxt->sax2) { 11757 xmlParseEndTag2(ctxt, 11758 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11759 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11760 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11761 nameNsPop(ctxt); 11762 } 11763#ifdef LIBXML_SAX1_ENABLED 11764 else 11765 xmlParseEndTag1(ctxt, 0); 11766#endif /* LIBXML_SAX1_ENABLED */ 11767 if (ctxt->instate == XML_PARSER_EOF) { 11768 /* Nothing */ 11769 } else if (ctxt->nameNr == 0) { 11770 ctxt->instate = XML_PARSER_EPILOG; 11771 } else { 11772 ctxt->instate = XML_PARSER_CONTENT; 11773 } 11774 break; 11775 case XML_PARSER_CDATA_SECTION: { 11776 /* 11777 * The Push mode need to have the SAX callback for 11778 * cdataBlock merge back contiguous callbacks. 
11779 */ 11780 int base; 11781 11782 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11783 if (base < 0) { 11784 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11785 int tmp; 11786 11787 tmp = xmlCheckCdataPush(ctxt->input->cur, 11788 XML_PARSER_BIG_BUFFER_SIZE, 0); 11789 if (tmp < 0) { 11790 tmp = -tmp; 11791 ctxt->input->cur += tmp; 11792 goto encoding_error; 11793 } 11794 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11795 if (ctxt->sax->cdataBlock != NULL) 11796 ctxt->sax->cdataBlock(ctxt->userData, 11797 ctxt->input->cur, tmp); 11798 else if (ctxt->sax->characters != NULL) 11799 ctxt->sax->characters(ctxt->userData, 11800 ctxt->input->cur, tmp); 11801 } 11802 if (ctxt->instate == XML_PARSER_EOF) 11803 goto done; 11804 SKIPL(tmp); 11805 ctxt->checkIndex = 0; 11806 } 11807 goto done; 11808 } else { 11809 int tmp; 11810 11811 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11812 if ((tmp < 0) || (tmp != base)) { 11813 tmp = -tmp; 11814 ctxt->input->cur += tmp; 11815 goto encoding_error; 11816 } 11817 if ((ctxt->sax != NULL) && (base == 0) && 11818 (ctxt->sax->cdataBlock != NULL) && 11819 (!ctxt->disableSAX)) { 11820 /* 11821 * Special case to provide identical behaviour 11822 * between pull and push parsers on enpty CDATA 11823 * sections 11824 */ 11825 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11826 (!strncmp((const char *)&ctxt->input->cur[-9], 11827 "<![CDATA[", 9))) 11828 ctxt->sax->cdataBlock(ctxt->userData, 11829 BAD_CAST "", 0); 11830 } else if ((ctxt->sax != NULL) && (base > 0) && 11831 (!ctxt->disableSAX)) { 11832 if (ctxt->sax->cdataBlock != NULL) 11833 ctxt->sax->cdataBlock(ctxt->userData, 11834 ctxt->input->cur, base); 11835 else if (ctxt->sax->characters != NULL) 11836 ctxt->sax->characters(ctxt->userData, 11837 ctxt->input->cur, base); 11838 } 11839 if (ctxt->instate == XML_PARSER_EOF) 11840 goto done; 11841 SKIPL(base + 3); 11842 ctxt->checkIndex = 0; 11843 ctxt->instate = XML_PARSER_CONTENT; 11844#ifdef DEBUG_PUSH 11845 
xmlGenericError(xmlGenericErrorContext, 11846 "PP: entering CONTENT\n"); 11847#endif 11848 } 11849 break; 11850 } 11851 case XML_PARSER_MISC: 11852 SKIP_BLANKS; 11853 if (ctxt->input->buf == NULL) 11854 avail = ctxt->input->length - 11855 (ctxt->input->cur - ctxt->input->base); 11856 else 11857 avail = xmlBufUse(ctxt->input->buf->buffer) - 11858 (ctxt->input->cur - ctxt->input->base); 11859 if (avail < 2) 11860 goto done; 11861 cur = ctxt->input->cur[0]; 11862 next = ctxt->input->cur[1]; 11863 if ((cur == '<') && (next == '?')) { 11864 if ((!terminate) && 11865 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11866 ctxt->progressive = XML_PARSER_PI; 11867 goto done; 11868 } 11869#ifdef DEBUG_PUSH 11870 xmlGenericError(xmlGenericErrorContext, 11871 "PP: Parsing PI\n"); 11872#endif 11873 xmlParsePI(ctxt); 11874 if (ctxt->instate == XML_PARSER_EOF) 11875 goto done; 11876 ctxt->instate = XML_PARSER_MISC; 11877 ctxt->progressive = 1; 11878 ctxt->checkIndex = 0; 11879 } else if ((cur == '<') && (next == '!') && 11880 (ctxt->input->cur[2] == '-') && 11881 (ctxt->input->cur[3] == '-')) { 11882 if ((!terminate) && 11883 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11884 ctxt->progressive = XML_PARSER_COMMENT; 11885 goto done; 11886 } 11887#ifdef DEBUG_PUSH 11888 xmlGenericError(xmlGenericErrorContext, 11889 "PP: Parsing Comment\n"); 11890#endif 11891 xmlParseComment(ctxt); 11892 if (ctxt->instate == XML_PARSER_EOF) 11893 goto done; 11894 ctxt->instate = XML_PARSER_MISC; 11895 ctxt->progressive = 1; 11896 ctxt->checkIndex = 0; 11897 } else if ((cur == '<') && (next == '!') && 11898 (ctxt->input->cur[2] == 'D') && 11899 (ctxt->input->cur[3] == 'O') && 11900 (ctxt->input->cur[4] == 'C') && 11901 (ctxt->input->cur[5] == 'T') && 11902 (ctxt->input->cur[6] == 'Y') && 11903 (ctxt->input->cur[7] == 'P') && 11904 (ctxt->input->cur[8] == 'E')) { 11905 if ((!terminate) && 11906 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11907 ctxt->progressive = XML_PARSER_DTD; 11908 
goto done; 11909 } 11910#ifdef DEBUG_PUSH 11911 xmlGenericError(xmlGenericErrorContext, 11912 "PP: Parsing internal subset\n"); 11913#endif 11914 ctxt->inSubset = 1; 11915 ctxt->progressive = 0; 11916 ctxt->checkIndex = 0; 11917 xmlParseDocTypeDecl(ctxt); 11918 if (ctxt->instate == XML_PARSER_EOF) 11919 goto done; 11920 if (RAW == '[') { 11921 ctxt->instate = XML_PARSER_DTD; 11922#ifdef DEBUG_PUSH 11923 xmlGenericError(xmlGenericErrorContext, 11924 "PP: entering DTD\n"); 11925#endif 11926 } else { 11927 /* 11928 * Create and update the external subset. 11929 */ 11930 ctxt->inSubset = 2; 11931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11932 (ctxt->sax->externalSubset != NULL)) 11933 ctxt->sax->externalSubset(ctxt->userData, 11934 ctxt->intSubName, ctxt->extSubSystem, 11935 ctxt->extSubURI); 11936 ctxt->inSubset = 0; 11937 xmlCleanSpecialAttr(ctxt); 11938 ctxt->instate = XML_PARSER_PROLOG; 11939#ifdef DEBUG_PUSH 11940 xmlGenericError(xmlGenericErrorContext, 11941 "PP: entering PROLOG\n"); 11942#endif 11943 } 11944 } else if ((cur == '<') && (next == '!') && 11945 (avail < 9)) { 11946 goto done; 11947 } else { 11948 ctxt->instate = XML_PARSER_START_TAG; 11949 ctxt->progressive = XML_PARSER_START_TAG; 11950 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11951#ifdef DEBUG_PUSH 11952 xmlGenericError(xmlGenericErrorContext, 11953 "PP: entering START_TAG\n"); 11954#endif 11955 } 11956 break; 11957 case XML_PARSER_PROLOG: 11958 SKIP_BLANKS; 11959 if (ctxt->input->buf == NULL) 11960 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11961 else 11962 avail = xmlBufUse(ctxt->input->buf->buffer) - 11963 (ctxt->input->cur - ctxt->input->base); 11964 if (avail < 2) 11965 goto done; 11966 cur = ctxt->input->cur[0]; 11967 next = ctxt->input->cur[1]; 11968 if ((cur == '<') && (next == '?')) { 11969 if ((!terminate) && 11970 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11971 ctxt->progressive = XML_PARSER_PI; 11972 goto done; 11973 } 11974#ifdef DEBUG_PUSH 
11975 xmlGenericError(xmlGenericErrorContext, 11976 "PP: Parsing PI\n"); 11977#endif 11978 xmlParsePI(ctxt); 11979 if (ctxt->instate == XML_PARSER_EOF) 11980 goto done; 11981 ctxt->instate = XML_PARSER_PROLOG; 11982 ctxt->progressive = 1; 11983 } else if ((cur == '<') && (next == '!') && 11984 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11985 if ((!terminate) && 11986 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11987 ctxt->progressive = XML_PARSER_COMMENT; 11988 goto done; 11989 } 11990#ifdef DEBUG_PUSH 11991 xmlGenericError(xmlGenericErrorContext, 11992 "PP: Parsing Comment\n"); 11993#endif 11994 xmlParseComment(ctxt); 11995 if (ctxt->instate == XML_PARSER_EOF) 11996 goto done; 11997 ctxt->instate = XML_PARSER_PROLOG; 11998 ctxt->progressive = 1; 11999 } else if ((cur == '<') && (next == '!') && 12000 (avail < 4)) { 12001 goto done; 12002 } else { 12003 ctxt->instate = XML_PARSER_START_TAG; 12004 if (ctxt->progressive == 0) 12005 ctxt->progressive = XML_PARSER_START_TAG; 12006 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 12007#ifdef DEBUG_PUSH 12008 xmlGenericError(xmlGenericErrorContext, 12009 "PP: entering START_TAG\n"); 12010#endif 12011 } 12012 break; 12013 case XML_PARSER_EPILOG: 12014 SKIP_BLANKS; 12015 if (ctxt->input->buf == NULL) 12016 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12017 else 12018 avail = xmlBufUse(ctxt->input->buf->buffer) - 12019 (ctxt->input->cur - ctxt->input->base); 12020 if (avail < 2) 12021 goto done; 12022 cur = ctxt->input->cur[0]; 12023 next = ctxt->input->cur[1]; 12024 if ((cur == '<') && (next == '?')) { 12025 if ((!terminate) && 12026 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12027 ctxt->progressive = XML_PARSER_PI; 12028 goto done; 12029 } 12030#ifdef DEBUG_PUSH 12031 xmlGenericError(xmlGenericErrorContext, 12032 "PP: Parsing PI\n"); 12033#endif 12034 xmlParsePI(ctxt); 12035 if (ctxt->instate == XML_PARSER_EOF) 12036 goto done; 12037 ctxt->instate = 
XML_PARSER_EPILOG; 12038 ctxt->progressive = 1; 12039 } else if ((cur == '<') && (next == '!') && 12040 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12041 if ((!terminate) && 12042 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12043 ctxt->progressive = XML_PARSER_COMMENT; 12044 goto done; 12045 } 12046#ifdef DEBUG_PUSH 12047 xmlGenericError(xmlGenericErrorContext, 12048 "PP: Parsing Comment\n"); 12049#endif 12050 xmlParseComment(ctxt); 12051 if (ctxt->instate == XML_PARSER_EOF) 12052 goto done; 12053 ctxt->instate = XML_PARSER_EPILOG; 12054 ctxt->progressive = 1; 12055 } else if ((cur == '<') && (next == '!') && 12056 (avail < 4)) { 12057 goto done; 12058 } else { 12059 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12060 xmlHaltParser(ctxt); 12061#ifdef DEBUG_PUSH 12062 xmlGenericError(xmlGenericErrorContext, 12063 "PP: entering EOF\n"); 12064#endif 12065 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12066 ctxt->sax->endDocument(ctxt->userData); 12067 goto done; 12068 } 12069 break; 12070 case XML_PARSER_DTD: { 12071 /* 12072 * Sorry but progressive parsing of the internal subset 12073 * is not expected to be supported. We first check that 12074 * the full content of the internal subset is available and 12075 * the parsing is launched only at that point. 12076 * Internal subset ends up with "']' S? '>'" in an unescaped 12077 * section and not in a ']]>' sequence which are conditional 12078 * sections (whoever argued to keep that crap in XML deserve 12079 * a place in hell !). 
12080 */ 12081 int base, i; 12082 xmlChar *buf; 12083 xmlChar quote = 0; 12084 size_t use; 12085 12086 base = ctxt->input->cur - ctxt->input->base; 12087 if (base < 0) return(0); 12088 if (ctxt->checkIndex > base) 12089 base = ctxt->checkIndex; 12090 buf = xmlBufContent(ctxt->input->buf->buffer); 12091 use = xmlBufUse(ctxt->input->buf->buffer); 12092 for (;(unsigned int) base < use; base++) { 12093 if (quote != 0) { 12094 if (buf[base] == quote) 12095 quote = 0; 12096 continue; 12097 } 12098 if ((quote == 0) && (buf[base] == '<')) { 12099 int found = 0; 12100 /* special handling of comments */ 12101 if (((unsigned int) base + 4 < use) && 12102 (buf[base + 1] == '!') && 12103 (buf[base + 2] == '-') && 12104 (buf[base + 3] == '-')) { 12105 for (;(unsigned int) base + 3 < use; base++) { 12106 if ((buf[base] == '-') && 12107 (buf[base + 1] == '-') && 12108 (buf[base + 2] == '>')) { 12109 found = 1; 12110 base += 2; 12111 break; 12112 } 12113 } 12114 if (!found) { 12115#if 0 12116 fprintf(stderr, "unfinished comment\n"); 12117#endif 12118 break; /* for */ 12119 } 12120 continue; 12121 } 12122 } 12123 if (buf[base] == '"') { 12124 quote = '"'; 12125 continue; 12126 } 12127 if (buf[base] == '\'') { 12128 quote = '\''; 12129 continue; 12130 } 12131 if (buf[base] == ']') { 12132#if 0 12133 fprintf(stderr, "%c%c%c%c: ", buf[base], 12134 buf[base + 1], buf[base + 2], buf[base + 3]); 12135#endif 12136 if ((unsigned int) base +1 >= use) 12137 break; 12138 if (buf[base + 1] == ']') { 12139 /* conditional crap, skip both ']' ! 
*/ 12140 base++; 12141 continue; 12142 } 12143 for (i = 1; (unsigned int) base + i < use; i++) { 12144 if (buf[base + i] == '>') { 12145#if 0 12146 fprintf(stderr, "found\n"); 12147#endif 12148 goto found_end_int_subset; 12149 } 12150 if (!IS_BLANK_CH(buf[base + i])) { 12151#if 0 12152 fprintf(stderr, "not found\n"); 12153#endif 12154 goto not_end_of_int_subset; 12155 } 12156 } 12157#if 0 12158 fprintf(stderr, "end of stream\n"); 12159#endif 12160 break; 12161 12162 } 12163not_end_of_int_subset: 12164 continue; /* for */ 12165 } 12166 /* 12167 * We didn't found the end of the Internal subset 12168 */ 12169 if (quote == 0) 12170 ctxt->checkIndex = base; 12171 else 12172 ctxt->checkIndex = 0; 12173#ifdef DEBUG_PUSH 12174 if (next == 0) 12175 xmlGenericError(xmlGenericErrorContext, 12176 "PP: lookup of int subset end filed\n"); 12177#endif 12178 goto done; 12179 12180found_end_int_subset: 12181 ctxt->checkIndex = 0; 12182 xmlParseInternalSubset(ctxt); 12183 if (ctxt->instate == XML_PARSER_EOF) 12184 goto done; 12185 ctxt->inSubset = 2; 12186 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12187 (ctxt->sax->externalSubset != NULL)) 12188 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12189 ctxt->extSubSystem, ctxt->extSubURI); 12190 ctxt->inSubset = 0; 12191 xmlCleanSpecialAttr(ctxt); 12192 if (ctxt->instate == XML_PARSER_EOF) 12193 goto done; 12194 ctxt->instate = XML_PARSER_PROLOG; 12195 ctxt->checkIndex = 0; 12196#ifdef DEBUG_PUSH 12197 xmlGenericError(xmlGenericErrorContext, 12198 "PP: entering PROLOG\n"); 12199#endif 12200 break; 12201 } 12202 case XML_PARSER_COMMENT: 12203 xmlGenericError(xmlGenericErrorContext, 12204 "PP: internal error, state == COMMENT\n"); 12205 ctxt->instate = XML_PARSER_CONTENT; 12206#ifdef DEBUG_PUSH 12207 xmlGenericError(xmlGenericErrorContext, 12208 "PP: entering CONTENT\n"); 12209#endif 12210 break; 12211 case XML_PARSER_IGNORE: 12212 xmlGenericError(xmlGenericErrorContext, 12213 "PP: internal error, state == IGNORE"); 
12214 ctxt->instate = XML_PARSER_DTD; 12215#ifdef DEBUG_PUSH 12216 xmlGenericError(xmlGenericErrorContext, 12217 "PP: entering DTD\n"); 12218#endif 12219 break; 12220 case XML_PARSER_PI: 12221 xmlGenericError(xmlGenericErrorContext, 12222 "PP: internal error, state == PI\n"); 12223 ctxt->instate = XML_PARSER_CONTENT; 12224#ifdef DEBUG_PUSH 12225 xmlGenericError(xmlGenericErrorContext, 12226 "PP: entering CONTENT\n"); 12227#endif 12228 break; 12229 case XML_PARSER_ENTITY_DECL: 12230 xmlGenericError(xmlGenericErrorContext, 12231 "PP: internal error, state == ENTITY_DECL\n"); 12232 ctxt->instate = XML_PARSER_DTD; 12233#ifdef DEBUG_PUSH 12234 xmlGenericError(xmlGenericErrorContext, 12235 "PP: entering DTD\n"); 12236#endif 12237 break; 12238 case XML_PARSER_ENTITY_VALUE: 12239 xmlGenericError(xmlGenericErrorContext, 12240 "PP: internal error, state == ENTITY_VALUE\n"); 12241 ctxt->instate = XML_PARSER_CONTENT; 12242#ifdef DEBUG_PUSH 12243 xmlGenericError(xmlGenericErrorContext, 12244 "PP: entering DTD\n"); 12245#endif 12246 break; 12247 case XML_PARSER_ATTRIBUTE_VALUE: 12248 xmlGenericError(xmlGenericErrorContext, 12249 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12250 ctxt->instate = XML_PARSER_START_TAG; 12251#ifdef DEBUG_PUSH 12252 xmlGenericError(xmlGenericErrorContext, 12253 "PP: entering START_TAG\n"); 12254#endif 12255 break; 12256 case XML_PARSER_SYSTEM_LITERAL: 12257 xmlGenericError(xmlGenericErrorContext, 12258 "PP: internal error, state == SYSTEM_LITERAL\n"); 12259 ctxt->instate = XML_PARSER_START_TAG; 12260#ifdef DEBUG_PUSH 12261 xmlGenericError(xmlGenericErrorContext, 12262 "PP: entering START_TAG\n"); 12263#endif 12264 break; 12265 case XML_PARSER_PUBLIC_LITERAL: 12266 xmlGenericError(xmlGenericErrorContext, 12267 "PP: internal error, state == PUBLIC_LITERAL\n"); 12268 ctxt->instate = XML_PARSER_START_TAG; 12269#ifdef DEBUG_PUSH 12270 xmlGenericError(xmlGenericErrorContext, 12271 "PP: entering START_TAG\n"); 12272#endif 12273 break; 12274 } 12275 } 
12276done: 12277#ifdef DEBUG_PUSH 12278 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12279#endif 12280 return(ret); 12281encoding_error: 12282 { 12283 char buffer[150]; 12284 12285 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12286 ctxt->input->cur[0], ctxt->input->cur[1], 12287 ctxt->input->cur[2], ctxt->input->cur[3]); 12288 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12289 "Input is not proper UTF-8, indicate encoding !\n%s", 12290 BAD_CAST buffer, NULL); 12291 } 12292 return(0); 12293} 12294 12295/** 12296 * xmlParseCheckTransition: 12297 * @ctxt: an XML parser context 12298 * @chunk: a char array 12299 * @size: the size in byte of the chunk 12300 * 12301 * Check depending on the current parser state if the chunk given must be 12302 * processed immediately or one need more data to advance on parsing. 12303 * 12304 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12305 */ 12306static int 12307xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12308 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12309 return(-1); 12310 if (ctxt->instate == XML_PARSER_START_TAG) { 12311 if (memchr(chunk, '>', size) != NULL) 12312 return(1); 12313 return(0); 12314 } 12315 if (ctxt->progressive == XML_PARSER_COMMENT) { 12316 if (memchr(chunk, '>', size) != NULL) 12317 return(1); 12318 return(0); 12319 } 12320 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12321 if (memchr(chunk, '>', size) != NULL) 12322 return(1); 12323 return(0); 12324 } 12325 if (ctxt->progressive == XML_PARSER_PI) { 12326 if (memchr(chunk, '>', size) != NULL) 12327 return(1); 12328 return(0); 12329 } 12330 if (ctxt->instate == XML_PARSER_END_TAG) { 12331 if (memchr(chunk, '>', size) != NULL) 12332 return(1); 12333 return(0); 12334 } 12335 if ((ctxt->progressive == XML_PARSER_DTD) || 12336 (ctxt->instate == XML_PARSER_DTD)) { 12337 if (memchr(chunk, '>', size) != NULL) 12338 return(1); 12339 return(0); 12340 } 12341 
return(1); 12342} 12343 12344/** 12345 * xmlParseChunk: 12346 * @ctxt: an XML parser context 12347 * @chunk: an char array 12348 * @size: the size in byte of the chunk 12349 * @terminate: last chunk indicator 12350 * 12351 * Parse a Chunk of memory 12352 * 12353 * Returns zero if no error, the xmlParserErrors otherwise. 12354 */ 12355int 12356xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12357 int terminate) { 12358 int end_in_lf = 0; 12359 int remain = 0; 12360 size_t old_avail = 0; 12361 size_t avail = 0; 12362 12363 if (ctxt == NULL) 12364 return(XML_ERR_INTERNAL_ERROR); 12365 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12366 return(ctxt->errNo); 12367 if (ctxt->instate == XML_PARSER_EOF) 12368 return(-1); 12369 if (ctxt->instate == XML_PARSER_START) 12370 xmlDetectSAX2(ctxt); 12371 if ((size > 0) && (chunk != NULL) && (!terminate) && 12372 (chunk[size - 1] == '\r')) { 12373 end_in_lf = 1; 12374 size--; 12375 } 12376 12377xmldecl_done: 12378 12379 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12380 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12381 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12382 size_t cur = ctxt->input->cur - ctxt->input->base; 12383 int res; 12384 12385 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12386 /* 12387 * Specific handling if we autodetected an encoding, we should not 12388 * push more than the first line ... 
which depend on the encoding 12389 * And only push the rest once the final encoding was detected 12390 */ 12391 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12392 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12393 unsigned int len = 45; 12394 12395 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12396 BAD_CAST "UTF-16")) || 12397 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12398 BAD_CAST "UTF16"))) 12399 len = 90; 12400 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12401 BAD_CAST "UCS-4")) || 12402 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12403 BAD_CAST "UCS4"))) 12404 len = 180; 12405 12406 if (ctxt->input->buf->rawconsumed < len) 12407 len -= ctxt->input->buf->rawconsumed; 12408 12409 /* 12410 * Change size for reading the initial declaration only 12411 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12412 * will blindly copy extra bytes from memory. 12413 */ 12414 if ((unsigned int) size > len) { 12415 remain = size - len; 12416 size = len; 12417 } else { 12418 remain = 0; 12419 } 12420 } 12421 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12422 if (res < 0) { 12423 ctxt->errNo = XML_PARSER_EOF; 12424 xmlHaltParser(ctxt); 12425 return (XML_PARSER_EOF); 12426 } 12427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12428#ifdef DEBUG_PUSH 12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12430#endif 12431 12432 } else if (ctxt->instate != XML_PARSER_EOF) { 12433 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12434 xmlParserInputBufferPtr in = ctxt->input->buf; 12435 if ((in->encoder != NULL) && (in->buffer != NULL) && 12436 (in->raw != NULL)) { 12437 int nbchars; 12438 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12439 size_t current = ctxt->input->cur - ctxt->input->base; 12440 12441 nbchars = xmlCharEncInput(in, terminate); 12442 if (nbchars < 0) { 
12443 /* TODO 2.6.0 */ 12444 xmlGenericError(xmlGenericErrorContext, 12445 "xmlParseChunk: encoder error\n"); 12446 return(XML_ERR_INVALID_ENCODING); 12447 } 12448 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12449 } 12450 } 12451 } 12452 if (remain != 0) { 12453 xmlParseTryOrFinish(ctxt, 0); 12454 } else { 12455 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12456 avail = xmlBufUse(ctxt->input->buf->buffer); 12457 /* 12458 * Depending on the current state it may not be such 12459 * a good idea to try parsing if there is nothing in the chunk 12460 * which would be worth doing a parser state transition and we 12461 * need to wait for more data 12462 */ 12463 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12464 (old_avail == 0) || (avail == 0) || 12465 (xmlParseCheckTransition(ctxt, 12466 (const char *)&ctxt->input->base[old_avail], 12467 avail - old_avail))) 12468 xmlParseTryOrFinish(ctxt, terminate); 12469 } 12470 if (ctxt->instate == XML_PARSER_EOF) 12471 return(ctxt->errNo); 12472 12473 if ((ctxt->input != NULL) && 12474 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12475 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12476 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12477 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12478 xmlHaltParser(ctxt); 12479 } 12480 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12481 return(ctxt->errNo); 12482 12483 if (remain != 0) { 12484 chunk += size; 12485 size = remain; 12486 remain = 0; 12487 goto xmldecl_done; 12488 } 12489 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12490 (ctxt->input->buf != NULL)) { 12491 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12492 ctxt->input); 12493 size_t current = ctxt->input->cur - ctxt->input->base; 12494 12495 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12496 12497 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12498 base, current); 12499 } 12500 if 
(terminate) { 12501 /* 12502 * Check for termination 12503 */ 12504 int cur_avail = 0; 12505 12506 if (ctxt->input != NULL) { 12507 if (ctxt->input->buf == NULL) 12508 cur_avail = ctxt->input->length - 12509 (ctxt->input->cur - ctxt->input->base); 12510 else 12511 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12512 (ctxt->input->cur - ctxt->input->base); 12513 } 12514 12515 if ((ctxt->instate != XML_PARSER_EOF) && 12516 (ctxt->instate != XML_PARSER_EPILOG)) { 12517 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12518 } 12519 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12520 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12521 } 12522 if (ctxt->instate != XML_PARSER_EOF) { 12523 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12524 ctxt->sax->endDocument(ctxt->userData); 12525 } 12526 ctxt->instate = XML_PARSER_EOF; 12527 } 12528 if (ctxt->wellFormed == 0) 12529 return((xmlParserErrors) ctxt->errNo); 12530 else 12531 return(0); 12532} 12533 12534/************************************************************************ 12535 * * 12536 * I/O front end functions to the parser * 12537 * * 12538 ************************************************************************/ 12539 12540/** 12541 * xmlCreatePushParserCtxt: 12542 * @sax: a SAX handler 12543 * @user_data: The user data returned on SAX callbacks 12544 * @chunk: a pointer to an array of chars 12545 * @size: number of chars in the array 12546 * @filename: an optional file name or URI 12547 * 12548 * Create a parser context for using the XML parser in push mode. 12549 * If @buffer and @size are non-NULL, the data is used to detect 12550 * the encoding. The remaining characters will be parsed so they 12551 * don't need to be fed in again through xmlParseChunk. 12552 * To allow content encoding detection, @size should be >= 4 12553 * The value of @filename is used for fetching external entities 12554 * and error/warning reports. 
12555 * 12556 * Returns the new parser context or NULL 12557 */ 12558 12559xmlParserCtxtPtr 12560xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12561 const char *chunk, int size, const char *filename) { 12562 xmlParserCtxtPtr ctxt; 12563 xmlParserInputPtr inputStream; 12564 xmlParserInputBufferPtr buf; 12565 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12566 12567 /* 12568 * plug some encoding conversion routines 12569 */ 12570 if ((chunk != NULL) && (size >= 4)) 12571 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12572 12573 buf = xmlAllocParserInputBuffer(enc); 12574 if (buf == NULL) return(NULL); 12575 12576 ctxt = xmlNewParserCtxt(); 12577 if (ctxt == NULL) { 12578 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12579 xmlFreeParserInputBuffer(buf); 12580 return(NULL); 12581 } 12582 ctxt->dictNames = 1; 12583 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12584 if (ctxt->pushTab == NULL) { 12585 xmlErrMemory(ctxt, NULL); 12586 xmlFreeParserInputBuffer(buf); 12587 xmlFreeParserCtxt(ctxt); 12588 return(NULL); 12589 } 12590 if (sax != NULL) { 12591#ifdef LIBXML_SAX1_ENABLED 12592 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12593#endif /* LIBXML_SAX1_ENABLED */ 12594 xmlFree(ctxt->sax); 12595 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12596 if (ctxt->sax == NULL) { 12597 xmlErrMemory(ctxt, NULL); 12598 xmlFreeParserInputBuffer(buf); 12599 xmlFreeParserCtxt(ctxt); 12600 return(NULL); 12601 } 12602 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12603 if (sax->initialized == XML_SAX2_MAGIC) 12604 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12605 else 12606 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12607 if (user_data != NULL) 12608 ctxt->userData = user_data; 12609 } 12610 if (filename == NULL) { 12611 ctxt->directory = NULL; 12612 } else { 12613 ctxt->directory = xmlParserGetDirectory(filename); 12614 } 12615 12616 inputStream = xmlNewInputStream(ctxt); 
12617 if (inputStream == NULL) { 12618 xmlFreeParserCtxt(ctxt); 12619 xmlFreeParserInputBuffer(buf); 12620 return(NULL); 12621 } 12622 12623 if (filename == NULL) 12624 inputStream->filename = NULL; 12625 else { 12626 inputStream->filename = (char *) 12627 xmlCanonicPath((const xmlChar *) filename); 12628 if (inputStream->filename == NULL) { 12629 xmlFreeParserCtxt(ctxt); 12630 xmlFreeParserInputBuffer(buf); 12631 return(NULL); 12632 } 12633 } 12634 inputStream->buf = buf; 12635 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12636 inputPush(ctxt, inputStream); 12637 12638 /* 12639 * If the caller didn't provide an initial 'chunk' for determining 12640 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12641 * that it can be automatically determined later 12642 */ 12643 if ((size == 0) || (chunk == NULL)) { 12644 ctxt->charset = XML_CHAR_ENCODING_NONE; 12645 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12646 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12647 size_t cur = ctxt->input->cur - ctxt->input->base; 12648 12649 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12650 12651 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12652#ifdef DEBUG_PUSH 12653 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12654#endif 12655 } 12656 12657 if (enc != XML_CHAR_ENCODING_NONE) { 12658 xmlSwitchEncoding(ctxt, enc); 12659 } 12660 12661 return(ctxt); 12662} 12663#endif /* LIBXML_PUSH_ENABLED */ 12664 12665/** 12666 * xmlHaltParser: 12667 * @ctxt: an XML parser context 12668 * 12669 * Blocks further parser processing don't override error 12670 * for internal use 12671 */ 12672static void 12673xmlHaltParser(xmlParserCtxtPtr ctxt) { 12674 if (ctxt == NULL) 12675 return; 12676 ctxt->instate = XML_PARSER_EOF; 12677 ctxt->disableSAX = 1; 12678 if (ctxt->input != NULL) { 12679 /* 12680 * in case there was a specific allocation deallocate before 12681 * overriding 
base 12682 */ 12683 if (ctxt->input->free != NULL) { 12684 ctxt->input->free((xmlChar *) ctxt->input->base); 12685 ctxt->input->free = NULL; 12686 } 12687 ctxt->input->cur = BAD_CAST""; 12688 ctxt->input->base = ctxt->input->cur; 12689 } 12690} 12691 12692/** 12693 * xmlStopParser: 12694 * @ctxt: an XML parser context 12695 * 12696 * Blocks further parser processing 12697 */ 12698void 12699xmlStopParser(xmlParserCtxtPtr ctxt) { 12700 if (ctxt == NULL) 12701 return; 12702 xmlHaltParser(ctxt); 12703 ctxt->errNo = XML_ERR_USER_STOP; 12704} 12705 12706/** 12707 * xmlCreateIOParserCtxt: 12708 * @sax: a SAX handler 12709 * @user_data: The user data returned on SAX callbacks 12710 * @ioread: an I/O read function 12711 * @ioclose: an I/O close function 12712 * @ioctx: an I/O handler 12713 * @enc: the charset encoding if known 12714 * 12715 * Create a parser context for using the XML parser with an existing 12716 * I/O stream 12717 * 12718 * Returns the new parser context or NULL 12719 */ 12720xmlParserCtxtPtr 12721xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12722 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12723 void *ioctx, xmlCharEncoding enc) { 12724 xmlParserCtxtPtr ctxt; 12725 xmlParserInputPtr inputStream; 12726 xmlParserInputBufferPtr buf; 12727 12728 if (ioread == NULL) return(NULL); 12729 12730 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12731 if (buf == NULL) { 12732 if (ioclose != NULL) 12733 ioclose(ioctx); 12734 return (NULL); 12735 } 12736 12737 ctxt = xmlNewParserCtxt(); 12738 if (ctxt == NULL) { 12739 xmlFreeParserInputBuffer(buf); 12740 return(NULL); 12741 } 12742 if (sax != NULL) { 12743#ifdef LIBXML_SAX1_ENABLED 12744 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12745#endif /* LIBXML_SAX1_ENABLED */ 12746 xmlFree(ctxt->sax); 12747 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12748 if (ctxt->sax == NULL) { 12749 xmlErrMemory(ctxt, NULL); 12750 xmlFreeParserCtxt(ctxt); 
12751 return(NULL); 12752 } 12753 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12754 if (sax->initialized == XML_SAX2_MAGIC) 12755 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12756 else 12757 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12758 if (user_data != NULL) 12759 ctxt->userData = user_data; 12760 } 12761 12762 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12763 if (inputStream == NULL) { 12764 xmlFreeParserCtxt(ctxt); 12765 return(NULL); 12766 } 12767 inputPush(ctxt, inputStream); 12768 12769 return(ctxt); 12770} 12771 12772#ifdef LIBXML_VALID_ENABLED 12773/************************************************************************ 12774 * * 12775 * Front ends when parsing a DTD * 12776 * * 12777 ************************************************************************/ 12778 12779/** 12780 * xmlIOParseDTD: 12781 * @sax: the SAX handler block or NULL 12782 * @input: an Input Buffer 12783 * @enc: the charset encoding if known 12784 * 12785 * Load and parse a DTD 12786 * 12787 * Returns the resulting xmlDtdPtr or NULL in case of error. 12788 * @input will be freed by the function in any case. 
12789 */ 12790 12791xmlDtdPtr 12792xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12793 xmlCharEncoding enc) { 12794 xmlDtdPtr ret = NULL; 12795 xmlParserCtxtPtr ctxt; 12796 xmlParserInputPtr pinput = NULL; 12797 xmlChar start[4]; 12798 12799 if (input == NULL) 12800 return(NULL); 12801 12802 ctxt = xmlNewParserCtxt(); 12803 if (ctxt == NULL) { 12804 xmlFreeParserInputBuffer(input); 12805 return(NULL); 12806 } 12807 12808 /* We are loading a DTD */ 12809 ctxt->options |= XML_PARSE_DTDLOAD; 12810 12811 /* 12812 * Set-up the SAX context 12813 */ 12814 if (sax != NULL) { 12815 if (ctxt->sax != NULL) 12816 xmlFree(ctxt->sax); 12817 ctxt->sax = sax; 12818 ctxt->userData = ctxt; 12819 } 12820 xmlDetectSAX2(ctxt); 12821 12822 /* 12823 * generate a parser input from the I/O handler 12824 */ 12825 12826 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12827 if (pinput == NULL) { 12828 if (sax != NULL) ctxt->sax = NULL; 12829 xmlFreeParserInputBuffer(input); 12830 xmlFreeParserCtxt(ctxt); 12831 return(NULL); 12832 } 12833 12834 /* 12835 * plug some encoding conversion routines here. 12836 */ 12837 if (xmlPushInput(ctxt, pinput) < 0) { 12838 if (sax != NULL) ctxt->sax = NULL; 12839 xmlFreeParserCtxt(ctxt); 12840 return(NULL); 12841 } 12842 if (enc != XML_CHAR_ENCODING_NONE) { 12843 xmlSwitchEncoding(ctxt, enc); 12844 } 12845 12846 pinput->filename = NULL; 12847 pinput->line = 1; 12848 pinput->col = 1; 12849 pinput->base = ctxt->input->cur; 12850 pinput->cur = ctxt->input->cur; 12851 pinput->free = NULL; 12852 12853 /* 12854 * let's parse that entity knowing it's an external subset. 
12855 */ 12856 ctxt->inSubset = 2; 12857 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12858 if (ctxt->myDoc == NULL) { 12859 xmlErrMemory(ctxt, "New Doc failed"); 12860 return(NULL); 12861 } 12862 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12863 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12864 BAD_CAST "none", BAD_CAST "none"); 12865 12866 if ((enc == XML_CHAR_ENCODING_NONE) && 12867 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12868 /* 12869 * Get the 4 first bytes and decode the charset 12870 * if enc != XML_CHAR_ENCODING_NONE 12871 * plug some encoding conversion routines. 12872 */ 12873 start[0] = RAW; 12874 start[1] = NXT(1); 12875 start[2] = NXT(2); 12876 start[3] = NXT(3); 12877 enc = xmlDetectCharEncoding(start, 4); 12878 if (enc != XML_CHAR_ENCODING_NONE) { 12879 xmlSwitchEncoding(ctxt, enc); 12880 } 12881 } 12882 12883 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12884 12885 if (ctxt->myDoc != NULL) { 12886 if (ctxt->wellFormed) { 12887 ret = ctxt->myDoc->extSubset; 12888 ctxt->myDoc->extSubset = NULL; 12889 if (ret != NULL) { 12890 xmlNodePtr tmp; 12891 12892 ret->doc = NULL; 12893 tmp = ret->children; 12894 while (tmp != NULL) { 12895 tmp->doc = NULL; 12896 tmp = tmp->next; 12897 } 12898 } 12899 } else { 12900 ret = NULL; 12901 } 12902 xmlFreeDoc(ctxt->myDoc); 12903 ctxt->myDoc = NULL; 12904 } 12905 if (sax != NULL) ctxt->sax = NULL; 12906 xmlFreeParserCtxt(ctxt); 12907 12908 return(ret); 12909} 12910 12911/** 12912 * xmlSAXParseDTD: 12913 * @sax: the SAX handler block 12914 * @ExternalID: a NAME* containing the External ID of the DTD 12915 * @SystemID: a NAME* containing the URL to the DTD 12916 * 12917 * Load and parse an external subset. 12918 * 12919 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12920 */ 12921 12922xmlDtdPtr 12923xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12924 const xmlChar *SystemID) { 12925 xmlDtdPtr ret = NULL; 12926 xmlParserCtxtPtr ctxt; 12927 xmlParserInputPtr input = NULL; 12928 xmlCharEncoding enc; 12929 xmlChar* systemIdCanonic; 12930 12931 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12932 12933 ctxt = xmlNewParserCtxt(); 12934 if (ctxt == NULL) { 12935 return(NULL); 12936 } 12937 12938 /* We are loading a DTD */ 12939 ctxt->options |= XML_PARSE_DTDLOAD; 12940 12941 /* 12942 * Set-up the SAX context 12943 */ 12944 if (sax != NULL) { 12945 if (ctxt->sax != NULL) 12946 xmlFree(ctxt->sax); 12947 ctxt->sax = sax; 12948 ctxt->userData = ctxt; 12949 } 12950 12951 /* 12952 * Canonicalise the system ID 12953 */ 12954 systemIdCanonic = xmlCanonicPath(SystemID); 12955 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12956 xmlFreeParserCtxt(ctxt); 12957 return(NULL); 12958 } 12959 12960 /* 12961 * Ask the Entity resolver to load the damn thing 12962 */ 12963 12964 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12965 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12966 systemIdCanonic); 12967 if (input == NULL) { 12968 if (sax != NULL) ctxt->sax = NULL; 12969 xmlFreeParserCtxt(ctxt); 12970 if (systemIdCanonic != NULL) 12971 xmlFree(systemIdCanonic); 12972 return(NULL); 12973 } 12974 12975 /* 12976 * plug some encoding conversion routines here. 
12977 */ 12978 if (xmlPushInput(ctxt, input) < 0) { 12979 if (sax != NULL) ctxt->sax = NULL; 12980 xmlFreeParserCtxt(ctxt); 12981 if (systemIdCanonic != NULL) 12982 xmlFree(systemIdCanonic); 12983 return(NULL); 12984 } 12985 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12986 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12987 xmlSwitchEncoding(ctxt, enc); 12988 } 12989 12990 if (input->filename == NULL) 12991 input->filename = (char *) systemIdCanonic; 12992 else 12993 xmlFree(systemIdCanonic); 12994 input->line = 1; 12995 input->col = 1; 12996 input->base = ctxt->input->cur; 12997 input->cur = ctxt->input->cur; 12998 input->free = NULL; 12999 13000 /* 13001 * let's parse that entity knowing it's an external subset. 13002 */ 13003 ctxt->inSubset = 2; 13004 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 13005 if (ctxt->myDoc == NULL) { 13006 xmlErrMemory(ctxt, "New Doc failed"); 13007 if (sax != NULL) ctxt->sax = NULL; 13008 xmlFreeParserCtxt(ctxt); 13009 return(NULL); 13010 } 13011 ctxt->myDoc->properties = XML_DOC_INTERNAL; 13012 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 13013 ExternalID, SystemID); 13014 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 13015 13016 if (ctxt->myDoc != NULL) { 13017 if (ctxt->wellFormed) { 13018 ret = ctxt->myDoc->extSubset; 13019 ctxt->myDoc->extSubset = NULL; 13020 if (ret != NULL) { 13021 xmlNodePtr tmp; 13022 13023 ret->doc = NULL; 13024 tmp = ret->children; 13025 while (tmp != NULL) { 13026 tmp->doc = NULL; 13027 tmp = tmp->next; 13028 } 13029 } 13030 } else { 13031 ret = NULL; 13032 } 13033 xmlFreeDoc(ctxt->myDoc); 13034 ctxt->myDoc = NULL; 13035 } 13036 if (sax != NULL) ctxt->sax = NULL; 13037 xmlFreeParserCtxt(ctxt); 13038 13039 return(ret); 13040} 13041 13042 13043/** 13044 * xmlParseDTD: 13045 * @ExternalID: a NAME* containing the External ID of the DTD 13046 * @SystemID: a NAME* containing the URL to the DTD 13047 * 13048 * Load and parse an external subset. 
 * This is xmlSAXParseDTD() called with a NULL SAX handler block.
 *
 * Returns the resulting xmlDtdPtr or NULL in case of error.
 */

xmlDtdPtr
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
}
#endif /* LIBXML_VALID_ENABLED */

/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/

/**
 * xmlParseCtxtExternalEntity:
 * @ctx:  the existing parsing context
 * @URL:  the URL for the entity to load
 * @ID:  the System ID for the entity to load
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse an external general entity within an existing parsing context
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl?
content 13077 * 13078 * Returns 0 if the entity is well formed, -1 in case of args problem and 13079 * the parser error code otherwise 13080 */ 13081 13082int 13083xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 13084 const xmlChar *ID, xmlNodePtr *lst) { 13085 xmlParserCtxtPtr ctxt; 13086 xmlDocPtr newDoc; 13087 xmlNodePtr newRoot; 13088 xmlSAXHandlerPtr oldsax = NULL; 13089 int ret = 0; 13090 xmlChar start[4]; 13091 xmlCharEncoding enc; 13092 13093 if (ctx == NULL) return(-1); 13094 13095 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 13096 (ctx->depth > 1024)) { 13097 return(XML_ERR_ENTITY_LOOP); 13098 } 13099 13100 if (lst != NULL) 13101 *lst = NULL; 13102 if ((URL == NULL) && (ID == NULL)) 13103 return(-1); 13104 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 13105 return(-1); 13106 13107 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 13108 if (ctxt == NULL) { 13109 return(-1); 13110 } 13111 13112 oldsax = ctxt->sax; 13113 ctxt->sax = ctx->sax; 13114 xmlDetectSAX2(ctxt); 13115 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13116 if (newDoc == NULL) { 13117 xmlFreeParserCtxt(ctxt); 13118 return(-1); 13119 } 13120 newDoc->properties = XML_DOC_INTERNAL; 13121 if (ctx->myDoc->dict) { 13122 newDoc->dict = ctx->myDoc->dict; 13123 xmlDictReference(newDoc->dict); 13124 } 13125 if (ctx->myDoc != NULL) { 13126 newDoc->intSubset = ctx->myDoc->intSubset; 13127 newDoc->extSubset = ctx->myDoc->extSubset; 13128 } 13129 if (ctx->myDoc->URL != NULL) { 13130 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 13131 } 13132 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13133 if (newRoot == NULL) { 13134 ctxt->sax = oldsax; 13135 xmlFreeParserCtxt(ctxt); 13136 newDoc->intSubset = NULL; 13137 newDoc->extSubset = NULL; 13138 xmlFreeDoc(newDoc); 13139 return(-1); 13140 } 13141 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13142 nodePush(ctxt, newDoc->children); 13143 if (ctx->myDoc == NULL) { 13144 
ctxt->myDoc = newDoc; 13145 } else { 13146 ctxt->myDoc = ctx->myDoc; 13147 newDoc->children->doc = ctx->myDoc; 13148 } 13149 13150 /* 13151 * Get the 4 first bytes and decode the charset 13152 * if enc != XML_CHAR_ENCODING_NONE 13153 * plug some encoding conversion routines. 13154 */ 13155 GROW 13156 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13157 start[0] = RAW; 13158 start[1] = NXT(1); 13159 start[2] = NXT(2); 13160 start[3] = NXT(3); 13161 enc = xmlDetectCharEncoding(start, 4); 13162 if (enc != XML_CHAR_ENCODING_NONE) { 13163 xmlSwitchEncoding(ctxt, enc); 13164 } 13165 } 13166 13167 /* 13168 * Parse a possible text declaration first 13169 */ 13170 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13171 xmlParseTextDecl(ctxt); 13172 /* 13173 * An XML-1.0 document can't reference an entity not XML-1.0 13174 */ 13175 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 13176 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13177 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13178 "Version mismatch between document and entity\n"); 13179 } 13180 } 13181 13182 /* 13183 * If the user provided its own SAX callbacks then reuse the 13184 * useData callback field, otherwise the expected setup in a 13185 * DOM builder is to have userData == ctxt 13186 */ 13187 if (ctx->userData == ctx) 13188 ctxt->userData = ctxt; 13189 else 13190 ctxt->userData = ctx->userData; 13191 13192 /* 13193 * Doing validity checking on chunk doesn't make sense 13194 */ 13195 ctxt->instate = XML_PARSER_CONTENT; 13196 ctxt->validate = ctx->validate; 13197 ctxt->valid = ctx->valid; 13198 ctxt->loadsubset = ctx->loadsubset; 13199 ctxt->depth = ctx->depth + 1; 13200 ctxt->replaceEntities = ctx->replaceEntities; 13201 if (ctxt->validate) { 13202 ctxt->vctxt.error = ctx->vctxt.error; 13203 ctxt->vctxt.warning = ctx->vctxt.warning; 13204 } else { 13205 ctxt->vctxt.error = NULL; 13206 ctxt->vctxt.warning = NULL; 13207 } 13208 ctxt->vctxt.nodeTab = NULL; 13209 
ctxt->vctxt.nodeNr = 0; 13210 ctxt->vctxt.nodeMax = 0; 13211 ctxt->vctxt.node = NULL; 13212 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13213 ctxt->dict = ctx->dict; 13214 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13215 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13216 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13217 ctxt->dictNames = ctx->dictNames; 13218 ctxt->attsDefault = ctx->attsDefault; 13219 ctxt->attsSpecial = ctx->attsSpecial; 13220 ctxt->linenumbers = ctx->linenumbers; 13221 13222 xmlParseContent(ctxt); 13223 13224 ctx->validate = ctxt->validate; 13225 ctx->valid = ctxt->valid; 13226 if ((RAW == '<') && (NXT(1) == '/')) { 13227 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13228 } else if (RAW != 0) { 13229 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13230 } 13231 if (ctxt->node != newDoc->children) { 13232 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13233 } 13234 13235 if (!ctxt->wellFormed) { 13236 if (ctxt->errNo == 0) 13237 ret = 1; 13238 else 13239 ret = ctxt->errNo; 13240 } else { 13241 if (lst != NULL) { 13242 xmlNodePtr cur; 13243 13244 /* 13245 * Return the newly created nodeset after unlinking it from 13246 * they pseudo parent. 
13247 */ 13248 cur = newDoc->children->children; 13249 *lst = cur; 13250 while (cur != NULL) { 13251 cur->parent = NULL; 13252 cur = cur->next; 13253 } 13254 newDoc->children->children = NULL; 13255 } 13256 ret = 0; 13257 } 13258 ctxt->sax = oldsax; 13259 ctxt->dict = NULL; 13260 ctxt->attsDefault = NULL; 13261 ctxt->attsSpecial = NULL; 13262 xmlFreeParserCtxt(ctxt); 13263 newDoc->intSubset = NULL; 13264 newDoc->extSubset = NULL; 13265 xmlFreeDoc(newDoc); 13266 13267 return(ret); 13268} 13269 13270/** 13271 * xmlParseExternalEntityPrivate: 13272 * @doc: the document the chunk pertains to 13273 * @oldctxt: the previous parser context if available 13274 * @sax: the SAX handler bloc (possibly NULL) 13275 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13276 * @depth: Used for loop detection, use 0 13277 * @URL: the URL for the entity to load 13278 * @ID: the System ID for the entity to load 13279 * @list: the return value for the set of parsed nodes 13280 * 13281 * Private version of xmlParseExternalEntity() 13282 * 13283 * Returns 0 if the entity is well formed, -1 in case of args problem and 13284 * the parser error code otherwise 13285 */ 13286 13287static xmlParserErrors 13288xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13289 xmlSAXHandlerPtr sax, 13290 void *user_data, int depth, const xmlChar *URL, 13291 const xmlChar *ID, xmlNodePtr *list) { 13292 xmlParserCtxtPtr ctxt; 13293 xmlDocPtr newDoc; 13294 xmlNodePtr newRoot; 13295 xmlSAXHandlerPtr oldsax = NULL; 13296 xmlParserErrors ret = XML_ERR_OK; 13297 xmlChar start[4]; 13298 xmlCharEncoding enc; 13299 13300 if (((depth > 40) && 13301 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13302 (depth > 1024)) { 13303 return(XML_ERR_ENTITY_LOOP); 13304 } 13305 13306 if (list != NULL) 13307 *list = NULL; 13308 if ((URL == NULL) && (ID == NULL)) 13309 return(XML_ERR_INTERNAL_ERROR); 13310 if (doc == NULL) 13311 return(XML_ERR_INTERNAL_ERROR); 13312 13313 
13314 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13315 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13316 ctxt->userData = ctxt; 13317 if (oldctxt != NULL) { 13318 ctxt->_private = oldctxt->_private; 13319 ctxt->loadsubset = oldctxt->loadsubset; 13320 ctxt->validate = oldctxt->validate; 13321 ctxt->external = oldctxt->external; 13322 ctxt->record_info = oldctxt->record_info; 13323 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13324 ctxt->node_seq.length = oldctxt->node_seq.length; 13325 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13326 } else { 13327 /* 13328 * Doing validity checking on chunk without context 13329 * doesn't make sense 13330 */ 13331 ctxt->_private = NULL; 13332 ctxt->validate = 0; 13333 ctxt->external = 2; 13334 ctxt->loadsubset = 0; 13335 } 13336 if (sax != NULL) { 13337 oldsax = ctxt->sax; 13338 ctxt->sax = sax; 13339 if (user_data != NULL) 13340 ctxt->userData = user_data; 13341 } 13342 xmlDetectSAX2(ctxt); 13343 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13344 if (newDoc == NULL) { 13345 ctxt->node_seq.maximum = 0; 13346 ctxt->node_seq.length = 0; 13347 ctxt->node_seq.buffer = NULL; 13348 xmlFreeParserCtxt(ctxt); 13349 return(XML_ERR_INTERNAL_ERROR); 13350 } 13351 newDoc->properties = XML_DOC_INTERNAL; 13352 newDoc->intSubset = doc->intSubset; 13353 newDoc->extSubset = doc->extSubset; 13354 newDoc->dict = doc->dict; 13355 xmlDictReference(newDoc->dict); 13356 13357 if (doc->URL != NULL) { 13358 newDoc->URL = xmlStrdup(doc->URL); 13359 } 13360 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13361 if (newRoot == NULL) { 13362 if (sax != NULL) 13363 ctxt->sax = oldsax; 13364 ctxt->node_seq.maximum = 0; 13365 ctxt->node_seq.length = 0; 13366 ctxt->node_seq.buffer = NULL; 13367 xmlFreeParserCtxt(ctxt); 13368 newDoc->intSubset = NULL; 13369 newDoc->extSubset = NULL; 13370 xmlFreeDoc(newDoc); 13371 return(XML_ERR_INTERNAL_ERROR); 13372 } 13373 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13374 
nodePush(ctxt, newDoc->children); 13375 ctxt->myDoc = doc; 13376 newRoot->doc = doc; 13377 13378 /* 13379 * Get the 4 first bytes and decode the charset 13380 * if enc != XML_CHAR_ENCODING_NONE 13381 * plug some encoding conversion routines. 13382 */ 13383 GROW; 13384 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13385 start[0] = RAW; 13386 start[1] = NXT(1); 13387 start[2] = NXT(2); 13388 start[3] = NXT(3); 13389 enc = xmlDetectCharEncoding(start, 4); 13390 if (enc != XML_CHAR_ENCODING_NONE) { 13391 xmlSwitchEncoding(ctxt, enc); 13392 } 13393 } 13394 13395 /* 13396 * Parse a possible text declaration first 13397 */ 13398 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13399 xmlParseTextDecl(ctxt); 13400 } 13401 13402 ctxt->instate = XML_PARSER_CONTENT; 13403 ctxt->depth = depth; 13404 13405 xmlParseContent(ctxt); 13406 13407 if ((RAW == '<') && (NXT(1) == '/')) { 13408 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13409 } else if (RAW != 0) { 13410 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13411 } 13412 if (ctxt->node != newDoc->children) { 13413 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13414 } 13415 13416 if (!ctxt->wellFormed) { 13417 if (ctxt->errNo == 0) 13418 ret = XML_ERR_INTERNAL_ERROR; 13419 else 13420 ret = (xmlParserErrors)ctxt->errNo; 13421 } else { 13422 if (list != NULL) { 13423 xmlNodePtr cur; 13424 13425 /* 13426 * Return the newly created nodeset after unlinking it from 13427 * they pseudo parent. 13428 */ 13429 cur = newDoc->children->children; 13430 *list = cur; 13431 while (cur != NULL) { 13432 cur->parent = NULL; 13433 cur = cur->next; 13434 } 13435 newDoc->children->children = NULL; 13436 } 13437 ret = XML_ERR_OK; 13438 } 13439 13440 /* 13441 * Record in the parent context the number of entities replacement 13442 * done when parsing that reference. 
13443 */ 13444 if (oldctxt != NULL) 13445 oldctxt->nbentities += ctxt->nbentities; 13446 13447 /* 13448 * Also record the size of the entity parsed 13449 */ 13450 if (ctxt->input != NULL && oldctxt != NULL) { 13451 oldctxt->sizeentities += ctxt->input->consumed; 13452 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13453 } 13454 /* 13455 * And record the last error if any 13456 */ 13457 if (ctxt->lastError.code != XML_ERR_OK) 13458 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13459 13460 if (sax != NULL) 13461 ctxt->sax = oldsax; 13462 if (oldctxt != NULL) { 13463 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13464 oldctxt->node_seq.length = ctxt->node_seq.length; 13465 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13466 } 13467 ctxt->node_seq.maximum = 0; 13468 ctxt->node_seq.length = 0; 13469 ctxt->node_seq.buffer = NULL; 13470 xmlFreeParserCtxt(ctxt); 13471 newDoc->intSubset = NULL; 13472 newDoc->extSubset = NULL; 13473 xmlFreeDoc(newDoc); 13474 13475 return(ret); 13476} 13477 13478#ifdef LIBXML_SAX1_ENABLED 13479/** 13480 * xmlParseExternalEntity: 13481 * @doc: the document the chunk pertains to 13482 * @sax: the SAX handler bloc (possibly NULL) 13483 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13484 * @depth: Used for loop detection, use 0 13485 * @URL: the URL for the entity to load 13486 * @ID: the System ID for the entity to load 13487 * @lst: the return value for the set of parsed nodes 13488 * 13489 * Parse an external general entity 13490 * An external general parsed entity is well-formed if it matches the 13491 * production labeled extParsedEnt. 13492 * 13493 * [78] extParsedEnt ::= TextDecl? 
 *                       content
 *
 * This is xmlParseExternalEntityPrivate() called without a previous
 * parser context (NULL is passed for it).
 *
 * Returns 0 if the entity is well formed and the parser error code
 *    returned by xmlParseExternalEntityPrivate() otherwise; that
 *    function reports a NULL @doc, or @URL and @ID both being NULL,
 *    as XML_ERR_INTERNAL_ERROR.
 */

int
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
          int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
                                         ID, lst));
}

/**
 * xmlParseBalancedChunkMemory:
 * @doc:  the document the chunk pertains to
 * @sax:  the SAX handler bloc (possibly NULL)
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
 * @depth:  Used for loop detection, use 0
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * This forwards all arguments to xmlParseBalancedChunkMemoryRecover()
 * with 0 as the last argument (presumably the recover flag, i.e.
 * recovery disabled; confirm against that function).
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 */

int
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
                                                depth, string, lst, 0 );
}
#endif /* LIBXML_SAX1_ENABLED */

/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The
 * allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise.  XML_ERR_ENTITY_LOOP is returned when the depth
 * of @oldctxt exceeds the nesting limit, XML_ERR_INTERNAL_ERROR when
 * @string is NULL or an allocation fails, and XML_WAR_UNDECLARED_ENTITY
 * when the memory parser context cannot be created.
 *
 * @oldctxt is dereferenced without a NULL check and must not be NULL.
 * This function has no recover mode: the node list is only handed back
 * through @lst when the result is XML_ERR_OK.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* depth limit: 40 by default, 1024 even with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Share the parent's dictionary; ctxt->dict is reset to NULL on the
     * error paths below before the child context is freed.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /*
     * Borrow the parent's SAX handler; the context's own one is kept in
     * oldsax and put back before the context is freed.
     */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document in the parent: parse into a scratch document */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Parse into the parent's document: remember its current child
         * list so it can be restored once the pseudo root is removed.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* temporarily make the pseudo root the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
        ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /*
             * Validate each top-level element against the parent's
             * document when the parent is validating.
             */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /* drop the pseudo root and restore the saved child list */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
13706 */ 13707 if (oldctxt != NULL) 13708 oldctxt->nbentities += ctxt->nbentities; 13709 13710 /* 13711 * Also record the last error if any 13712 */ 13713 if (ctxt->lastError.code != XML_ERR_OK) 13714 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13715 13716 ctxt->sax = oldsax; 13717 ctxt->dict = NULL; 13718 ctxt->attsDefault = NULL; 13719 ctxt->attsSpecial = NULL; 13720 xmlFreeParserCtxt(ctxt); 13721 if (newDoc != NULL) { 13722 xmlFreeDoc(newDoc); 13723 } 13724 13725 return(ret); 13726} 13727 13728/** 13729 * xmlParseInNodeContext: 13730 * @node: the context node 13731 * @data: the input string 13732 * @datalen: the input string length in bytes 13733 * @options: a combination of xmlParserOption 13734 * @lst: the return value for the set of parsed nodes 13735 * 13736 * Parse a well-balanced chunk of an XML document 13737 * within the context (DTD, namespaces, etc ...) of the given node. 13738 * 13739 * The allowed sequence for the data is a Well Balanced Chunk defined by 13740 * the content production in the XML grammar: 13741 * 13742 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13743 * 13744 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13745 * error code otherwise 13746 */ 13747xmlParserErrors 13748xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13749 int options, xmlNodePtr *lst) { 13750#ifdef SAX2 13751 xmlParserCtxtPtr ctxt; 13752 xmlDocPtr doc = NULL; 13753 xmlNodePtr fake, cur; 13754 int nsnr = 0; 13755 13756 xmlParserErrors ret = XML_ERR_OK; 13757 13758 /* 13759 * check all input parameters, grab the document 13760 */ 13761 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13762 return(XML_ERR_INTERNAL_ERROR); 13763 switch (node->type) { 13764 case XML_ELEMENT_NODE: 13765 case XML_ATTRIBUTE_NODE: 13766 case XML_TEXT_NODE: 13767 case XML_CDATA_SECTION_NODE: 13768 case XML_ENTITY_REF_NODE: 13769 case XML_PI_NODE: 13770 case XML_COMMENT_NODE: 13771 case 
XML_DOCUMENT_NODE: 13772 case XML_HTML_DOCUMENT_NODE: 13773 break; 13774 default: 13775 return(XML_ERR_INTERNAL_ERROR); 13776 13777 } 13778 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13779 (node->type != XML_DOCUMENT_NODE) && 13780 (node->type != XML_HTML_DOCUMENT_NODE)) 13781 node = node->parent; 13782 if (node == NULL) 13783 return(XML_ERR_INTERNAL_ERROR); 13784 if (node->type == XML_ELEMENT_NODE) 13785 doc = node->doc; 13786 else 13787 doc = (xmlDocPtr) node; 13788 if (doc == NULL) 13789 return(XML_ERR_INTERNAL_ERROR); 13790 13791 /* 13792 * allocate a context and set-up everything not related to the 13793 * node position in the tree 13794 */ 13795 if (doc->type == XML_DOCUMENT_NODE) 13796 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13797#ifdef LIBXML_HTML_ENABLED 13798 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13799 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13800 /* 13801 * When parsing in context, it makes no sense to add implied 13802 * elements like html/body/etc... 13803 */ 13804 options |= HTML_PARSE_NOIMPLIED; 13805 } 13806#endif 13807 else 13808 return(XML_ERR_INTERNAL_ERROR); 13809 13810 if (ctxt == NULL) 13811 return(XML_ERR_NO_MEMORY); 13812 13813 /* 13814 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13815 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13816 * we must wait until the last moment to free the original one. 
13817 */ 13818 if (doc->dict != NULL) { 13819 if (ctxt->dict != NULL) 13820 xmlDictFree(ctxt->dict); 13821 ctxt->dict = doc->dict; 13822 } else 13823 options |= XML_PARSE_NODICT; 13824 13825 if (doc->encoding != NULL) { 13826 xmlCharEncodingHandlerPtr hdlr; 13827 13828 if (ctxt->encoding != NULL) 13829 xmlFree((xmlChar *) ctxt->encoding); 13830 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13831 13832 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13833 if (hdlr != NULL) { 13834 xmlSwitchToEncoding(ctxt, hdlr); 13835 } else { 13836 return(XML_ERR_UNSUPPORTED_ENCODING); 13837 } 13838 } 13839 13840 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13841 xmlDetectSAX2(ctxt); 13842 ctxt->myDoc = doc; 13843 /* parsing in context, i.e. as within existing content */ 13844 ctxt->instate = XML_PARSER_CONTENT; 13845 13846 fake = xmlNewComment(NULL); 13847 if (fake == NULL) { 13848 xmlFreeParserCtxt(ctxt); 13849 return(XML_ERR_NO_MEMORY); 13850 } 13851 xmlAddChild(node, fake); 13852 13853 if (node->type == XML_ELEMENT_NODE) { 13854 nodePush(ctxt, node); 13855 /* 13856 * initialize the SAX2 namespaces stack 13857 */ 13858 cur = node; 13859 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13860 xmlNsPtr ns = cur->nsDef; 13861 const xmlChar *iprefix, *ihref; 13862 13863 while (ns != NULL) { 13864 if (ctxt->dict) { 13865 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13866 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13867 } else { 13868 iprefix = ns->prefix; 13869 ihref = ns->href; 13870 } 13871 13872 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13873 nsPush(ctxt, iprefix, ihref); 13874 nsnr++; 13875 } 13876 ns = ns->next; 13877 } 13878 cur = cur->parent; 13879 } 13880 } 13881 13882 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13883 /* 13884 * ID/IDREF registration will be done in xmlValidateElement below 13885 */ 13886 ctxt->loadsubset |= XML_SKIP_IDS; 13887 } 13888 13889#ifdef LIBXML_HTML_ENABLED 13890 if 
(doc->type == XML_HTML_DOCUMENT_NODE) 13891 __htmlParseContent(ctxt); 13892 else 13893#endif 13894 xmlParseContent(ctxt); 13895 13896 nsPop(ctxt, nsnr); 13897 if ((RAW == '<') && (NXT(1) == '/')) { 13898 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13899 } else if (RAW != 0) { 13900 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13901 } 13902 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13903 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13904 ctxt->wellFormed = 0; 13905 } 13906 13907 if (!ctxt->wellFormed) { 13908 if (ctxt->errNo == 0) 13909 ret = XML_ERR_INTERNAL_ERROR; 13910 else 13911 ret = (xmlParserErrors)ctxt->errNo; 13912 } else { 13913 ret = XML_ERR_OK; 13914 } 13915 13916 /* 13917 * Return the newly created nodeset after unlinking it from 13918 * the pseudo sibling. 13919 */ 13920 13921 cur = fake->next; 13922 fake->next = NULL; 13923 node->last = fake; 13924 13925 if (cur != NULL) { 13926 cur->prev = NULL; 13927 } 13928 13929 *lst = cur; 13930 13931 while (cur != NULL) { 13932 cur->parent = NULL; 13933 cur = cur->next; 13934 } 13935 13936 xmlUnlinkNode(fake); 13937 xmlFreeNode(fake); 13938 13939 13940 if (ret != XML_ERR_OK) { 13941 xmlFreeNodeList(*lst); 13942 *lst = NULL; 13943 } 13944 13945 if (doc->dict != NULL) 13946 ctxt->dict = NULL; 13947 xmlFreeParserCtxt(ctxt); 13948 13949 return(ret); 13950#else /* !SAX2 */ 13951 return(XML_ERR_INTERNAL_ERROR); 13952#endif 13953} 13954 13955#ifdef LIBXML_SAX1_ENABLED 13956/** 13957 * xmlParseBalancedChunkMemoryRecover: 13958 * @doc: the document the chunk pertains to 13959 * @sax: the SAX handler bloc (possibly NULL) 13960 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13961 * @depth: Used for loop detection, use 0 13962 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13963 * @lst: the return value for the set of parsed nodes 13964 * @recover: return nodes even if the data is broken (use 0) 13965 * 13966 * 13967 * Parse a well-balanced chunk of an 
XML document 13968 * called by the parser 13969 * The allowed sequence for the Well Balanced Chunk is the one defined by 13970 * the content production in the XML grammar: 13971 * 13972 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13973 * 13974 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13975 * the parser error code otherwise 13976 * 13977 * In case recover is set to 1, the nodelist will not be empty even if 13978 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13979 * some extent. 13980 */ 13981int 13982xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13983 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13984 int recover) { 13985 xmlParserCtxtPtr ctxt; 13986 xmlDocPtr newDoc; 13987 xmlSAXHandlerPtr oldsax = NULL; 13988 xmlNodePtr content, newRoot; 13989 int size; 13990 int ret = 0; 13991 13992 if (depth > 40) { 13993 return(XML_ERR_ENTITY_LOOP); 13994 } 13995 13996 13997 if (lst != NULL) 13998 *lst = NULL; 13999 if (string == NULL) 14000 return(-1); 14001 14002 size = xmlStrlen(string); 14003 14004 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 14005 if (ctxt == NULL) return(-1); 14006 ctxt->userData = ctxt; 14007 if (sax != NULL) { 14008 oldsax = ctxt->sax; 14009 ctxt->sax = sax; 14010 if (user_data != NULL) 14011 ctxt->userData = user_data; 14012 } 14013 newDoc = xmlNewDoc(BAD_CAST "1.0"); 14014 if (newDoc == NULL) { 14015 xmlFreeParserCtxt(ctxt); 14016 return(-1); 14017 } 14018 newDoc->properties = XML_DOC_INTERNAL; 14019 if ((doc != NULL) && (doc->dict != NULL)) { 14020 xmlDictFree(ctxt->dict); 14021 ctxt->dict = doc->dict; 14022 xmlDictReference(ctxt->dict); 14023 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 14024 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 14025 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 14026 ctxt->dictNames = 1; 14027 } else { 14028 
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 14029 } 14030 if (doc != NULL) { 14031 newDoc->intSubset = doc->intSubset; 14032 newDoc->extSubset = doc->extSubset; 14033 } 14034 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 14035 if (newRoot == NULL) { 14036 if (sax != NULL) 14037 ctxt->sax = oldsax; 14038 xmlFreeParserCtxt(ctxt); 14039 newDoc->intSubset = NULL; 14040 newDoc->extSubset = NULL; 14041 xmlFreeDoc(newDoc); 14042 return(-1); 14043 } 14044 xmlAddChild((xmlNodePtr) newDoc, newRoot); 14045 nodePush(ctxt, newRoot); 14046 if (doc == NULL) { 14047 ctxt->myDoc = newDoc; 14048 } else { 14049 ctxt->myDoc = newDoc; 14050 newDoc->children->doc = doc; 14051 /* Ensure that doc has XML spec namespace */ 14052 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 14053 newDoc->oldNs = doc->oldNs; 14054 } 14055 ctxt->instate = XML_PARSER_CONTENT; 14056 ctxt->depth = depth; 14057 14058 /* 14059 * Doing validity checking on chunk doesn't make sense 14060 */ 14061 ctxt->validate = 0; 14062 ctxt->loadsubset = 0; 14063 xmlDetectSAX2(ctxt); 14064 14065 if ( doc != NULL ){ 14066 content = doc->children; 14067 doc->children = NULL; 14068 xmlParseContent(ctxt); 14069 doc->children = content; 14070 } 14071 else { 14072 xmlParseContent(ctxt); 14073 } 14074 if ((RAW == '<') && (NXT(1) == '/')) { 14075 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14076 } else if (RAW != 0) { 14077 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 14078 } 14079 if (ctxt->node != newDoc->children) { 14080 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14081 } 14082 14083 if (!ctxt->wellFormed) { 14084 if (ctxt->errNo == 0) 14085 ret = 1; 14086 else 14087 ret = ctxt->errNo; 14088 } else { 14089 ret = 0; 14090 } 14091 14092 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 14093 xmlNodePtr cur; 14094 14095 /* 14096 * Return the newly created nodeset after unlinking it from 14097 * they pseudo parent. 
14098 */ 14099 cur = newDoc->children->children; 14100 *lst = cur; 14101 while (cur != NULL) { 14102 xmlSetTreeDoc(cur, doc); 14103 cur->parent = NULL; 14104 cur = cur->next; 14105 } 14106 newDoc->children->children = NULL; 14107 } 14108 14109 if (sax != NULL) 14110 ctxt->sax = oldsax; 14111 xmlFreeParserCtxt(ctxt); 14112 newDoc->intSubset = NULL; 14113 newDoc->extSubset = NULL; 14114 newDoc->oldNs = NULL; 14115 xmlFreeDoc(newDoc); 14116 14117 return(ret); 14118} 14119 14120/** 14121 * xmlSAXParseEntity: 14122 * @sax: the SAX handler block 14123 * @filename: the filename 14124 * 14125 * parse an XML external entity out of context and build a tree. 14126 * It use the given SAX function block to handle the parsing callback. 14127 * If sax is NULL, fallback to the default DOM tree building routines. 14128 * 14129 * [78] extParsedEnt ::= TextDecl? content 14130 * 14131 * This correspond to a "Well Balanced" chunk 14132 * 14133 * Returns the resulting document tree 14134 */ 14135 14136xmlDocPtr 14137xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 14138 xmlDocPtr ret; 14139 xmlParserCtxtPtr ctxt; 14140 14141 ctxt = xmlCreateFileParserCtxt(filename); 14142 if (ctxt == NULL) { 14143 return(NULL); 14144 } 14145 if (sax != NULL) { 14146 if (ctxt->sax != NULL) 14147 xmlFree(ctxt->sax); 14148 ctxt->sax = sax; 14149 ctxt->userData = NULL; 14150 } 14151 14152 xmlParseExtParsedEnt(ctxt); 14153 14154 if (ctxt->wellFormed) 14155 ret = ctxt->myDoc; 14156 else { 14157 ret = NULL; 14158 xmlFreeDoc(ctxt->myDoc); 14159 ctxt->myDoc = NULL; 14160 } 14161 if (sax != NULL) 14162 ctxt->sax = NULL; 14163 xmlFreeParserCtxt(ctxt); 14164 14165 return(ret); 14166} 14167 14168/** 14169 * xmlParseEntity: 14170 * @filename: the filename 14171 * 14172 * parse an XML external entity out of context and build a tree. 14173 * 14174 * [78] extParsedEnt ::= TextDecl? 
content 14175 * 14176 * This correspond to a "Well Balanced" chunk 14177 * 14178 * Returns the resulting document tree 14179 */ 14180 14181xmlDocPtr 14182xmlParseEntity(const char *filename) { 14183 return(xmlSAXParseEntity(NULL, filename)); 14184} 14185#endif /* LIBXML_SAX1_ENABLED */ 14186 14187/** 14188 * xmlCreateEntityParserCtxtInternal: 14189 * @URL: the entity URL 14190 * @ID: the entity PUBLIC ID 14191 * @base: a possible base for the target URI 14192 * @pctx: parser context used to set options on new context 14193 * 14194 * Create a parser context for an external entity 14195 * Automatic support for ZLIB/Compress compressed document is provided 14196 * by default if found at compile-time. 14197 * 14198 * Returns the new parser context or NULL 14199 */ 14200static xmlParserCtxtPtr 14201xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14202 const xmlChar *base, xmlParserCtxtPtr pctx) { 14203 xmlParserCtxtPtr ctxt; 14204 xmlParserInputPtr inputStream; 14205 char *directory = NULL; 14206 xmlChar *uri; 14207 14208 ctxt = xmlNewParserCtxt(); 14209 if (ctxt == NULL) { 14210 return(NULL); 14211 } 14212 14213 if (pctx != NULL) { 14214 ctxt->options = pctx->options; 14215 ctxt->_private = pctx->_private; 14216 } 14217 14218 uri = xmlBuildURI(URL, base); 14219 14220 if (uri == NULL) { 14221 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14222 if (inputStream == NULL) { 14223 xmlFreeParserCtxt(ctxt); 14224 return(NULL); 14225 } 14226 14227 inputPush(ctxt, inputStream); 14228 14229 if ((ctxt->directory == NULL) && (directory == NULL)) 14230 directory = xmlParserGetDirectory((char *)URL); 14231 if ((ctxt->directory == NULL) && (directory != NULL)) 14232 ctxt->directory = directory; 14233 } else { 14234 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14235 if (inputStream == NULL) { 14236 xmlFree(uri); 14237 xmlFreeParserCtxt(ctxt); 14238 return(NULL); 14239 } 14240 14241 inputPush(ctxt, inputStream); 14242 
14243 if ((ctxt->directory == NULL) && (directory == NULL)) 14244 directory = xmlParserGetDirectory((char *)uri); 14245 if ((ctxt->directory == NULL) && (directory != NULL)) 14246 ctxt->directory = directory; 14247 xmlFree(uri); 14248 } 14249 return(ctxt); 14250} 14251 14252/** 14253 * xmlCreateEntityParserCtxt: 14254 * @URL: the entity URL 14255 * @ID: the entity PUBLIC ID 14256 * @base: a possible base for the target URI 14257 * 14258 * Create a parser context for an external entity 14259 * Automatic support for ZLIB/Compress compressed document is provided 14260 * by default if found at compile-time. 14261 * 14262 * Returns the new parser context or NULL 14263 */ 14264xmlParserCtxtPtr 14265xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14266 const xmlChar *base) { 14267 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14268 14269} 14270 14271/************************************************************************ 14272 * * 14273 * Front ends when parsing from a file * 14274 * * 14275 ************************************************************************/ 14276 14277/** 14278 * xmlCreateURLParserCtxt: 14279 * @filename: the filename or URL 14280 * @options: a combination of xmlParserOption 14281 * 14282 * Create a parser context for a file or URL content. 
14283 * Automatic support for ZLIB/Compress compressed document is provided 14284 * by default if found at compile-time and for file accesses 14285 * 14286 * Returns the new parser context or NULL 14287 */ 14288xmlParserCtxtPtr 14289xmlCreateURLParserCtxt(const char *filename, int options) 14290{ 14291 xmlParserCtxtPtr ctxt; 14292 xmlParserInputPtr inputStream; 14293 char *directory = NULL; 14294 14295 ctxt = xmlNewParserCtxt(); 14296 if (ctxt == NULL) { 14297 xmlErrMemory(NULL, "cannot allocate parser context"); 14298 return(NULL); 14299 } 14300 14301 if (options) 14302 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14303 ctxt->linenumbers = 1; 14304 14305 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14306 if (inputStream == NULL) { 14307 xmlFreeParserCtxt(ctxt); 14308 return(NULL); 14309 } 14310 14311 inputPush(ctxt, inputStream); 14312 if ((ctxt->directory == NULL) && (directory == NULL)) 14313 directory = xmlParserGetDirectory(filename); 14314 if ((ctxt->directory == NULL) && (directory != NULL)) 14315 ctxt->directory = directory; 14316 14317 return(ctxt); 14318} 14319 14320/** 14321 * xmlCreateFileParserCtxt: 14322 * @filename: the filename 14323 * 14324 * Create a parser context for a file content. 14325 * Automatic support for ZLIB/Compress compressed document is provided 14326 * by default if found at compile-time. 14327 * 14328 * Returns the new parser context or NULL 14329 */ 14330xmlParserCtxtPtr 14331xmlCreateFileParserCtxt(const char *filename) 14332{ 14333 return(xmlCreateURLParserCtxt(filename, 0)); 14334} 14335 14336#ifdef LIBXML_SAX1_ENABLED 14337/** 14338 * xmlSAXParseFileWithData: 14339 * @sax: the SAX handler block 14340 * @filename: the filename 14341 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14342 * documents 14343 * @data: the userdata 14344 * 14345 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14346 * compressed document is provided by default if found at compile-time. 
14347 * It use the given SAX function block to handle the parsing callback. 14348 * If sax is NULL, fallback to the default DOM tree building routines. 14349 * 14350 * User data (void *) is stored within the parser context in the 14351 * context's _private member, so it is available nearly everywhere in libxml 14352 * 14353 * Returns the resulting document tree 14354 */ 14355 14356xmlDocPtr 14357xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14358 int recovery, void *data) { 14359 xmlDocPtr ret; 14360 xmlParserCtxtPtr ctxt; 14361 14362 xmlInitParser(); 14363 14364 ctxt = xmlCreateFileParserCtxt(filename); 14365 if (ctxt == NULL) { 14366 return(NULL); 14367 } 14368 if (sax != NULL) { 14369 if (ctxt->sax != NULL) 14370 xmlFree(ctxt->sax); 14371 ctxt->sax = sax; 14372 } 14373 xmlDetectSAX2(ctxt); 14374 if (data!=NULL) { 14375 ctxt->_private = data; 14376 } 14377 14378 if (ctxt->directory == NULL) 14379 ctxt->directory = xmlParserGetDirectory(filename); 14380 14381 ctxt->recovery = recovery; 14382 14383 xmlParseDocument(ctxt); 14384 14385 if ((ctxt->wellFormed) || recovery) { 14386 ret = ctxt->myDoc; 14387 if (ret != NULL) { 14388 if (ctxt->input->buf->compressed > 0) 14389 ret->compression = 9; 14390 else 14391 ret->compression = ctxt->input->buf->compressed; 14392 } 14393 } 14394 else { 14395 ret = NULL; 14396 xmlFreeDoc(ctxt->myDoc); 14397 ctxt->myDoc = NULL; 14398 } 14399 if (sax != NULL) 14400 ctxt->sax = NULL; 14401 xmlFreeParserCtxt(ctxt); 14402 14403 return(ret); 14404} 14405 14406/** 14407 * xmlSAXParseFile: 14408 * @sax: the SAX handler block 14409 * @filename: the filename 14410 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14411 * documents 14412 * 14413 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14414 * compressed document is provided by default if found at compile-time. 14415 * It use the given SAX function block to handle the parsing callback. 
14416 * If sax is NULL, fallback to the default DOM tree building routines. 14417 * 14418 * Returns the resulting document tree 14419 */ 14420 14421xmlDocPtr 14422xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14423 int recovery) { 14424 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14425} 14426 14427/** 14428 * xmlRecoverDoc: 14429 * @cur: a pointer to an array of xmlChar 14430 * 14431 * parse an XML in-memory document and build a tree. 14432 * In the case the document is not Well Formed, a attempt to build a 14433 * tree is tried anyway 14434 * 14435 * Returns the resulting document tree or NULL in case of failure 14436 */ 14437 14438xmlDocPtr 14439xmlRecoverDoc(const xmlChar *cur) { 14440 return(xmlSAXParseDoc(NULL, cur, 1)); 14441} 14442 14443/** 14444 * xmlParseFile: 14445 * @filename: the filename 14446 * 14447 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14448 * compressed document is provided by default if found at compile-time. 14449 * 14450 * Returns the resulting document tree if the file was wellformed, 14451 * NULL otherwise. 14452 */ 14453 14454xmlDocPtr 14455xmlParseFile(const char *filename) { 14456 return(xmlSAXParseFile(NULL, filename, 0)); 14457} 14458 14459/** 14460 * xmlRecoverFile: 14461 * @filename: the filename 14462 * 14463 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14464 * compressed document is provided by default if found at compile-time. 
14465 * In the case the document is not Well Formed, it attempts to build 14466 * a tree anyway 14467 * 14468 * Returns the resulting document tree or NULL in case of failure 14469 */ 14470 14471xmlDocPtr 14472xmlRecoverFile(const char *filename) { 14473 return(xmlSAXParseFile(NULL, filename, 1)); 14474} 14475 14476 14477/** 14478 * xmlSetupParserForBuffer: 14479 * @ctxt: an XML parser context 14480 * @buffer: a xmlChar * buffer 14481 * @filename: a file name 14482 * 14483 * Setup the parser context to parse a new buffer; Clears any prior 14484 * contents from the parser context. The buffer parameter must not be 14485 * NULL, but the filename parameter can be 14486 */ 14487void 14488xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14489 const char* filename) 14490{ 14491 xmlParserInputPtr input; 14492 14493 if ((ctxt == NULL) || (buffer == NULL)) 14494 return; 14495 14496 input = xmlNewInputStream(ctxt); 14497 if (input == NULL) { 14498 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14499 xmlClearParserCtxt(ctxt); 14500 return; 14501 } 14502 14503 xmlClearParserCtxt(ctxt); 14504 if (filename != NULL) 14505 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14506 input->base = buffer; 14507 input->cur = buffer; 14508 input->end = &buffer[xmlStrlen(buffer)]; 14509 inputPush(ctxt, input); 14510} 14511 14512/** 14513 * xmlSAXUserParseFile: 14514 * @sax: a SAX handler 14515 * @user_data: The user data returned on SAX callbacks 14516 * @filename: a file name 14517 * 14518 * parse an XML file and call the given SAX handler routines. 
14519 * Automatic support for ZLIB/Compress compressed document is provided 14520 * 14521 * Returns 0 in case of success or a error number otherwise 14522 */ 14523int 14524xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14525 const char *filename) { 14526 int ret = 0; 14527 xmlParserCtxtPtr ctxt; 14528 14529 ctxt = xmlCreateFileParserCtxt(filename); 14530 if (ctxt == NULL) return -1; 14531 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14532 xmlFree(ctxt->sax); 14533 ctxt->sax = sax; 14534 xmlDetectSAX2(ctxt); 14535 14536 if (user_data != NULL) 14537 ctxt->userData = user_data; 14538 14539 xmlParseDocument(ctxt); 14540 14541 if (ctxt->wellFormed) 14542 ret = 0; 14543 else { 14544 if (ctxt->errNo != 0) 14545 ret = ctxt->errNo; 14546 else 14547 ret = -1; 14548 } 14549 if (sax != NULL) 14550 ctxt->sax = NULL; 14551 if (ctxt->myDoc != NULL) { 14552 xmlFreeDoc(ctxt->myDoc); 14553 ctxt->myDoc = NULL; 14554 } 14555 xmlFreeParserCtxt(ctxt); 14556 14557 return ret; 14558} 14559#endif /* LIBXML_SAX1_ENABLED */ 14560 14561/************************************************************************ 14562 * * 14563 * Front ends when parsing from memory * 14564 * * 14565 ************************************************************************/ 14566 14567/** 14568 * xmlCreateMemoryParserCtxt: 14569 * @buffer: a pointer to a char array 14570 * @size: the size of the array 14571 * 14572 * Create a parser context for an XML in-memory document. 
14573 * 14574 * Returns the new parser context or NULL 14575 */ 14576xmlParserCtxtPtr 14577xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14578 xmlParserCtxtPtr ctxt; 14579 xmlParserInputPtr input; 14580 xmlParserInputBufferPtr buf; 14581 14582 if (buffer == NULL) 14583 return(NULL); 14584 if (size <= 0) 14585 return(NULL); 14586 14587 ctxt = xmlNewParserCtxt(); 14588 if (ctxt == NULL) 14589 return(NULL); 14590 14591 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14592 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14593 if (buf == NULL) { 14594 xmlFreeParserCtxt(ctxt); 14595 return(NULL); 14596 } 14597 14598 input = xmlNewInputStream(ctxt); 14599 if (input == NULL) { 14600 xmlFreeParserInputBuffer(buf); 14601 xmlFreeParserCtxt(ctxt); 14602 return(NULL); 14603 } 14604 14605 input->filename = NULL; 14606 input->buf = buf; 14607 xmlBufResetInput(input->buf->buffer, input); 14608 14609 inputPush(ctxt, input); 14610 return(ctxt); 14611} 14612 14613#ifdef LIBXML_SAX1_ENABLED 14614/** 14615 * xmlSAXParseMemoryWithData: 14616 * @sax: the SAX handler block 14617 * @buffer: an pointer to a char array 14618 * @size: the size of the array 14619 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14620 * documents 14621 * @data: the userdata 14622 * 14623 * parse an XML in-memory block and use the given SAX function block 14624 * to handle the parsing callback. If sax is NULL, fallback to the default 14625 * DOM tree building routines. 
14626 * 14627 * User data (void *) is stored within the parser context in the 14628 * context's _private member, so it is available nearly everywhere in libxml 14629 * 14630 * Returns the resulting document tree 14631 */ 14632 14633xmlDocPtr 14634xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14635 int size, int recovery, void *data) { 14636 xmlDocPtr ret; 14637 xmlParserCtxtPtr ctxt; 14638 14639 xmlInitParser(); 14640 14641 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14642 if (ctxt == NULL) return(NULL); 14643 if (sax != NULL) { 14644 if (ctxt->sax != NULL) 14645 xmlFree(ctxt->sax); 14646 ctxt->sax = sax; 14647 } 14648 xmlDetectSAX2(ctxt); 14649 if (data!=NULL) { 14650 ctxt->_private=data; 14651 } 14652 14653 ctxt->recovery = recovery; 14654 14655 xmlParseDocument(ctxt); 14656 14657 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14658 else { 14659 ret = NULL; 14660 xmlFreeDoc(ctxt->myDoc); 14661 ctxt->myDoc = NULL; 14662 } 14663 if (sax != NULL) 14664 ctxt->sax = NULL; 14665 xmlFreeParserCtxt(ctxt); 14666 14667 return(ret); 14668} 14669 14670/** 14671 * xmlSAXParseMemory: 14672 * @sax: the SAX handler block 14673 * @buffer: an pointer to a char array 14674 * @size: the size of the array 14675 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14676 * documents 14677 * 14678 * parse an XML in-memory block and use the given SAX function block 14679 * to handle the parsing callback. If sax is NULL, fallback to the default 14680 * DOM tree building routines. 14681 * 14682 * Returns the resulting document tree 14683 */ 14684xmlDocPtr 14685xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14686 int size, int recovery) { 14687 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14688} 14689 14690/** 14691 * xmlParseMemory: 14692 * @buffer: an pointer to a char array 14693 * @size: the size of the array 14694 * 14695 * parse an XML in-memory block and build a tree. 
14696 * 14697 * Returns the resulting document tree 14698 */ 14699 14700xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14701 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14702} 14703 14704/** 14705 * xmlRecoverMemory: 14706 * @buffer: an pointer to a char array 14707 * @size: the size of the array 14708 * 14709 * parse an XML in-memory block and build a tree. 14710 * In the case the document is not Well Formed, an attempt to 14711 * build a tree is tried anyway 14712 * 14713 * Returns the resulting document tree or NULL in case of error 14714 */ 14715 14716xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14717 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14718} 14719 14720/** 14721 * xmlSAXUserParseMemory: 14722 * @sax: a SAX handler 14723 * @user_data: The user data returned on SAX callbacks 14724 * @buffer: an in-memory XML document input 14725 * @size: the length of the XML document in bytes 14726 * 14727 * A better SAX parsing routine. 14728 * parse an XML in-memory buffer and call the given SAX handler routines. 
14729 * 14730 * Returns 0 in case of success or a error number otherwise 14731 */ 14732int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14733 const char *buffer, int size) { 14734 int ret = 0; 14735 xmlParserCtxtPtr ctxt; 14736 14737 xmlInitParser(); 14738 14739 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14740 if (ctxt == NULL) return -1; 14741 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14742 xmlFree(ctxt->sax); 14743 ctxt->sax = sax; 14744 xmlDetectSAX2(ctxt); 14745 14746 if (user_data != NULL) 14747 ctxt->userData = user_data; 14748 14749 xmlParseDocument(ctxt); 14750 14751 if (ctxt->wellFormed) 14752 ret = 0; 14753 else { 14754 if (ctxt->errNo != 0) 14755 ret = ctxt->errNo; 14756 else 14757 ret = -1; 14758 } 14759 if (sax != NULL) 14760 ctxt->sax = NULL; 14761 if (ctxt->myDoc != NULL) { 14762 xmlFreeDoc(ctxt->myDoc); 14763 ctxt->myDoc = NULL; 14764 } 14765 xmlFreeParserCtxt(ctxt); 14766 14767 return ret; 14768} 14769#endif /* LIBXML_SAX1_ENABLED */ 14770 14771/** 14772 * xmlCreateDocParserCtxt: 14773 * @cur: a pointer to an array of xmlChar 14774 * 14775 * Creates a parser context for an XML in-memory document. 14776 * 14777 * Returns the new parser context or NULL 14778 */ 14779xmlParserCtxtPtr 14780xmlCreateDocParserCtxt(const xmlChar *cur) { 14781 int len; 14782 14783 if (cur == NULL) 14784 return(NULL); 14785 len = xmlStrlen(cur); 14786 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14787} 14788 14789#ifdef LIBXML_SAX1_ENABLED 14790/** 14791 * xmlSAXParseDoc: 14792 * @sax: the SAX handler block 14793 * @cur: a pointer to an array of xmlChar 14794 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14795 * documents 14796 * 14797 * parse an XML in-memory document and build a tree. 14798 * It use the given SAX function block to handle the parsing callback. 14799 * If sax is NULL, fallback to the default DOM tree building routines. 
14800 * 14801 * Returns the resulting document tree 14802 */ 14803 14804xmlDocPtr 14805xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14806 xmlDocPtr ret; 14807 xmlParserCtxtPtr ctxt; 14808 xmlSAXHandlerPtr oldsax = NULL; 14809 14810 if (cur == NULL) return(NULL); 14811 14812 14813 ctxt = xmlCreateDocParserCtxt(cur); 14814 if (ctxt == NULL) return(NULL); 14815 if (sax != NULL) { 14816 oldsax = ctxt->sax; 14817 ctxt->sax = sax; 14818 ctxt->userData = NULL; 14819 } 14820 xmlDetectSAX2(ctxt); 14821 14822 xmlParseDocument(ctxt); 14823 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14824 else { 14825 ret = NULL; 14826 xmlFreeDoc(ctxt->myDoc); 14827 ctxt->myDoc = NULL; 14828 } 14829 if (sax != NULL) 14830 ctxt->sax = oldsax; 14831 xmlFreeParserCtxt(ctxt); 14832 14833 return(ret); 14834} 14835 14836/** 14837 * xmlParseDoc: 14838 * @cur: a pointer to an array of xmlChar 14839 * 14840 * parse an XML in-memory document and build a tree. 14841 * 14842 * Returns the resulting document tree 14843 */ 14844 14845xmlDocPtr 14846xmlParseDoc(const xmlChar *cur) { 14847 return(xmlSAXParseDoc(NULL, cur, 0)); 14848} 14849#endif /* LIBXML_SAX1_ENABLED */ 14850 14851#ifdef LIBXML_LEGACY_ENABLED 14852/************************************************************************ 14853 * * 14854 * Specific function to keep track of entities references * 14855 * and used by the XSLT debugger * 14856 * * 14857 ************************************************************************/ 14858 14859static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14860 14861/** 14862 * xmlAddEntityReference: 14863 * @ent : A valid entity 14864 * @firstNode : A valid first node for children of entity 14865 * @lastNode : A valid last node of children entity 14866 * 14867 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14868 */ 14869static void 14870xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14871 xmlNodePtr lastNode) 14872{ 14873 
if (xmlEntityRefFunc != NULL) { 14874 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14875 } 14876} 14877 14878 14879/** 14880 * xmlSetEntityReferenceFunc: 14881 * @func: A valid function 14882 * 14883 * Set the function to call call back when a xml reference has been made 14884 */ 14885void 14886xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14887{ 14888 xmlEntityRefFunc = func; 14889} 14890#endif /* LIBXML_LEGACY_ENABLED */ 14891 14892/************************************************************************ 14893 * * 14894 * Miscellaneous * 14895 * * 14896 ************************************************************************/ 14897 14898#ifdef LIBXML_XPATH_ENABLED 14899#include <libxml/xpath.h> 14900#endif 14901 14902extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14903static int xmlParserInitialized = 0; 14904 14905/** 14906 * xmlInitParser: 14907 * 14908 * Initialization function for the XML parser. 14909 * This is not reentrant. Call once before processing in case of 14910 * use in multithreaded programs. 
14911 */ 14912 14913void 14914xmlInitParser(void) { 14915 if (xmlParserInitialized != 0) 14916 return; 14917 14918#ifdef LIBXML_THREAD_ENABLED 14919 __xmlGlobalInitMutexLock(); 14920 if (xmlParserInitialized == 0) { 14921#endif 14922 xmlInitThreads(); 14923 xmlInitGlobals(); 14924 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14925 (xmlGenericError == NULL)) 14926 initGenericErrorDefaultFunc(NULL); 14927 xmlInitMemory(); 14928 xmlInitializeDict(); 14929 xmlInitCharEncodingHandlers(); 14930 xmlDefaultSAXHandlerInit(); 14931 xmlRegisterDefaultInputCallbacks(); 14932#ifdef LIBXML_OUTPUT_ENABLED 14933 xmlRegisterDefaultOutputCallbacks(); 14934#endif /* LIBXML_OUTPUT_ENABLED */ 14935#ifdef LIBXML_HTML_ENABLED 14936 htmlInitAutoClose(); 14937 htmlDefaultSAXHandlerInit(); 14938#endif 14939#ifdef LIBXML_XPATH_ENABLED 14940 xmlXPathInit(); 14941#endif 14942 xmlParserInitialized = 1; 14943#ifdef LIBXML_THREAD_ENABLED 14944 } 14945 __xmlGlobalInitMutexUnlock(); 14946#endif 14947} 14948 14949/** 14950 * xmlCleanupParser: 14951 * 14952 * This function name is somewhat misleading. It does not clean up 14953 * parser state, it cleans up memory allocated by the library itself. 14954 * It is a cleanup function for the XML library. It tries to reclaim all 14955 * related global memory allocated for the library processing. 14956 * It doesn't deallocate any document related memory. One should 14957 * call xmlCleanupParser() only when the process has finished using 14958 * the library and all XML/HTML documents built with it. 14959 * See also xmlInitParser() which has the opposite function of preparing 14960 * the library for operations. 14961 * 14962 * WARNING: if your application is multithreaded or has plugin support 14963 * calling this may crash the application if another thread or 14964 * a plugin is still using libxml2. It's sometimes very hard to 14965 * guess if libxml2 is in use in the application, some libraries 14966 * or plugins may use it without notice. 
In case of doubt abstain 14967 * from calling this function or do it just before calling exit() 14968 * to avoid leak reports from valgrind ! 14969 */ 14970 14971void 14972xmlCleanupParser(void) { 14973 if (!xmlParserInitialized) 14974 return; 14975 14976 xmlCleanupCharEncodingHandlers(); 14977#ifdef LIBXML_CATALOG_ENABLED 14978 xmlCatalogCleanup(); 14979#endif 14980 xmlDictCleanup(); 14981 xmlCleanupInputCallbacks(); 14982#ifdef LIBXML_OUTPUT_ENABLED 14983 xmlCleanupOutputCallbacks(); 14984#endif 14985#ifdef LIBXML_SCHEMAS_ENABLED 14986 xmlSchemaCleanupTypes(); 14987 xmlRelaxNGCleanupTypes(); 14988#endif 14989 xmlResetLastError(); 14990 xmlCleanupGlobals(); 14991 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14992 xmlCleanupMemory(); 14993 xmlParserInitialized = 0; 14994} 14995 14996/************************************************************************ 14997 * * 14998 * New set (2.6.0) of simpler and more flexible APIs * 14999 * * 15000 ************************************************************************/ 15001 15002/** 15003 * DICT_FREE: 15004 * @str: a string 15005 * 15006 * Free a string if it is not owned by the "dict" dictionary in the 15007 * current scope 15008 */ 15009#define DICT_FREE(str) \ 15010 if ((str) && ((!dict) || \ 15011 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 15012 xmlFree((char *)(str)); 15013 15014/** 15015 * xmlCtxtReset: 15016 * @ctxt: an XML parser context 15017 * 15018 * Reset a parser context 15019 */ 15020void 15021xmlCtxtReset(xmlParserCtxtPtr ctxt) 15022{ 15023 xmlParserInputPtr input; 15024 xmlDictPtr dict; 15025 15026 if (ctxt == NULL) 15027 return; 15028 15029 dict = ctxt->dict; 15030 15031 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 15032 xmlFreeInputStream(input); 15033 } 15034 ctxt->inputNr = 0; 15035 ctxt->input = NULL; 15036 15037 ctxt->spaceNr = 0; 15038 if (ctxt->spaceTab != NULL) { 15039 ctxt->spaceTab[0] = -1; 15040 ctxt->space = &ctxt->spaceTab[0]; 15041 } 
else { 15042 ctxt->space = NULL; 15043 } 15044 15045 15046 ctxt->nodeNr = 0; 15047 ctxt->node = NULL; 15048 15049 ctxt->nameNr = 0; 15050 ctxt->name = NULL; 15051 15052 DICT_FREE(ctxt->version); 15053 ctxt->version = NULL; 15054 DICT_FREE(ctxt->encoding); 15055 ctxt->encoding = NULL; 15056 DICT_FREE(ctxt->directory); 15057 ctxt->directory = NULL; 15058 DICT_FREE(ctxt->extSubURI); 15059 ctxt->extSubURI = NULL; 15060 DICT_FREE(ctxt->extSubSystem); 15061 ctxt->extSubSystem = NULL; 15062 if (ctxt->myDoc != NULL) 15063 xmlFreeDoc(ctxt->myDoc); 15064 ctxt->myDoc = NULL; 15065 15066 ctxt->standalone = -1; 15067 ctxt->hasExternalSubset = 0; 15068 ctxt->hasPErefs = 0; 15069 ctxt->html = 0; 15070 ctxt->external = 0; 15071 ctxt->instate = XML_PARSER_START; 15072 ctxt->token = 0; 15073 15074 ctxt->wellFormed = 1; 15075 ctxt->nsWellFormed = 1; 15076 ctxt->disableSAX = 0; 15077 ctxt->valid = 1; 15078#if 0 15079 ctxt->vctxt.userData = ctxt; 15080 ctxt->vctxt.error = xmlParserValidityError; 15081 ctxt->vctxt.warning = xmlParserValidityWarning; 15082#endif 15083 ctxt->record_info = 0; 15084 ctxt->nbChars = 0; 15085 ctxt->checkIndex = 0; 15086 ctxt->inSubset = 0; 15087 ctxt->errNo = XML_ERR_OK; 15088 ctxt->depth = 0; 15089 ctxt->charset = XML_CHAR_ENCODING_UTF8; 15090 ctxt->catalogs = NULL; 15091 ctxt->nbentities = 0; 15092 ctxt->sizeentities = 0; 15093 ctxt->sizeentcopy = 0; 15094 xmlInitNodeInfoSeq(&ctxt->node_seq); 15095 15096 if (ctxt->attsDefault != NULL) { 15097 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 15098 ctxt->attsDefault = NULL; 15099 } 15100 if (ctxt->attsSpecial != NULL) { 15101 xmlHashFree(ctxt->attsSpecial, NULL); 15102 ctxt->attsSpecial = NULL; 15103 } 15104 15105#ifdef LIBXML_CATALOG_ENABLED 15106 if (ctxt->catalogs != NULL) 15107 xmlCatalogFreeLocal(ctxt->catalogs); 15108#endif 15109 if (ctxt->lastError.code != XML_ERR_OK) 15110 xmlResetError(&ctxt->lastError); 15111} 15112 15113/** 15114 * xmlCtxtResetPush: 15115 * @ctxt: an XML parser context 
15116 * @chunk: a pointer to an array of chars 15117 * @size: number of chars in the array 15118 * @filename: an optional file name or URI 15119 * @encoding: the document encoding, or NULL 15120 * 15121 * Reset a push parser context 15122 * 15123 * Returns 0 in case of success and 1 in case of error 15124 */ 15125int 15126xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 15127 int size, const char *filename, const char *encoding) 15128{ 15129 xmlParserInputPtr inputStream; 15130 xmlParserInputBufferPtr buf; 15131 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 15132 15133 if (ctxt == NULL) 15134 return(1); 15135 15136 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 15137 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 15138 15139 buf = xmlAllocParserInputBuffer(enc); 15140 if (buf == NULL) 15141 return(1); 15142 15143 if (ctxt == NULL) { 15144 xmlFreeParserInputBuffer(buf); 15145 return(1); 15146 } 15147 15148 xmlCtxtReset(ctxt); 15149 15150 if (ctxt->pushTab == NULL) { 15151 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 15152 sizeof(xmlChar *)); 15153 if (ctxt->pushTab == NULL) { 15154 xmlErrMemory(ctxt, NULL); 15155 xmlFreeParserInputBuffer(buf); 15156 return(1); 15157 } 15158 } 15159 15160 if (filename == NULL) { 15161 ctxt->directory = NULL; 15162 } else { 15163 ctxt->directory = xmlParserGetDirectory(filename); 15164 } 15165 15166 inputStream = xmlNewInputStream(ctxt); 15167 if (inputStream == NULL) { 15168 xmlFreeParserInputBuffer(buf); 15169 return(1); 15170 } 15171 15172 if (filename == NULL) 15173 inputStream->filename = NULL; 15174 else 15175 inputStream->filename = (char *) 15176 xmlCanonicPath((const xmlChar *) filename); 15177 inputStream->buf = buf; 15178 xmlBufResetInput(buf->buffer, inputStream); 15179 15180 inputPush(ctxt, inputStream); 15181 15182 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 15183 (ctxt->input->buf != NULL)) { 15184 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 
ctxt->input); 15185 size_t cur = ctxt->input->cur - ctxt->input->base; 15186 15187 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15188 15189 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15190#ifdef DEBUG_PUSH 15191 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15192#endif 15193 } 15194 15195 if (encoding != NULL) { 15196 xmlCharEncodingHandlerPtr hdlr; 15197 15198 if (ctxt->encoding != NULL) 15199 xmlFree((xmlChar *) ctxt->encoding); 15200 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15201 15202 hdlr = xmlFindCharEncodingHandler(encoding); 15203 if (hdlr != NULL) { 15204 xmlSwitchToEncoding(ctxt, hdlr); 15205 } else { 15206 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15207 "Unsupported encoding %s\n", BAD_CAST encoding); 15208 } 15209 } else if (enc != XML_CHAR_ENCODING_NONE) { 15210 xmlSwitchEncoding(ctxt, enc); 15211 } 15212 15213 return(0); 15214} 15215 15216 15217/** 15218 * xmlCtxtUseOptionsInternal: 15219 * @ctxt: an XML parser context 15220 * @options: a combination of xmlParserOption 15221 * @encoding: the user provided encoding to use 15222 * 15223 * Applies the options to the parser context 15224 * 15225 * Returns 0 in case of success, the set of unknown or unimplemented options 15226 * in case of error. 
15227 */ 15228static int 15229xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15230{ 15231 if (ctxt == NULL) 15232 return(-1); 15233 if (encoding != NULL) { 15234 if (ctxt->encoding != NULL) 15235 xmlFree((xmlChar *) ctxt->encoding); 15236 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15237 } 15238 if (options & XML_PARSE_RECOVER) { 15239 ctxt->recovery = 1; 15240 options -= XML_PARSE_RECOVER; 15241 ctxt->options |= XML_PARSE_RECOVER; 15242 } else 15243 ctxt->recovery = 0; 15244 if (options & XML_PARSE_DTDLOAD) { 15245 ctxt->loadsubset = XML_DETECT_IDS; 15246 options -= XML_PARSE_DTDLOAD; 15247 ctxt->options |= XML_PARSE_DTDLOAD; 15248 } else 15249 ctxt->loadsubset = 0; 15250 if (options & XML_PARSE_DTDATTR) { 15251 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15252 options -= XML_PARSE_DTDATTR; 15253 ctxt->options |= XML_PARSE_DTDATTR; 15254 } 15255 if (options & XML_PARSE_NOENT) { 15256 ctxt->replaceEntities = 1; 15257 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15258 options -= XML_PARSE_NOENT; 15259 ctxt->options |= XML_PARSE_NOENT; 15260 } else 15261 ctxt->replaceEntities = 0; 15262 if (options & XML_PARSE_PEDANTIC) { 15263 ctxt->pedantic = 1; 15264 options -= XML_PARSE_PEDANTIC; 15265 ctxt->options |= XML_PARSE_PEDANTIC; 15266 } else 15267 ctxt->pedantic = 0; 15268 if (options & XML_PARSE_NOBLANKS) { 15269 ctxt->keepBlanks = 0; 15270 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15271 options -= XML_PARSE_NOBLANKS; 15272 ctxt->options |= XML_PARSE_NOBLANKS; 15273 } else 15274 ctxt->keepBlanks = 1; 15275 if (options & XML_PARSE_DTDVALID) { 15276 ctxt->validate = 1; 15277 if (options & XML_PARSE_NOWARNING) 15278 ctxt->vctxt.warning = NULL; 15279 if (options & XML_PARSE_NOERROR) 15280 ctxt->vctxt.error = NULL; 15281 options -= XML_PARSE_DTDVALID; 15282 ctxt->options |= XML_PARSE_DTDVALID; 15283 } else 15284 ctxt->validate = 0; 15285 if (options & XML_PARSE_NOWARNING) { 15286 ctxt->sax->warning = NULL; 15287 
options -= XML_PARSE_NOWARNING; 15288 } 15289 if (options & XML_PARSE_NOERROR) { 15290 ctxt->sax->error = NULL; 15291 ctxt->sax->fatalError = NULL; 15292 options -= XML_PARSE_NOERROR; 15293 } 15294#ifdef LIBXML_SAX1_ENABLED 15295 if (options & XML_PARSE_SAX1) { 15296 ctxt->sax->startElement = xmlSAX2StartElement; 15297 ctxt->sax->endElement = xmlSAX2EndElement; 15298 ctxt->sax->startElementNs = NULL; 15299 ctxt->sax->endElementNs = NULL; 15300 ctxt->sax->initialized = 1; 15301 options -= XML_PARSE_SAX1; 15302 ctxt->options |= XML_PARSE_SAX1; 15303 } 15304#endif /* LIBXML_SAX1_ENABLED */ 15305 if (options & XML_PARSE_NODICT) { 15306 ctxt->dictNames = 0; 15307 options -= XML_PARSE_NODICT; 15308 ctxt->options |= XML_PARSE_NODICT; 15309 } else { 15310 ctxt->dictNames = 1; 15311 } 15312 if (options & XML_PARSE_NOCDATA) { 15313 ctxt->sax->cdataBlock = NULL; 15314 options -= XML_PARSE_NOCDATA; 15315 ctxt->options |= XML_PARSE_NOCDATA; 15316 } 15317 if (options & XML_PARSE_NSCLEAN) { 15318 ctxt->options |= XML_PARSE_NSCLEAN; 15319 options -= XML_PARSE_NSCLEAN; 15320 } 15321 if (options & XML_PARSE_NONET) { 15322 ctxt->options |= XML_PARSE_NONET; 15323 options -= XML_PARSE_NONET; 15324 } 15325 if (options & XML_PARSE_COMPACT) { 15326 ctxt->options |= XML_PARSE_COMPACT; 15327 options -= XML_PARSE_COMPACT; 15328 } 15329 if (options & XML_PARSE_OLD10) { 15330 ctxt->options |= XML_PARSE_OLD10; 15331 options -= XML_PARSE_OLD10; 15332 } 15333 if (options & XML_PARSE_NOBASEFIX) { 15334 ctxt->options |= XML_PARSE_NOBASEFIX; 15335 options -= XML_PARSE_NOBASEFIX; 15336 } 15337 if (options & XML_PARSE_HUGE) { 15338 ctxt->options |= XML_PARSE_HUGE; 15339 options -= XML_PARSE_HUGE; 15340 if (ctxt->dict != NULL) 15341 xmlDictSetLimit(ctxt->dict, 0); 15342 } 15343 if (options & XML_PARSE_OLDSAX) { 15344 ctxt->options |= XML_PARSE_OLDSAX; 15345 options -= XML_PARSE_OLDSAX; 15346 } 15347 if (options & XML_PARSE_IGNORE_ENC) { 15348 ctxt->options |= XML_PARSE_IGNORE_ENC; 15349 options -= 
XML_PARSE_IGNORE_ENC; 15350 } 15351 if (options & XML_PARSE_BIG_LINES) { 15352 ctxt->options |= XML_PARSE_BIG_LINES; 15353 options -= XML_PARSE_BIG_LINES; 15354 } 15355 ctxt->linenumbers = 1; 15356 return (options); 15357} 15358 15359/** 15360 * xmlCtxtUseOptions: 15361 * @ctxt: an XML parser context 15362 * @options: a combination of xmlParserOption 15363 * 15364 * Applies the options to the parser context 15365 * 15366 * Returns 0 in case of success, the set of unknown or unimplemented options 15367 * in case of error. 15368 */ 15369int 15370xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15371{ 15372 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15373} 15374 15375/** 15376 * xmlDoRead: 15377 * @ctxt: an XML parser context 15378 * @URL: the base URL to use for the document 15379 * @encoding: the document encoding, or NULL 15380 * @options: a combination of xmlParserOption 15381 * @reuse: keep the context for reuse 15382 * 15383 * Common front-end for the xmlRead functions 15384 * 15385 * Returns the resulting document tree or NULL 15386 */ 15387static xmlDocPtr 15388xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15389 int options, int reuse) 15390{ 15391 xmlDocPtr ret; 15392 15393 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15394 if (encoding != NULL) { 15395 xmlCharEncodingHandlerPtr hdlr; 15396 15397 hdlr = xmlFindCharEncodingHandler(encoding); 15398 if (hdlr != NULL) 15399 xmlSwitchToEncoding(ctxt, hdlr); 15400 } 15401 if ((URL != NULL) && (ctxt->input != NULL) && 15402 (ctxt->input->filename == NULL)) 15403 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15404 xmlParseDocument(ctxt); 15405 if ((ctxt->wellFormed) || ctxt->recovery) 15406 ret = ctxt->myDoc; 15407 else { 15408 ret = NULL; 15409 if (ctxt->myDoc != NULL) { 15410 xmlFreeDoc(ctxt->myDoc); 15411 } 15412 } 15413 ctxt->myDoc = NULL; 15414 if (!reuse) { 15415 xmlFreeParserCtxt(ctxt); 15416 } 15417 15418 return (ret); 15419} 15420 
15421/** 15422 * xmlReadDoc: 15423 * @cur: a pointer to a zero terminated string 15424 * @URL: the base URL to use for the document 15425 * @encoding: the document encoding, or NULL 15426 * @options: a combination of xmlParserOption 15427 * 15428 * parse an XML in-memory document and build a tree. 15429 * 15430 * Returns the resulting document tree 15431 */ 15432xmlDocPtr 15433xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15434{ 15435 xmlParserCtxtPtr ctxt; 15436 15437 if (cur == NULL) 15438 return (NULL); 15439 xmlInitParser(); 15440 15441 ctxt = xmlCreateDocParserCtxt(cur); 15442 if (ctxt == NULL) 15443 return (NULL); 15444 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15445} 15446 15447/** 15448 * xmlReadFile: 15449 * @filename: a file or URL 15450 * @encoding: the document encoding, or NULL 15451 * @options: a combination of xmlParserOption 15452 * 15453 * parse an XML file from the filesystem or the network. 15454 * 15455 * Returns the resulting document tree 15456 */ 15457xmlDocPtr 15458xmlReadFile(const char *filename, const char *encoding, int options) 15459{ 15460 xmlParserCtxtPtr ctxt; 15461 15462 xmlInitParser(); 15463 ctxt = xmlCreateURLParserCtxt(filename, options); 15464 if (ctxt == NULL) 15465 return (NULL); 15466 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15467} 15468 15469/** 15470 * xmlReadMemory: 15471 * @buffer: a pointer to a char array 15472 * @size: the size of the array 15473 * @URL: the base URL to use for the document 15474 * @encoding: the document encoding, or NULL 15475 * @options: a combination of xmlParserOption 15476 * 15477 * parse an XML in-memory document and build a tree. 
15478 * 15479 * Returns the resulting document tree 15480 */ 15481xmlDocPtr 15482xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15483{ 15484 xmlParserCtxtPtr ctxt; 15485 15486 xmlInitParser(); 15487 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15488 if (ctxt == NULL) 15489 return (NULL); 15490 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15491} 15492 15493/** 15494 * xmlReadFd: 15495 * @fd: an open file descriptor 15496 * @URL: the base URL to use for the document 15497 * @encoding: the document encoding, or NULL 15498 * @options: a combination of xmlParserOption 15499 * 15500 * parse an XML from a file descriptor and build a tree. 15501 * NOTE that the file descriptor will not be closed when the 15502 * reader is closed or reset. 15503 * 15504 * Returns the resulting document tree 15505 */ 15506xmlDocPtr 15507xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15508{ 15509 xmlParserCtxtPtr ctxt; 15510 xmlParserInputBufferPtr input; 15511 xmlParserInputPtr stream; 15512 15513 if (fd < 0) 15514 return (NULL); 15515 xmlInitParser(); 15516 15517 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15518 if (input == NULL) 15519 return (NULL); 15520 input->closecallback = NULL; 15521 ctxt = xmlNewParserCtxt(); 15522 if (ctxt == NULL) { 15523 xmlFreeParserInputBuffer(input); 15524 return (NULL); 15525 } 15526 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15527 if (stream == NULL) { 15528 xmlFreeParserInputBuffer(input); 15529 xmlFreeParserCtxt(ctxt); 15530 return (NULL); 15531 } 15532 inputPush(ctxt, stream); 15533 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15534} 15535 15536/** 15537 * xmlReadIO: 15538 * @ioread: an I/O read function 15539 * @ioclose: an I/O close function 15540 * @ioctx: an I/O handler 15541 * @URL: the base URL to use for the document 15542 * @encoding: the document encoding, or NULL 15543 * @options: a combination of 
xmlParserOption 15544 * 15545 * parse an XML document from I/O functions and source and build a tree. 15546 * 15547 * Returns the resulting document tree 15548 */ 15549xmlDocPtr 15550xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15551 void *ioctx, const char *URL, const char *encoding, int options) 15552{ 15553 xmlParserCtxtPtr ctxt; 15554 xmlParserInputBufferPtr input; 15555 xmlParserInputPtr stream; 15556 15557 if (ioread == NULL) 15558 return (NULL); 15559 xmlInitParser(); 15560 15561 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15562 XML_CHAR_ENCODING_NONE); 15563 if (input == NULL) { 15564 if (ioclose != NULL) 15565 ioclose(ioctx); 15566 return (NULL); 15567 } 15568 ctxt = xmlNewParserCtxt(); 15569 if (ctxt == NULL) { 15570 xmlFreeParserInputBuffer(input); 15571 return (NULL); 15572 } 15573 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15574 if (stream == NULL) { 15575 xmlFreeParserInputBuffer(input); 15576 xmlFreeParserCtxt(ctxt); 15577 return (NULL); 15578 } 15579 inputPush(ctxt, stream); 15580 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15581} 15582 15583/** 15584 * xmlCtxtReadDoc: 15585 * @ctxt: an XML parser context 15586 * @cur: a pointer to a zero terminated string 15587 * @URL: the base URL to use for the document 15588 * @encoding: the document encoding, or NULL 15589 * @options: a combination of xmlParserOption 15590 * 15591 * parse an XML in-memory document and build a tree. 
15592 * This reuses the existing @ctxt parser context 15593 * 15594 * Returns the resulting document tree 15595 */ 15596xmlDocPtr 15597xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15598 const char *URL, const char *encoding, int options) 15599{ 15600 xmlParserInputPtr stream; 15601 15602 if (cur == NULL) 15603 return (NULL); 15604 if (ctxt == NULL) 15605 return (NULL); 15606 xmlInitParser(); 15607 15608 xmlCtxtReset(ctxt); 15609 15610 stream = xmlNewStringInputStream(ctxt, cur); 15611 if (stream == NULL) { 15612 return (NULL); 15613 } 15614 inputPush(ctxt, stream); 15615 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15616} 15617 15618/** 15619 * xmlCtxtReadFile: 15620 * @ctxt: an XML parser context 15621 * @filename: a file or URL 15622 * @encoding: the document encoding, or NULL 15623 * @options: a combination of xmlParserOption 15624 * 15625 * parse an XML file from the filesystem or the network. 15626 * This reuses the existing @ctxt parser context 15627 * 15628 * Returns the resulting document tree 15629 */ 15630xmlDocPtr 15631xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15632 const char *encoding, int options) 15633{ 15634 xmlParserInputPtr stream; 15635 15636 if (filename == NULL) 15637 return (NULL); 15638 if (ctxt == NULL) 15639 return (NULL); 15640 xmlInitParser(); 15641 15642 xmlCtxtReset(ctxt); 15643 15644 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15645 if (stream == NULL) { 15646 return (NULL); 15647 } 15648 inputPush(ctxt, stream); 15649 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15650} 15651 15652/** 15653 * xmlCtxtReadMemory: 15654 * @ctxt: an XML parser context 15655 * @buffer: a pointer to a char array 15656 * @size: the size of the array 15657 * @URL: the base URL to use for the document 15658 * @encoding: the document encoding, or NULL 15659 * @options: a combination of xmlParserOption 15660 * 15661 * parse an XML in-memory document and build a tree. 
15662 * This reuses the existing @ctxt parser context 15663 * 15664 * Returns the resulting document tree 15665 */ 15666xmlDocPtr 15667xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15668 const char *URL, const char *encoding, int options) 15669{ 15670 xmlParserInputBufferPtr input; 15671 xmlParserInputPtr stream; 15672 15673 if (ctxt == NULL) 15674 return (NULL); 15675 if (buffer == NULL) 15676 return (NULL); 15677 xmlInitParser(); 15678 15679 xmlCtxtReset(ctxt); 15680 15681 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15682 if (input == NULL) { 15683 return(NULL); 15684 } 15685 15686 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15687 if (stream == NULL) { 15688 xmlFreeParserInputBuffer(input); 15689 return(NULL); 15690 } 15691 15692 inputPush(ctxt, stream); 15693 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15694} 15695 15696/** 15697 * xmlCtxtReadFd: 15698 * @ctxt: an XML parser context 15699 * @fd: an open file descriptor 15700 * @URL: the base URL to use for the document 15701 * @encoding: the document encoding, or NULL 15702 * @options: a combination of xmlParserOption 15703 * 15704 * parse an XML from a file descriptor and build a tree. 15705 * This reuses the existing @ctxt parser context 15706 * NOTE that the file descriptor will not be closed when the 15707 * reader is closed or reset. 
15708 * 15709 * Returns the resulting document tree 15710 */ 15711xmlDocPtr 15712xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15713 const char *URL, const char *encoding, int options) 15714{ 15715 xmlParserInputBufferPtr input; 15716 xmlParserInputPtr stream; 15717 15718 if (fd < 0) 15719 return (NULL); 15720 if (ctxt == NULL) 15721 return (NULL); 15722 xmlInitParser(); 15723 15724 xmlCtxtReset(ctxt); 15725 15726 15727 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15728 if (input == NULL) 15729 return (NULL); 15730 input->closecallback = NULL; 15731 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15732 if (stream == NULL) { 15733 xmlFreeParserInputBuffer(input); 15734 return (NULL); 15735 } 15736 inputPush(ctxt, stream); 15737 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15738} 15739 15740/** 15741 * xmlCtxtReadIO: 15742 * @ctxt: an XML parser context 15743 * @ioread: an I/O read function 15744 * @ioclose: an I/O close function 15745 * @ioctx: an I/O handler 15746 * @URL: the base URL to use for the document 15747 * @encoding: the document encoding, or NULL 15748 * @options: a combination of xmlParserOption 15749 * 15750 * parse an XML document from I/O functions and source and build a tree. 
15751 * This reuses the existing @ctxt parser context 15752 * 15753 * Returns the resulting document tree 15754 */ 15755xmlDocPtr 15756xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15757 xmlInputCloseCallback ioclose, void *ioctx, 15758 const char *URL, 15759 const char *encoding, int options) 15760{ 15761 xmlParserInputBufferPtr input; 15762 xmlParserInputPtr stream; 15763 15764 if (ioread == NULL) 15765 return (NULL); 15766 if (ctxt == NULL) 15767 return (NULL); 15768 xmlInitParser(); 15769 15770 xmlCtxtReset(ctxt); 15771 15772 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15773 XML_CHAR_ENCODING_NONE); 15774 if (input == NULL) { 15775 if (ioclose != NULL) 15776 ioclose(ioctx); 15777 return (NULL); 15778 } 15779 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15780 if (stream == NULL) { 15781 xmlFreeParserInputBuffer(input); 15782 return (NULL); 15783 } 15784 inputPush(ctxt, stream); 15785 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15786} 15787 15788#define bottom_parser 15789#include "elfgcchack.h" 15790