parser.c revision f8e3db0445a1bc8cfe3f77326b07ec161482caa2
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <limits.h> 44#include <string.h> 45#include <stdarg.h> 46#include <libxml/xmlmemory.h> 47#include <libxml/threads.h> 48#include <libxml/globals.h> 49#include <libxml/tree.h> 50#include <libxml/parser.h> 51#include <libxml/parserInternals.h> 52#include <libxml/valid.h> 53#include <libxml/entities.h> 54#include <libxml/xmlerror.h> 55#include <libxml/encoding.h> 56#include <libxml/xmlIO.h> 57#include <libxml/uri.h> 58#ifdef LIBXML_CATALOG_ENABLED 59#include <libxml/catalog.h> 60#endif 61#ifdef LIBXML_SCHEMAS_ENABLED 62#include <libxml/xmlschemastypes.h> 63#include <libxml/relaxng.h> 64#endif 65#ifdef HAVE_CTYPE_H 66#include <ctype.h> 67#endif 68#ifdef HAVE_STDLIB_H 69#include <stdlib.h> 70#endif 71#ifdef HAVE_SYS_STAT_H 72#include <sys/stat.h> 73#endif 74#ifdef HAVE_FCNTL_H 75#include <fcntl.h> 76#endif 77#ifdef HAVE_UNISTD_H 78#include <unistd.h> 79#endif 80#ifdef HAVE_ZLIB_H 81#include <zlib.h> 82#endif 83#ifdef HAVE_LZMA_H 84#include <lzma.h> 85#endif 86 87#include "buf.h" 88#include "enc.h" 89 90static void 91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93static xmlParserCtxtPtr 94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97/************************************************************************ 98 * * 99 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 100 * * 101 ************************************************************************/ 102 103#define XML_PARSER_BIG_ENTITY 1000 104#define XML_PARSER_LOT_ENTITY 5000 105 106/* 107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 108 * replacement over the size in byte of the input indicates that you have 109 * and eponential behaviour. A value of 10 correspond to at least 3 entity 110 * replacement per byte of input. 111 */ 112#define XML_PARSER_NON_LINEAR 10 113 114/* 115 * xmlParserEntityCheck 116 * 117 * Function to check non-linear entity expansion behaviour 118 * This is here to detect and stop exponential linear entity expansion 119 * This is not a limitation of the parser but a safety 120 * boundary feature. It can be disabled with the XML_PARSE_HUGE 121 * parser option. 122 */ 123static int 124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 125 xmlEntityPtr ent) 126{ 127 size_t consumed = 0; 128 129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 130 return (0); 131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 132 return (1); 133 if (size != 0) { 134 /* 135 * Do the check based on the replacement size of the entity 136 */ 137 if (size < XML_PARSER_BIG_ENTITY) 138 return(0); 139 140 /* 141 * A limit on the amount of text data reasonably used 142 */ 143 if (ctxt->input != NULL) { 144 consumed = ctxt->input->consumed + 145 (ctxt->input->cur - ctxt->input->base); 146 } 147 consumed += ctxt->sizeentities; 148 149 if ((size < XML_PARSER_NON_LINEAR * consumed) && 150 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 151 return (0); 152 } else if (ent != NULL) { 153 /* 154 * use the number of parsed entities in the replacement 155 */ 156 size = ent->checked; 157 158 /* 159 * The amount of data parsed counting entities size only once 160 */ 161 if (ctxt->input != NULL) { 162 consumed = ctxt->input->consumed + 163 (ctxt->input->cur - ctxt->input->base); 164 } 165 consumed += 
ctxt->sizeentities; 166 167 /* 168 * Check the density of entities for the amount of data 169 * knowing an entity reference will take at least 3 bytes 170 */ 171 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 172 return (0); 173 } else { 174 /* 175 * strange we got no data for checking just return 176 */ 177 return (0); 178 } 179 180 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 181 return (1); 182} 183 184/** 185 * xmlParserMaxDepth: 186 * 187 * arbitrary depth limit for the XML documents that we allow to 188 * process. This is not a limitation of the parser but a safety 189 * boundary feature. It can be disabled with the XML_PARSE_HUGE 190 * parser option. 191 */ 192unsigned int xmlParserMaxDepth = 256; 193 194 195 196#define SAX2 1 197#define XML_PARSER_BIG_BUFFER_SIZE 300 198#define XML_PARSER_BUFFER_SIZE 100 199#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 200 201/** 202 * XML_PARSER_CHUNK_SIZE 203 * 204 * When calling GROW that's the minimal amount of data 205 * the parser expected to have received. 
It is not a hard 206 * limit but an optimization when reading strings like Names 207 * It is not strictly needed as long as inputs available characters 208 * are followed by 0, which should be provided by the I/O level 209 */ 210#define XML_PARSER_CHUNK_SIZE 100 211 212/* 213 * List of XML prefixed PI allowed by W3C specs 214 */ 215 216static const char *xmlW3CPIs[] = { 217 "xml-stylesheet", 218 "xml-model", 219 NULL 220}; 221 222 223/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 224static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 225 const xmlChar **str); 226 227static xmlParserErrors 228xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 229 xmlSAXHandlerPtr sax, 230 void *user_data, int depth, const xmlChar *URL, 231 const xmlChar *ID, xmlNodePtr *list); 232 233static int 234xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 235 const char *encoding); 236#ifdef LIBXML_LEGACY_ENABLED 237static void 238xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 239 xmlNodePtr lastNode); 240#endif /* LIBXML_LEGACY_ENABLED */ 241 242static xmlParserErrors 243xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 244 const xmlChar *string, void *user_data, xmlNodePtr *lst); 245 246static int 247xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 248 249/************************************************************************ 250 * * 251 * Some factorized error routines * 252 * * 253 ************************************************************************/ 254 255/** 256 * xmlErrAttributeDup: 257 * @ctxt: an XML parser context 258 * @prefix: the attribute prefix 259 * @localname: the attribute localname 260 * 261 * Handle a redefinition of attribute error 262 */ 263static void 264xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 265 const xmlChar * localname) 266{ 267 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 268 (ctxt->instate == XML_PARSER_EOF)) 269 
return; 270 if (ctxt != NULL) 271 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 272 273 if (prefix == NULL) 274 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 275 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 276 (const char *) localname, NULL, NULL, 0, 0, 277 "Attribute %s redefined\n", localname); 278 else 279 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 280 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 281 (const char *) prefix, (const char *) localname, 282 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 283 localname); 284 if (ctxt != NULL) { 285 ctxt->wellFormed = 0; 286 if (ctxt->recovery == 0) 287 ctxt->disableSAX = 1; 288 } 289} 290 291/** 292 * xmlFatalErr: 293 * @ctxt: an XML parser context 294 * @error: the error number 295 * @extra: extra information string 296 * 297 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 298 */ 299static void 300xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 301{ 302 const char *errmsg; 303 char errstr[129] = ""; 304 305 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 306 (ctxt->instate == XML_PARSER_EOF)) 307 return; 308 switch (error) { 309 case XML_ERR_INVALID_HEX_CHARREF: 310 errmsg = "CharRef: invalid hexadecimal value"; 311 break; 312 case XML_ERR_INVALID_DEC_CHARREF: 313 errmsg = "CharRef: invalid decimal value"; 314 break; 315 case XML_ERR_INVALID_CHARREF: 316 errmsg = "CharRef: invalid value"; 317 break; 318 case XML_ERR_INTERNAL_ERROR: 319 errmsg = "internal error"; 320 break; 321 case XML_ERR_PEREF_AT_EOF: 322 errmsg = "PEReference at end of document"; 323 break; 324 case XML_ERR_PEREF_IN_PROLOG: 325 errmsg = "PEReference in prolog"; 326 break; 327 case XML_ERR_PEREF_IN_EPILOG: 328 errmsg = "PEReference in epilog"; 329 break; 330 case XML_ERR_PEREF_NO_NAME: 331 errmsg = "PEReference: no name"; 332 break; 333 case XML_ERR_PEREF_SEMICOL_MISSING: 334 errmsg = "PEReference: expecting ';'"; 335 break; 336 case 
XML_ERR_ENTITY_LOOP: 337 errmsg = "Detected an entity reference loop"; 338 break; 339 case XML_ERR_ENTITY_NOT_STARTED: 340 errmsg = "EntityValue: \" or ' expected"; 341 break; 342 case XML_ERR_ENTITY_PE_INTERNAL: 343 errmsg = "PEReferences forbidden in internal subset"; 344 break; 345 case XML_ERR_ENTITY_NOT_FINISHED: 346 errmsg = "EntityValue: \" or ' expected"; 347 break; 348 case XML_ERR_ATTRIBUTE_NOT_STARTED: 349 errmsg = "AttValue: \" or ' expected"; 350 break; 351 case XML_ERR_LT_IN_ATTRIBUTE: 352 errmsg = "Unescaped '<' not allowed in attributes values"; 353 break; 354 case XML_ERR_LITERAL_NOT_STARTED: 355 errmsg = "SystemLiteral \" or ' expected"; 356 break; 357 case XML_ERR_LITERAL_NOT_FINISHED: 358 errmsg = "Unfinished System or Public ID \" or ' expected"; 359 break; 360 case XML_ERR_MISPLACED_CDATA_END: 361 errmsg = "Sequence ']]>' not allowed in content"; 362 break; 363 case XML_ERR_URI_REQUIRED: 364 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 365 break; 366 case XML_ERR_PUBID_REQUIRED: 367 errmsg = "PUBLIC, the Public Identifier is missing"; 368 break; 369 case XML_ERR_HYPHEN_IN_COMMENT: 370 errmsg = "Comment must not contain '--' (double-hyphen)"; 371 break; 372 case XML_ERR_PI_NOT_STARTED: 373 errmsg = "xmlParsePI : no target name"; 374 break; 375 case XML_ERR_RESERVED_XML_NAME: 376 errmsg = "Invalid PI name"; 377 break; 378 case XML_ERR_NOTATION_NOT_STARTED: 379 errmsg = "NOTATION: Name expected here"; 380 break; 381 case XML_ERR_NOTATION_NOT_FINISHED: 382 errmsg = "'>' required to close NOTATION declaration"; 383 break; 384 case XML_ERR_VALUE_REQUIRED: 385 errmsg = "Entity value required"; 386 break; 387 case XML_ERR_URI_FRAGMENT: 388 errmsg = "Fragment not allowed"; 389 break; 390 case XML_ERR_ATTLIST_NOT_STARTED: 391 errmsg = "'(' required to start ATTLIST enumeration"; 392 break; 393 case XML_ERR_NMTOKEN_REQUIRED: 394 errmsg = "NmToken expected in ATTLIST enumeration"; 395 break; 396 case XML_ERR_ATTLIST_NOT_FINISHED: 397 errmsg = "')' 
required to finish ATTLIST enumeration"; 398 break; 399 case XML_ERR_MIXED_NOT_STARTED: 400 errmsg = "MixedContentDecl : '|' or ')*' expected"; 401 break; 402 case XML_ERR_PCDATA_REQUIRED: 403 errmsg = "MixedContentDecl : '#PCDATA' expected"; 404 break; 405 case XML_ERR_ELEMCONTENT_NOT_STARTED: 406 errmsg = "ContentDecl : Name or '(' expected"; 407 break; 408 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 409 errmsg = "ContentDecl : ',' '|' or ')' expected"; 410 break; 411 case XML_ERR_PEREF_IN_INT_SUBSET: 412 errmsg = 413 "PEReference: forbidden within markup decl in internal subset"; 414 break; 415 case XML_ERR_GT_REQUIRED: 416 errmsg = "expected '>'"; 417 break; 418 case XML_ERR_CONDSEC_INVALID: 419 errmsg = "XML conditional section '[' expected"; 420 break; 421 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 422 errmsg = "Content error in the external subset"; 423 break; 424 case XML_ERR_CONDSEC_INVALID_KEYWORD: 425 errmsg = 426 "conditional section INCLUDE or IGNORE keyword expected"; 427 break; 428 case XML_ERR_CONDSEC_NOT_FINISHED: 429 errmsg = "XML conditional section not closed"; 430 break; 431 case XML_ERR_XMLDECL_NOT_STARTED: 432 errmsg = "Text declaration '<?xml' required"; 433 break; 434 case XML_ERR_XMLDECL_NOT_FINISHED: 435 errmsg = "parsing XML declaration: '?>' expected"; 436 break; 437 case XML_ERR_EXT_ENTITY_STANDALONE: 438 errmsg = "external parsed entities cannot be standalone"; 439 break; 440 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 441 errmsg = "EntityRef: expecting ';'"; 442 break; 443 case XML_ERR_DOCTYPE_NOT_FINISHED: 444 errmsg = "DOCTYPE improperly terminated"; 445 break; 446 case XML_ERR_LTSLASH_REQUIRED: 447 errmsg = "EndTag: '</' not found"; 448 break; 449 case XML_ERR_EQUAL_REQUIRED: 450 errmsg = "expected '='"; 451 break; 452 case XML_ERR_STRING_NOT_CLOSED: 453 errmsg = "String not closed expecting \" or '"; 454 break; 455 case XML_ERR_STRING_NOT_STARTED: 456 errmsg = "String not started expecting ' or \""; 457 break; 458 case XML_ERR_ENCODING_NAME: 
459 errmsg = "Invalid XML encoding name"; 460 break; 461 case XML_ERR_STANDALONE_VALUE: 462 errmsg = "standalone accepts only 'yes' or 'no'"; 463 break; 464 case XML_ERR_DOCUMENT_EMPTY: 465 errmsg = "Document is empty"; 466 break; 467 case XML_ERR_DOCUMENT_END: 468 errmsg = "Extra content at the end of the document"; 469 break; 470 case XML_ERR_NOT_WELL_BALANCED: 471 errmsg = "chunk is not well balanced"; 472 break; 473 case XML_ERR_EXTRA_CONTENT: 474 errmsg = "extra content at the end of well balanced chunk"; 475 break; 476 case XML_ERR_VERSION_MISSING: 477 errmsg = "Malformed declaration expecting version"; 478 break; 479 case XML_ERR_NAME_TOO_LONG: 480 errmsg = "Name too long use XML_PARSE_HUGE option"; 481 break; 482#if 0 483 case: 484 errmsg = ""; 485 break; 486#endif 487 default: 488 errmsg = "Unregistered error message"; 489 } 490 if (info == NULL) 491 snprintf(errstr, 128, "%s\n", errmsg); 492 else 493 snprintf(errstr, 128, "%s: %%s\n", errmsg); 494 if (ctxt != NULL) 495 ctxt->errNo = error; 496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 497 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 498 info); 499 if (ctxt != NULL) { 500 ctxt->wellFormed = 0; 501 if (ctxt->recovery == 0) 502 ctxt->disableSAX = 1; 503 } 504} 505 506/** 507 * xmlFatalErrMsg: 508 * @ctxt: an XML parser context 509 * @error: the error number 510 * @msg: the error message 511 * 512 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 513 */ 514static void 515xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 516 const char *msg) 517{ 518 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 519 (ctxt->instate == XML_PARSER_EOF)) 520 return; 521 if (ctxt != NULL) 522 ctxt->errNo = error; 523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 524 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 525 if (ctxt != NULL) { 526 ctxt->wellFormed = 0; 527 if (ctxt->recovery == 0) 528 ctxt->disableSAX = 1; 529 } 530} 531 532/** 533 * xmlWarningMsg: 534 * @ctxt: an XML parser context 535 * @error: the error number 536 * @msg: the error message 537 * @str1: extra data 538 * @str2: extra data 539 * 540 * Handle a warning. 541 */ 542static void 543xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 544 const char *msg, const xmlChar *str1, const xmlChar *str2) 545{ 546 xmlStructuredErrorFunc schannel = NULL; 547 548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 549 (ctxt->instate == XML_PARSER_EOF)) 550 return; 551 if ((ctxt != NULL) && (ctxt->sax != NULL) && 552 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 553 schannel = ctxt->sax->serror; 554 if (ctxt != NULL) { 555 __xmlRaiseError(schannel, 556 (ctxt->sax) ? ctxt->sax->warning : NULL, 557 ctxt->userData, 558 ctxt, NULL, XML_FROM_PARSER, error, 559 XML_ERR_WARNING, NULL, 0, 560 (const char *) str1, (const char *) str2, NULL, 0, 0, 561 msg, (const char *) str1, (const char *) str2); 562 } else { 563 __xmlRaiseError(schannel, NULL, NULL, 564 ctxt, NULL, XML_FROM_PARSER, error, 565 XML_ERR_WARNING, NULL, 0, 566 (const char *) str1, (const char *) str2, NULL, 0, 0, 567 msg, (const char *) str1, (const char *) str2); 568 } 569} 570 571/** 572 * xmlValidityError: 573 * @ctxt: an XML parser context 574 * @error: the error number 575 * @msg: the error message 576 * @str1: extra data 577 * 578 * Handle a validity error. 
579 */ 580static void 581xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 582 const char *msg, const xmlChar *str1, const xmlChar *str2) 583{ 584 xmlStructuredErrorFunc schannel = NULL; 585 586 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 587 (ctxt->instate == XML_PARSER_EOF)) 588 return; 589 if (ctxt != NULL) { 590 ctxt->errNo = error; 591 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 592 schannel = ctxt->sax->serror; 593 } 594 if (ctxt != NULL) { 595 __xmlRaiseError(schannel, 596 ctxt->vctxt.error, ctxt->vctxt.userData, 597 ctxt, NULL, XML_FROM_DTD, error, 598 XML_ERR_ERROR, NULL, 0, (const char *) str1, 599 (const char *) str2, NULL, 0, 0, 600 msg, (const char *) str1, (const char *) str2); 601 ctxt->valid = 0; 602 } else { 603 __xmlRaiseError(schannel, NULL, NULL, 604 ctxt, NULL, XML_FROM_DTD, error, 605 XML_ERR_ERROR, NULL, 0, (const char *) str1, 606 (const char *) str2, NULL, 0, 0, 607 msg, (const char *) str1, (const char *) str2); 608 } 609} 610 611/** 612 * xmlFatalErrMsgInt: 613 * @ctxt: an XML parser context 614 * @error: the error number 615 * @msg: the error message 616 * @val: an integer value 617 * 618 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 619 */ 620static void 621xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 622 const char *msg, int val) 623{ 624 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 625 (ctxt->instate == XML_PARSER_EOF)) 626 return; 627 if (ctxt != NULL) 628 ctxt->errNo = error; 629 __xmlRaiseError(NULL, NULL, NULL, 630 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 631 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 632 if (ctxt != NULL) { 633 ctxt->wellFormed = 0; 634 if (ctxt->recovery == 0) 635 ctxt->disableSAX = 1; 636 } 637} 638 639/** 640 * xmlFatalErrMsgStrIntStr: 641 * @ctxt: an XML parser context 642 * @error: the error number 643 * @msg: the error message 644 * @str1: an string info 645 * @val: an integer value 646 * @str2: an string info 647 * 648 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 649 */ 650static void 651xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 652 const char *msg, const xmlChar *str1, int val, 653 const xmlChar *str2) 654{ 655 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 656 (ctxt->instate == XML_PARSER_EOF)) 657 return; 658 if (ctxt != NULL) 659 ctxt->errNo = error; 660 __xmlRaiseError(NULL, NULL, NULL, 661 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 662 NULL, 0, (const char *) str1, (const char *) str2, 663 NULL, val, 0, msg, str1, val, str2); 664 if (ctxt != NULL) { 665 ctxt->wellFormed = 0; 666 if (ctxt->recovery == 0) 667 ctxt->disableSAX = 1; 668 } 669} 670 671/** 672 * xmlFatalErrMsgStr: 673 * @ctxt: an XML parser context 674 * @error: the error number 675 * @msg: the error message 676 * @val: a string value 677 * 678 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 679 */ 680static void 681xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 682 const char *msg, const xmlChar * val) 683{ 684 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 685 (ctxt->instate == XML_PARSER_EOF)) 686 return; 687 if (ctxt != NULL) 688 ctxt->errNo = error; 689 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 690 XML_FROM_PARSER, error, XML_ERR_FATAL, 691 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 692 val); 693 if (ctxt != NULL) { 694 ctxt->wellFormed = 0; 695 if (ctxt->recovery == 0) 696 ctxt->disableSAX = 1; 697 } 698} 699 700/** 701 * xmlErrMsgStr: 702 * @ctxt: an XML parser context 703 * @error: the error number 704 * @msg: the error message 705 * @val: a string value 706 * 707 * Handle a non fatal parser error 708 */ 709static void 710xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 711 const char *msg, const xmlChar * val) 712{ 713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 714 (ctxt->instate == XML_PARSER_EOF)) 715 return; 716 if (ctxt != NULL) 717 ctxt->errNo = error; 718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 719 XML_FROM_PARSER, error, XML_ERR_ERROR, 720 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 721 val); 722} 723 724/** 725 * xmlNsErr: 726 * @ctxt: an XML parser context 727 * @error: the error number 728 * @msg: the message 729 * @info1: extra information string 730 * @info2: extra information string 731 * 732 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 733 */ 734static void 735xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 736 const char *msg, 737 const xmlChar * info1, const xmlChar * info2, 738 const xmlChar * info3) 739{ 740 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 741 (ctxt->instate == XML_PARSER_EOF)) 742 return; 743 if (ctxt != NULL) 744 ctxt->errNo = error; 745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 746 XML_ERR_ERROR, NULL, 0, (const char *) info1, 747 (const char *) info2, (const char *) info3, 0, 0, msg, 748 info1, info2, info3); 749 if (ctxt != NULL) 750 ctxt->nsWellFormed = 0; 751} 752 753/** 754 * xmlNsWarn 755 * @ctxt: an XML parser context 756 * @error: the error number 757 * @msg: the message 758 * @info1: extra information string 759 * @info2: extra information string 760 * 761 * Handle a namespace warning error 762 */ 763static void 764xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 765 const char *msg, 766 const xmlChar * info1, const xmlChar * info2, 767 const xmlChar * info3) 768{ 769 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 770 (ctxt->instate == XML_PARSER_EOF)) 771 return; 772 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 773 XML_ERR_WARNING, NULL, 0, (const char *) info1, 774 (const char *) info2, (const char *) info3, 0, 0, msg, 775 info1, info2, info3); 776} 777 778/************************************************************************ 779 * * 780 * Library wide options * 781 * * 782 ************************************************************************/ 783 784/** 785 * xmlHasFeature: 786 * @feature: the feature to be examined 787 * 788 * Examines if the library has been compiled with a given feature. 789 * 790 * Returns a non-zero value if the feature exist, otherwise zero. 791 * Returns zero (0) if the feature does not exist or an unknown 792 * unknown feature is requested, non-zero otherwise. 
793 */ 794int 795xmlHasFeature(xmlFeature feature) 796{ 797 switch (feature) { 798 case XML_WITH_THREAD: 799#ifdef LIBXML_THREAD_ENABLED 800 return(1); 801#else 802 return(0); 803#endif 804 case XML_WITH_TREE: 805#ifdef LIBXML_TREE_ENABLED 806 return(1); 807#else 808 return(0); 809#endif 810 case XML_WITH_OUTPUT: 811#ifdef LIBXML_OUTPUT_ENABLED 812 return(1); 813#else 814 return(0); 815#endif 816 case XML_WITH_PUSH: 817#ifdef LIBXML_PUSH_ENABLED 818 return(1); 819#else 820 return(0); 821#endif 822 case XML_WITH_READER: 823#ifdef LIBXML_READER_ENABLED 824 return(1); 825#else 826 return(0); 827#endif 828 case XML_WITH_PATTERN: 829#ifdef LIBXML_PATTERN_ENABLED 830 return(1); 831#else 832 return(0); 833#endif 834 case XML_WITH_WRITER: 835#ifdef LIBXML_WRITER_ENABLED 836 return(1); 837#else 838 return(0); 839#endif 840 case XML_WITH_SAX1: 841#ifdef LIBXML_SAX1_ENABLED 842 return(1); 843#else 844 return(0); 845#endif 846 case XML_WITH_FTP: 847#ifdef LIBXML_FTP_ENABLED 848 return(1); 849#else 850 return(0); 851#endif 852 case XML_WITH_HTTP: 853#ifdef LIBXML_HTTP_ENABLED 854 return(1); 855#else 856 return(0); 857#endif 858 case XML_WITH_VALID: 859#ifdef LIBXML_VALID_ENABLED 860 return(1); 861#else 862 return(0); 863#endif 864 case XML_WITH_HTML: 865#ifdef LIBXML_HTML_ENABLED 866 return(1); 867#else 868 return(0); 869#endif 870 case XML_WITH_LEGACY: 871#ifdef LIBXML_LEGACY_ENABLED 872 return(1); 873#else 874 return(0); 875#endif 876 case XML_WITH_C14N: 877#ifdef LIBXML_C14N_ENABLED 878 return(1); 879#else 880 return(0); 881#endif 882 case XML_WITH_CATALOG: 883#ifdef LIBXML_CATALOG_ENABLED 884 return(1); 885#else 886 return(0); 887#endif 888 case XML_WITH_XPATH: 889#ifdef LIBXML_XPATH_ENABLED 890 return(1); 891#else 892 return(0); 893#endif 894 case XML_WITH_XPTR: 895#ifdef LIBXML_XPTR_ENABLED 896 return(1); 897#else 898 return(0); 899#endif 900 case XML_WITH_XINCLUDE: 901#ifdef LIBXML_XINCLUDE_ENABLED 902 return(1); 903#else 904 return(0); 905#endif 906 case XML_WITH_ICONV: 
907#ifdef LIBXML_ICONV_ENABLED 908 return(1); 909#else 910 return(0); 911#endif 912 case XML_WITH_ISO8859X: 913#ifdef LIBXML_ISO8859X_ENABLED 914 return(1); 915#else 916 return(0); 917#endif 918 case XML_WITH_UNICODE: 919#ifdef LIBXML_UNICODE_ENABLED 920 return(1); 921#else 922 return(0); 923#endif 924 case XML_WITH_REGEXP: 925#ifdef LIBXML_REGEXP_ENABLED 926 return(1); 927#else 928 return(0); 929#endif 930 case XML_WITH_AUTOMATA: 931#ifdef LIBXML_AUTOMATA_ENABLED 932 return(1); 933#else 934 return(0); 935#endif 936 case XML_WITH_EXPR: 937#ifdef LIBXML_EXPR_ENABLED 938 return(1); 939#else 940 return(0); 941#endif 942 case XML_WITH_SCHEMAS: 943#ifdef LIBXML_SCHEMAS_ENABLED 944 return(1); 945#else 946 return(0); 947#endif 948 case XML_WITH_SCHEMATRON: 949#ifdef LIBXML_SCHEMATRON_ENABLED 950 return(1); 951#else 952 return(0); 953#endif 954 case XML_WITH_MODULES: 955#ifdef LIBXML_MODULES_ENABLED 956 return(1); 957#else 958 return(0); 959#endif 960 case XML_WITH_DEBUG: 961#ifdef LIBXML_DEBUG_ENABLED 962 return(1); 963#else 964 return(0); 965#endif 966 case XML_WITH_DEBUG_MEM: 967#ifdef DEBUG_MEMORY_LOCATION 968 return(1); 969#else 970 return(0); 971#endif 972 case XML_WITH_DEBUG_RUN: 973#ifdef LIBXML_DEBUG_RUNTIME 974 return(1); 975#else 976 return(0); 977#endif 978 case XML_WITH_ZLIB: 979#ifdef LIBXML_ZLIB_ENABLED 980 return(1); 981#else 982 return(0); 983#endif 984 case XML_WITH_LZMA: 985#ifdef LIBXML_LZMA_ENABLED 986 return(1); 987#else 988 return(0); 989#endif 990 case XML_WITH_ICU: 991#ifdef LIBXML_ICU_ENABLED 992 return(1); 993#else 994 return(0); 995#endif 996 default: 997 break; 998 } 999 return(0); 1000} 1001 1002/************************************************************************ 1003 * * 1004 * SAX2 defaulted attributes handling * 1005 * * 1006 ************************************************************************/ 1007 1008/** 1009 * xmlDetectSAX2: 1010 * @ctxt: an XML parser context 1011 * 1012 * Do the SAX2 detection and specific intialization 1013 
*/ 1014static void 1015xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1016 if (ctxt == NULL) return; 1017#ifdef LIBXML_SAX1_ENABLED 1018 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1019 ((ctxt->sax->startElementNs != NULL) || 1020 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1021#else 1022 ctxt->sax2 = 1; 1023#endif /* LIBXML_SAX1_ENABLED */ 1024 1025 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1026 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1027 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1028 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1029 (ctxt->str_xml_ns == NULL)) { 1030 xmlErrMemory(ctxt, NULL); 1031 } 1032} 1033 1034typedef struct _xmlDefAttrs xmlDefAttrs; 1035typedef xmlDefAttrs *xmlDefAttrsPtr; 1036struct _xmlDefAttrs { 1037 int nbAttrs; /* number of defaulted attributes on that element */ 1038 int maxAttrs; /* the size of the array */ 1039 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1040}; 1041 1042/** 1043 * xmlAttrNormalizeSpace: 1044 * @src: the source string 1045 * @dst: the target string 1046 * 1047 * Normalize the space in non CDATA attribute values: 1048 * If the attribute type is not CDATA, then the XML processor MUST further 1049 * process the normalized attribute value by discarding any leading and 1050 * trailing space (#x20) characters, and by replacing sequences of space 1051 * (#x20) characters by a single space (#x20) character. 1052 * Note that the size of dst need to be at least src, and if one doesn't need 1053 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1054 * passing src as dst is just fine. 1055 * 1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1057 * is needed. 
1058 */ 1059static xmlChar * 1060xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1061{ 1062 if ((src == NULL) || (dst == NULL)) 1063 return(NULL); 1064 1065 while (*src == 0x20) src++; 1066 while (*src != 0) { 1067 if (*src == 0x20) { 1068 while (*src == 0x20) src++; 1069 if (*src != 0) 1070 *dst++ = 0x20; 1071 } else { 1072 *dst++ = *src++; 1073 } 1074 } 1075 *dst = 0; 1076 if (dst == src) 1077 return(NULL); 1078 return(dst); 1079} 1080 1081/** 1082 * xmlAttrNormalizeSpace2: 1083 * @src: the source string 1084 * 1085 * Normalize the space in non CDATA attribute values, a slightly more complex 1086 * front end to avoid allocation problems when running on attribute values 1087 * coming from the input. 1088 * 1089 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1090 * is needed. 1091 */ 1092static const xmlChar * 1093xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1094{ 1095 int i; 1096 int remove_head = 0; 1097 int need_realloc = 0; 1098 const xmlChar *cur; 1099 1100 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1101 return(NULL); 1102 i = *len; 1103 if (i <= 0) 1104 return(NULL); 1105 1106 cur = src; 1107 while (*cur == 0x20) { 1108 cur++; 1109 remove_head++; 1110 } 1111 while (*cur != 0) { 1112 if (*cur == 0x20) { 1113 cur++; 1114 if ((*cur == 0x20) || (*cur == 0)) { 1115 need_realloc = 1; 1116 break; 1117 } 1118 } else 1119 cur++; 1120 } 1121 if (need_realloc) { 1122 xmlChar *ret; 1123 1124 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1125 if (ret == NULL) { 1126 xmlErrMemory(ctxt, NULL); 1127 return(NULL); 1128 } 1129 xmlAttrNormalizeSpace(ret, ret); 1130 *len = (int) strlen((const char *)ret); 1131 return(ret); 1132 } else if (remove_head) { 1133 *len -= remove_head; 1134 memmove(src, src + remove_head, 1 + *len); 1135 return(src); 1136 } 1137 return(NULL); 1138} 1139 1140/** 1141 * xmlAddDefAttrs: 1142 * @ctxt: an XML parser context 1143 * @fullname: the element fullname 1144 * 
@fullattr: the attribute fullname 1145 * @value: the attribute value 1146 * 1147 * Add a defaulted attribute for an element 1148 */ 1149static void 1150xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1151 const xmlChar *fullname, 1152 const xmlChar *fullattr, 1153 const xmlChar *value) { 1154 xmlDefAttrsPtr defaults; 1155 int len; 1156 const xmlChar *name; 1157 const xmlChar *prefix; 1158 1159 /* 1160 * Allows to detect attribute redefinitions 1161 */ 1162 if (ctxt->attsSpecial != NULL) { 1163 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1164 return; 1165 } 1166 1167 if (ctxt->attsDefault == NULL) { 1168 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1169 if (ctxt->attsDefault == NULL) 1170 goto mem_error; 1171 } 1172 1173 /* 1174 * split the element name into prefix:localname , the string found 1175 * are within the DTD and then not associated to namespace names. 1176 */ 1177 name = xmlSplitQName3(fullname, &len); 1178 if (name == NULL) { 1179 name = xmlDictLookup(ctxt->dict, fullname, -1); 1180 prefix = NULL; 1181 } else { 1182 name = xmlDictLookup(ctxt->dict, name, -1); 1183 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1184 } 1185 1186 /* 1187 * make sure there is some storage 1188 */ 1189 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1190 if (defaults == NULL) { 1191 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1192 (4 * 5) * sizeof(const xmlChar *)); 1193 if (defaults == NULL) 1194 goto mem_error; 1195 defaults->nbAttrs = 0; 1196 defaults->maxAttrs = 4; 1197 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1198 defaults, NULL) < 0) { 1199 xmlFree(defaults); 1200 goto mem_error; 1201 } 1202 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1203 xmlDefAttrsPtr temp; 1204 1205 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1206 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1207 if (temp == NULL) 1208 goto mem_error; 1209 defaults = temp; 1210 defaults->maxAttrs *= 2; 
1211 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1212 defaults, NULL) < 0) { 1213 xmlFree(defaults); 1214 goto mem_error; 1215 } 1216 } 1217 1218 /* 1219 * Split the element name into prefix:localname , the string found 1220 * are within the DTD and hen not associated to namespace names. 1221 */ 1222 name = xmlSplitQName3(fullattr, &len); 1223 if (name == NULL) { 1224 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1225 prefix = NULL; 1226 } else { 1227 name = xmlDictLookup(ctxt->dict, name, -1); 1228 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1229 } 1230 1231 defaults->values[5 * defaults->nbAttrs] = name; 1232 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1233 /* intern the string and precompute the end */ 1234 len = xmlStrlen(value); 1235 value = xmlDictLookup(ctxt->dict, value, len); 1236 defaults->values[5 * defaults->nbAttrs + 2] = value; 1237 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1238 if (ctxt->external) 1239 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1240 else 1241 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1242 defaults->nbAttrs++; 1243 1244 return; 1245 1246mem_error: 1247 xmlErrMemory(ctxt, NULL); 1248 return; 1249} 1250 1251/** 1252 * xmlAddSpecialAttr: 1253 * @ctxt: an XML parser context 1254 * @fullname: the element fullname 1255 * @fullattr: the attribute fullname 1256 * @type: the attribute type 1257 * 1258 * Register this attribute type 1259 */ 1260static void 1261xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1262 const xmlChar *fullname, 1263 const xmlChar *fullattr, 1264 int type) 1265{ 1266 if (ctxt->attsSpecial == NULL) { 1267 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1268 if (ctxt->attsSpecial == NULL) 1269 goto mem_error; 1270 } 1271 1272 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1273 return; 1274 1275 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1276 (void *) (long) type); 1277 return; 1278 1279mem_error: 1280 
xmlErrMemory(ctxt, NULL); 1281 return; 1282} 1283 1284/** 1285 * xmlCleanSpecialAttrCallback: 1286 * 1287 * Removes CDATA attributes from the special attribute table 1288 */ 1289static void 1290xmlCleanSpecialAttrCallback(void *payload, void *data, 1291 const xmlChar *fullname, const xmlChar *fullattr, 1292 const xmlChar *unused ATTRIBUTE_UNUSED) { 1293 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1294 1295 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1296 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1297 } 1298} 1299 1300/** 1301 * xmlCleanSpecialAttr: 1302 * @ctxt: an XML parser context 1303 * 1304 * Trim the list of attributes defined to remove all those of type 1305 * CDATA as they are not special. This call should be done when finishing 1306 * to parse the DTD and before starting to parse the document root. 1307 */ 1308static void 1309xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1310{ 1311 if (ctxt->attsSpecial == NULL) 1312 return; 1313 1314 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1315 1316 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1317 xmlHashFree(ctxt->attsSpecial, NULL); 1318 ctxt->attsSpecial = NULL; 1319 } 1320 return; 1321} 1322 1323/** 1324 * xmlCheckLanguageID: 1325 * @lang: pointer to the string value 1326 * 1327 * Checks that the value conforms to the LanguageID production: 1328 * 1329 * NOTE: this is somewhat deprecated, those productions were removed from 1330 * the XML Second edition. 
1331 * 1332 * [33] LanguageID ::= Langcode ('-' Subcode)* 1333 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1334 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1335 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1336 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1337 * [38] Subcode ::= ([a-z] | [A-Z])+ 1338 * 1339 * The current REC reference the sucessors of RFC 1766, currently 5646 1340 * 1341 * http://www.rfc-editor.org/rfc/rfc5646.txt 1342 * langtag = language 1343 * ["-" script] 1344 * ["-" region] 1345 * *("-" variant) 1346 * *("-" extension) 1347 * ["-" privateuse] 1348 * language = 2*3ALPHA ; shortest ISO 639 code 1349 * ["-" extlang] ; sometimes followed by 1350 * ; extended language subtags 1351 * / 4ALPHA ; or reserved for future use 1352 * / 5*8ALPHA ; or registered language subtag 1353 * 1354 * extlang = 3ALPHA ; selected ISO 639 codes 1355 * *2("-" 3ALPHA) ; permanently reserved 1356 * 1357 * script = 4ALPHA ; ISO 15924 code 1358 * 1359 * region = 2ALPHA ; ISO 3166-1 code 1360 * / 3DIGIT ; UN M.49 code 1361 * 1362 * variant = 5*8alphanum ; registered variants 1363 * / (DIGIT 3alphanum) 1364 * 1365 * extension = singleton 1*("-" (2*8alphanum)) 1366 * 1367 * ; Single alphanumerics 1368 * ; "x" reserved for private use 1369 * singleton = DIGIT ; 0 - 9 1370 * / %x41-57 ; A - W 1371 * / %x59-5A ; Y - Z 1372 * / %x61-77 ; a - w 1373 * / %x79-7A ; y - z 1374 * 1375 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1376 * The parser below doesn't try to cope with extension or privateuse 1377 * that could be added but that's not interoperable anyway 1378 * 1379 * Returns 1 if correct 0 otherwise 1380 **/ 1381int 1382xmlCheckLanguageID(const xmlChar * lang) 1383{ 1384 const xmlChar *cur = lang, *nxt; 1385 1386 if (cur == NULL) 1387 return (0); 1388 if (((cur[0] == 'i') && (cur[1] == '-')) || 1389 ((cur[0] == 'I') && (cur[1] == '-')) || 1390 ((cur[0] == 'x') && (cur[1] == '-')) || 1391 ((cur[0] == 'X') && (cur[1] 
== '-'))) { 1392 /* 1393 * Still allow IANA code and user code which were coming 1394 * from the previous version of the XML-1.0 specification 1395 * it's deprecated but we should not fail 1396 */ 1397 cur += 2; 1398 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1399 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1400 cur++; 1401 return(cur[0] == 0); 1402 } 1403 nxt = cur; 1404 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1405 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1406 nxt++; 1407 if (nxt - cur >= 4) { 1408 /* 1409 * Reserved 1410 */ 1411 if ((nxt - cur > 8) || (nxt[0] != 0)) 1412 return(0); 1413 return(1); 1414 } 1415 if (nxt - cur < 2) 1416 return(0); 1417 /* we got an ISO 639 code */ 1418 if (nxt[0] == 0) 1419 return(1); 1420 if (nxt[0] != '-') 1421 return(0); 1422 1423 nxt++; 1424 cur = nxt; 1425 /* now we can have extlang or script or region or variant */ 1426 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1427 goto region_m49; 1428 1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1431 nxt++; 1432 if (nxt - cur == 4) 1433 goto script; 1434 if (nxt - cur == 2) 1435 goto region; 1436 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1437 goto variant; 1438 if (nxt - cur != 3) 1439 return(0); 1440 /* we parsed an extlang */ 1441 if (nxt[0] == 0) 1442 return(1); 1443 if (nxt[0] != '-') 1444 return(0); 1445 1446 nxt++; 1447 cur = nxt; 1448 /* now we can have script or region or variant */ 1449 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1450 goto region_m49; 1451 1452 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1453 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1454 nxt++; 1455 if (nxt - cur == 2) 1456 goto region; 1457 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1458 goto variant; 1459 if (nxt - cur != 4) 1460 return(0); 1461 /* we parsed a script */ 1462script: 1463 if (nxt[0] == 0) 1464 return(1); 1465 if (nxt[0] != '-') 1466 return(0); 1467 1468 nxt++; 1469 cur = nxt; 1470 /* now we can have region or variant */ 1471 if ((nxt[0] >= '0') && (nxt[0] 
<= '9')) 1472 goto region_m49; 1473 1474 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1475 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1476 nxt++; 1477 1478 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1479 goto variant; 1480 if (nxt - cur != 2) 1481 return(0); 1482 /* we parsed a region */ 1483region: 1484 if (nxt[0] == 0) 1485 return(1); 1486 if (nxt[0] != '-') 1487 return(0); 1488 1489 nxt++; 1490 cur = nxt; 1491 /* now we can just have a variant */ 1492 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1493 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1494 nxt++; 1495 1496 if ((nxt - cur < 5) || (nxt - cur > 8)) 1497 return(0); 1498 1499 /* we parsed a variant */ 1500variant: 1501 if (nxt[0] == 0) 1502 return(1); 1503 if (nxt[0] != '-') 1504 return(0); 1505 /* extensions and private use subtags not checked */ 1506 return (1); 1507 1508region_m49: 1509 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1510 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1511 nxt += 3; 1512 goto region; 1513 } 1514 return(0); 1515} 1516 1517/************************************************************************ 1518 * * 1519 * Parser stacks related functions and macros * 1520 * * 1521 ************************************************************************/ 1522 1523static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1524 const xmlChar ** str); 1525 1526#ifdef SAX2 1527/** 1528 * nsPush: 1529 * @ctxt: an XML parser context 1530 * @prefix: the namespace prefix or NULL 1531 * @URL: the namespace name 1532 * 1533 * Pushes a new parser namespace on top of the ns stack 1534 * 1535 * Returns -1 in case of error, -2 if the namespace should be discarded 1536 * and the index in the stack otherwise. 
1537 */ 1538static int 1539nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1540{ 1541 if (ctxt->options & XML_PARSE_NSCLEAN) { 1542 int i; 1543 for (i = 0;i < ctxt->nsNr;i += 2) { 1544 if (ctxt->nsTab[i] == prefix) { 1545 /* in scope */ 1546 if (ctxt->nsTab[i + 1] == URL) 1547 return(-2); 1548 /* out of scope keep it */ 1549 break; 1550 } 1551 } 1552 } 1553 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1554 ctxt->nsMax = 10; 1555 ctxt->nsNr = 0; 1556 ctxt->nsTab = (const xmlChar **) 1557 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1558 if (ctxt->nsTab == NULL) { 1559 xmlErrMemory(ctxt, NULL); 1560 ctxt->nsMax = 0; 1561 return (-1); 1562 } 1563 } else if (ctxt->nsNr >= ctxt->nsMax) { 1564 const xmlChar ** tmp; 1565 ctxt->nsMax *= 2; 1566 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1567 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1568 if (tmp == NULL) { 1569 xmlErrMemory(ctxt, NULL); 1570 ctxt->nsMax /= 2; 1571 return (-1); 1572 } 1573 ctxt->nsTab = tmp; 1574 } 1575 ctxt->nsTab[ctxt->nsNr++] = prefix; 1576 ctxt->nsTab[ctxt->nsNr++] = URL; 1577 return (ctxt->nsNr); 1578} 1579/** 1580 * nsPop: 1581 * @ctxt: an XML parser context 1582 * @nr: the number to pop 1583 * 1584 * Pops the top @nr parser prefix/namespace from the ns stack 1585 * 1586 * Returns the number of namespaces removed 1587 */ 1588static int 1589nsPop(xmlParserCtxtPtr ctxt, int nr) 1590{ 1591 int i; 1592 1593 if (ctxt->nsTab == NULL) return(0); 1594 if (ctxt->nsNr < nr) { 1595 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1596 nr = ctxt->nsNr; 1597 } 1598 if (ctxt->nsNr <= 0) 1599 return (0); 1600 1601 for (i = 0;i < nr;i++) { 1602 ctxt->nsNr--; 1603 ctxt->nsTab[ctxt->nsNr] = NULL; 1604 } 1605 return(nr); 1606} 1607#endif 1608 1609static int 1610xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1611 const xmlChar **atts; 1612 int *attallocs; 1613 int maxatts; 1614 1615 if (ctxt->atts == NULL) { 1616 maxatts = 55; /* allow for 10 attrs by 
default */ 1617 atts = (const xmlChar **) 1618 xmlMalloc(maxatts * sizeof(xmlChar *)); 1619 if (atts == NULL) goto mem_error; 1620 ctxt->atts = atts; 1621 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1622 if (attallocs == NULL) goto mem_error; 1623 ctxt->attallocs = attallocs; 1624 ctxt->maxatts = maxatts; 1625 } else if (nr + 5 > ctxt->maxatts) { 1626 maxatts = (nr + 5) * 2; 1627 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1628 maxatts * sizeof(const xmlChar *)); 1629 if (atts == NULL) goto mem_error; 1630 ctxt->atts = atts; 1631 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1632 (maxatts / 5) * sizeof(int)); 1633 if (attallocs == NULL) goto mem_error; 1634 ctxt->attallocs = attallocs; 1635 ctxt->maxatts = maxatts; 1636 } 1637 return(ctxt->maxatts); 1638mem_error: 1639 xmlErrMemory(ctxt, NULL); 1640 return(-1); 1641} 1642 1643/** 1644 * inputPush: 1645 * @ctxt: an XML parser context 1646 * @value: the parser input 1647 * 1648 * Pushes a new parser input on top of the input stack 1649 * 1650 * Returns -1 in case of error, the index in the stack otherwise 1651 */ 1652int 1653inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1654{ 1655 if ((ctxt == NULL) || (value == NULL)) 1656 return(-1); 1657 if (ctxt->inputNr >= ctxt->inputMax) { 1658 ctxt->inputMax *= 2; 1659 ctxt->inputTab = 1660 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1661 ctxt->inputMax * 1662 sizeof(ctxt->inputTab[0])); 1663 if (ctxt->inputTab == NULL) { 1664 xmlErrMemory(ctxt, NULL); 1665 xmlFreeInputStream(value); 1666 ctxt->inputMax /= 2; 1667 value = NULL; 1668 return (-1); 1669 } 1670 } 1671 ctxt->inputTab[ctxt->inputNr] = value; 1672 ctxt->input = value; 1673 return (ctxt->inputNr++); 1674} 1675/** 1676 * inputPop: 1677 * @ctxt: an XML parser context 1678 * 1679 * Pops the top parser input from the input stack 1680 * 1681 * Returns the input just removed 1682 */ 1683xmlParserInputPtr 1684inputPop(xmlParserCtxtPtr ctxt) 1685{ 1686 
xmlParserInputPtr ret; 1687 1688 if (ctxt == NULL) 1689 return(NULL); 1690 if (ctxt->inputNr <= 0) 1691 return (NULL); 1692 ctxt->inputNr--; 1693 if (ctxt->inputNr > 0) 1694 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1695 else 1696 ctxt->input = NULL; 1697 ret = ctxt->inputTab[ctxt->inputNr]; 1698 ctxt->inputTab[ctxt->inputNr] = NULL; 1699 return (ret); 1700} 1701/** 1702 * nodePush: 1703 * @ctxt: an XML parser context 1704 * @value: the element node 1705 * 1706 * Pushes a new element node on top of the node stack 1707 * 1708 * Returns -1 in case of error, the index in the stack otherwise 1709 */ 1710int 1711nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1712{ 1713 if (ctxt == NULL) return(0); 1714 if (ctxt->nodeNr >= ctxt->nodeMax) { 1715 xmlNodePtr *tmp; 1716 1717 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1718 ctxt->nodeMax * 2 * 1719 sizeof(ctxt->nodeTab[0])); 1720 if (tmp == NULL) { 1721 xmlErrMemory(ctxt, NULL); 1722 return (-1); 1723 } 1724 ctxt->nodeTab = tmp; 1725 ctxt->nodeMax *= 2; 1726 } 1727 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1728 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1729 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1730 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1731 xmlParserMaxDepth); 1732 ctxt->instate = XML_PARSER_EOF; 1733 return(-1); 1734 } 1735 ctxt->nodeTab[ctxt->nodeNr] = value; 1736 ctxt->node = value; 1737 return (ctxt->nodeNr++); 1738} 1739 1740/** 1741 * nodePop: 1742 * @ctxt: an XML parser context 1743 * 1744 * Pops the top element node from the node stack 1745 * 1746 * Returns the node just removed 1747 */ 1748xmlNodePtr 1749nodePop(xmlParserCtxtPtr ctxt) 1750{ 1751 xmlNodePtr ret; 1752 1753 if (ctxt == NULL) return(NULL); 1754 if (ctxt->nodeNr <= 0) 1755 return (NULL); 1756 ctxt->nodeNr--; 1757 if (ctxt->nodeNr > 0) 1758 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1759 else 1760 ctxt->node = NULL; 1761 ret = ctxt->nodeTab[ctxt->nodeNr]; 1762 ctxt->nodeTab[ctxt->nodeNr] 
= NULL; 1763 return (ret); 1764} 1765 1766#ifdef LIBXML_PUSH_ENABLED 1767/** 1768 * nameNsPush: 1769 * @ctxt: an XML parser context 1770 * @value: the element name 1771 * @prefix: the element prefix 1772 * @URI: the element namespace name 1773 * 1774 * Pushes a new element name/prefix/URL on top of the name stack 1775 * 1776 * Returns -1 in case of error, the index in the stack otherwise 1777 */ 1778static int 1779nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1780 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1781{ 1782 if (ctxt->nameNr >= ctxt->nameMax) { 1783 const xmlChar * *tmp; 1784 void **tmp2; 1785 ctxt->nameMax *= 2; 1786 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1787 ctxt->nameMax * 1788 sizeof(ctxt->nameTab[0])); 1789 if (tmp == NULL) { 1790 ctxt->nameMax /= 2; 1791 goto mem_error; 1792 } 1793 ctxt->nameTab = tmp; 1794 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1795 ctxt->nameMax * 3 * 1796 sizeof(ctxt->pushTab[0])); 1797 if (tmp2 == NULL) { 1798 ctxt->nameMax /= 2; 1799 goto mem_error; 1800 } 1801 ctxt->pushTab = tmp2; 1802 } 1803 ctxt->nameTab[ctxt->nameNr] = value; 1804 ctxt->name = value; 1805 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1806 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1807 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1808 return (ctxt->nameNr++); 1809mem_error: 1810 xmlErrMemory(ctxt, NULL); 1811 return (-1); 1812} 1813/** 1814 * nameNsPop: 1815 * @ctxt: an XML parser context 1816 * 1817 * Pops the top element/prefix/URI name from the name stack 1818 * 1819 * Returns the name just removed 1820 */ 1821static const xmlChar * 1822nameNsPop(xmlParserCtxtPtr ctxt) 1823{ 1824 const xmlChar *ret; 1825 1826 if (ctxt->nameNr <= 0) 1827 return (NULL); 1828 ctxt->nameNr--; 1829 if (ctxt->nameNr > 0) 1830 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1831 else 1832 ctxt->name = NULL; 1833 ret = ctxt->nameTab[ctxt->nameNr]; 1834 ctxt->nameTab[ctxt->nameNr] = NULL; 
1835 return (ret); 1836} 1837#endif /* LIBXML_PUSH_ENABLED */ 1838 1839/** 1840 * namePush: 1841 * @ctxt: an XML parser context 1842 * @value: the element name 1843 * 1844 * Pushes a new element name on top of the name stack 1845 * 1846 * Returns -1 in case of error, the index in the stack otherwise 1847 */ 1848int 1849namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1850{ 1851 if (ctxt == NULL) return (-1); 1852 1853 if (ctxt->nameNr >= ctxt->nameMax) { 1854 const xmlChar * *tmp; 1855 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1856 ctxt->nameMax * 2 * 1857 sizeof(ctxt->nameTab[0])); 1858 if (tmp == NULL) { 1859 goto mem_error; 1860 } 1861 ctxt->nameTab = tmp; 1862 ctxt->nameMax *= 2; 1863 } 1864 ctxt->nameTab[ctxt->nameNr] = value; 1865 ctxt->name = value; 1866 return (ctxt->nameNr++); 1867mem_error: 1868 xmlErrMemory(ctxt, NULL); 1869 return (-1); 1870} 1871/** 1872 * namePop: 1873 * @ctxt: an XML parser context 1874 * 1875 * Pops the top element name from the name stack 1876 * 1877 * Returns the name just removed 1878 */ 1879const xmlChar * 1880namePop(xmlParserCtxtPtr ctxt) 1881{ 1882 const xmlChar *ret; 1883 1884 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1885 return (NULL); 1886 ctxt->nameNr--; 1887 if (ctxt->nameNr > 0) 1888 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1889 else 1890 ctxt->name = NULL; 1891 ret = ctxt->nameTab[ctxt->nameNr]; 1892 ctxt->nameTab[ctxt->nameNr] = NULL; 1893 return (ret); 1894} 1895 1896static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1897 if (ctxt->spaceNr >= ctxt->spaceMax) { 1898 int *tmp; 1899 1900 ctxt->spaceMax *= 2; 1901 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1902 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1903 if (tmp == NULL) { 1904 xmlErrMemory(ctxt, NULL); 1905 ctxt->spaceMax /=2; 1906 return(-1); 1907 } 1908 ctxt->spaceTab = tmp; 1909 } 1910 ctxt->spaceTab[ctxt->spaceNr] = val; 1911 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1912 return(ctxt->spaceNr++); 1913} 1914 1915static 
int spacePop(xmlParserCtxtPtr ctxt) { 1916 int ret; 1917 if (ctxt->spaceNr <= 0) return(0); 1918 ctxt->spaceNr--; 1919 if (ctxt->spaceNr > 0) 1920 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1921 else 1922 ctxt->space = &ctxt->spaceTab[0]; 1923 ret = ctxt->spaceTab[ctxt->spaceNr]; 1924 ctxt->spaceTab[ctxt->spaceNr] = -1; 1925 return(ret); 1926} 1927 1928/* 1929 * Macros for accessing the content. Those should be used only by the parser, 1930 * and not exported. 1931 * 1932 * Dirty macros, i.e. one often need to make assumption on the context to 1933 * use them 1934 * 1935 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1936 * To be used with extreme caution since operations consuming 1937 * characters may move the input buffer to a different location ! 1938 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1939 * This should be used internally by the parser 1940 * only to compare to ASCII values otherwise it would break when 1941 * running with UTF-8 encoding. 1942 * RAW same as CUR but in the input buffer, bypass any token 1943 * extraction that may have been done 1944 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1945 * to compare on ASCII based substring. 1946 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1947 * strings without newlines within the parser. 1948 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1949 * defined char within the parser. 1950 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1951 * 1952 * NEXT Skip to the next character, this does the proper decoding 1953 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1954 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1955 * CUR_CHAR(l) returns the current unicode character (int), set l 1956 * to the number of xmlChars used for the encoding [0-5]. 
1957 * CUR_SCHAR same but operate on a string instead of the context 1958 * COPY_BUF copy the current unicode char to the target buffer, increment 1959 * the index 1960 * GROW, SHRINK handling of input buffers 1961 */ 1962 1963#define RAW (*ctxt->input->cur) 1964#define CUR (*ctxt->input->cur) 1965#define NXT(val) ctxt->input->cur[(val)] 1966#define CUR_PTR ctxt->input->cur 1967 1968#define CMP4( s, c1, c2, c3, c4 ) \ 1969 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1970 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1971#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1972 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1973#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1974 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1975#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1976 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1977#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1978 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1979#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1980 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1981 ((unsigned char *) s)[ 8 ] == c9 ) 1982#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1983 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1984 ((unsigned char *) s)[ 9 ] == c10 ) 1985 1986#define SKIP(val) do { \ 1987 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1988 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1989 if ((*ctxt->input->cur == 0) && \ 1990 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1991 xmlPopInput(ctxt); \ 1992 } while (0) 1993 1994#define SKIPL(val) do { \ 1995 int skipl; \ 1996 for(skipl=0; skipl<val; skipl++) { \ 1997 if (*(ctxt->input->cur) == '\n') { \ 1998 ctxt->input->line++; ctxt->input->col = 1; \ 1999 } else ctxt->input->col++; \ 2000 ctxt->nbChars++; \ 2001 
ctxt->input->cur++; \ 2002 } \ 2003 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2004 if ((*ctxt->input->cur == 0) && \ 2005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2006 xmlPopInput(ctxt); \ 2007 } while (0) 2008 2009#define SHRINK if ((ctxt->progressive == 0) && \ 2010 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2011 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2012 xmlSHRINK (ctxt); 2013 2014static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2015 xmlParserInputShrink(ctxt->input); 2016 if ((*ctxt->input->cur == 0) && 2017 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2018 xmlPopInput(ctxt); 2019 } 2020 2021#define GROW if ((ctxt->progressive == 0) && \ 2022 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2023 xmlGROW (ctxt); 2024 2025static void xmlGROW (xmlParserCtxtPtr ctxt) { 2026 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 2027 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 2028 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2029 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2030 ctxt->instate = XML_PARSER_EOF; 2031 } 2032 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2033 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2034 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2035 xmlPopInput(ctxt); 2036} 2037 2038#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2039 2040#define NEXT xmlNextChar(ctxt) 2041 2042#define NEXT1 { \ 2043 ctxt->input->col++; \ 2044 ctxt->input->cur++; \ 2045 ctxt->nbChars++; \ 2046 if (*ctxt->input->cur == 0) \ 2047 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2048 } 2049 2050#define NEXTL(l) do { \ 2051 if (*(ctxt->input->cur) == '\n') { \ 2052 ctxt->input->line++; ctxt->input->col = 1; \ 2053 } else ctxt->input->col++; \ 2054 ctxt->input->cur += l; \ 2055 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2056 } while (0) 2057 2058#define CUR_CHAR(l) 
xmlCurrentChar(ctxt, &l) 2059#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2060 2061#define COPY_BUF(l,b,i,v) \ 2062 if (l == 1) b[i++] = (xmlChar) v; \ 2063 else i += xmlCopyCharMultiByte(&b[i],v) 2064 2065/** 2066 * xmlSkipBlankChars: 2067 * @ctxt: the XML parser context 2068 * 2069 * skip all blanks character found at that point in the input streams. 2070 * It pops up finished entities in the process if allowable at that point. 2071 * 2072 * Returns the number of space chars skipped 2073 */ 2074 2075int 2076xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2077 int res = 0; 2078 2079 /* 2080 * It's Okay to use CUR/NEXT here since all the blanks are on 2081 * the ASCII range. 2082 */ 2083 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2084 const xmlChar *cur; 2085 /* 2086 * if we are in the document content, go really fast 2087 */ 2088 cur = ctxt->input->cur; 2089 while (IS_BLANK_CH(*cur)) { 2090 if (*cur == '\n') { 2091 ctxt->input->line++; ctxt->input->col = 1; 2092 } 2093 cur++; 2094 res++; 2095 if (*cur == 0) { 2096 ctxt->input->cur = cur; 2097 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2098 cur = ctxt->input->cur; 2099 } 2100 } 2101 ctxt->input->cur = cur; 2102 } else { 2103 int cur; 2104 do { 2105 cur = CUR; 2106 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2107 NEXT; 2108 cur = CUR; 2109 res++; 2110 } 2111 while ((cur == 0) && (ctxt->inputNr > 1) && 2112 (ctxt->instate != XML_PARSER_COMMENT)) { 2113 xmlPopInput(ctxt); 2114 cur = CUR; 2115 } 2116 /* 2117 * Need to handle support of entities branching here 2118 */ 2119 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2120 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2121 } 2122 return(res); 2123} 2124 2125/************************************************************************ 2126 * * 2127 * Commodity functions to handle entities * 2128 * * 2129 ************************************************************************/ 2130 2131/** 2132 * 
xmlPopInput: 2133 * @ctxt: an XML parser context 2134 * 2135 * xmlPopInput: the current input pointed by ctxt->input came to an end 2136 * pop it and return the next char. 2137 * 2138 * Returns the current xmlChar in the parser context 2139 */ 2140xmlChar 2141xmlPopInput(xmlParserCtxtPtr ctxt) { 2142 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2143 if (xmlParserDebugEntities) 2144 xmlGenericError(xmlGenericErrorContext, 2145 "Popping input %d\n", ctxt->inputNr); 2146 xmlFreeInputStream(inputPop(ctxt)); 2147 if ((*ctxt->input->cur == 0) && 2148 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2149 return(xmlPopInput(ctxt)); 2150 return(CUR); 2151} 2152 2153/** 2154 * xmlPushInput: 2155 * @ctxt: an XML parser context 2156 * @input: an XML parser input fragment (entity, XML fragment ...). 2157 * 2158 * xmlPushInput: switch to a new input stream which is stacked on top 2159 * of the previous one(s). 2160 * Returns -1 in case of error or the index in the input stack 2161 */ 2162int 2163xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2164 int ret; 2165 if (input == NULL) return(-1); 2166 2167 if (xmlParserDebugEntities) { 2168 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2169 xmlGenericError(xmlGenericErrorContext, 2170 "%s(%d): ", ctxt->input->filename, 2171 ctxt->input->line); 2172 xmlGenericError(xmlGenericErrorContext, 2173 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2174 } 2175 ret = inputPush(ctxt, input); 2176 if (ctxt->instate == XML_PARSER_EOF) 2177 return(-1); 2178 GROW; 2179 return(ret); 2180} 2181 2182/** 2183 * xmlParseCharRef: 2184 * @ctxt: an XML parser context 2185 * 2186 * parse Reference declarations 2187 * 2188 * [66] CharRef ::= '&#' [0-9]+ ';' | 2189 * '&#x' [0-9a-fA-F]+ ';' 2190 * 2191 * [ WFC: Legal Character ] 2192 * Characters referred to using character references must match the 2193 * production for Char. 
 *
 * The value is accumulated in an unsigned int; as soon as it exceeds
 * 0x10FFFF a sticky "outofrange" marker is set and the reference is
 * rejected at the end, whatever the accumulator holds by then.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        /* hexadecimal form: '&#x' [0-9a-fA-F]+ ';' */
        SKIP(3);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /*
             * count is incremented here and again after NEXT, i.e. twice
             * per digit; once it passes 20 the input is refilled and the
             * counter restarts.
             */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            /*
             * Note that the hex letters carry an extra (count < 20)
             * condition that the decimal digits do not have.
             */
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 16 + (CUR - '0');
            else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
                val = val * 16 + (CUR - 'a') + 10;
            else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
                val = val * 16 + (CUR - 'A') + 10;
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            /* sticky: never reset once the value left the Unicode range */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
        /* decimal form: '&#' [0-9]+ ';' */
        SKIP(2);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 10 + (CUR - '0');
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            /* sticky: never reset once the value left the Unicode range */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else {
        /* not positioned on '&#': nothing is consumed, val stays 0 */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}

/**
 * xmlParseStringCharRef:
 * @ctxt:  an XML parser context
 * @str:  a pointer to an index in the string
 *
 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * Returns the value parsed (as an int), 0 in case of error, str will be
 *         updated to the current value of the index.  @str is left
 *         untouched when it is NULL, points to NULL, or does not start
 *         with "&#".
 */
static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
    const xmlChar *ptr;
    xmlChar cur;
    unsigned int val = 0;
    unsigned int outofrange = 0;

    if ((str == NULL) || (*str == NULL)) return(0);
    ptr = *str;
    cur = *ptr;
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
        /* hexadecimal form */
        ptr += 3;
        cur = *ptr;
        while (cur != ';') { /* Non input consuming loop */
            if ((cur >= '0') && (cur <= '9'))
                val = val * 16 + (cur - '0');
            else if ((cur >= 'a') && (cur <= 'f'))
                val = val * 16 + (cur - 'a') + 10;
            else if ((cur >= 'A') && (cur <= 'F'))
                val = val * 16 + (cur - 'A') + 10;
            else {
                /* also reached on the terminating 0 of the string */
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            /* sticky out-of-range marker, same scheme as xmlParseCharRef */
            if (val > 0x10FFFF)
                outofrange = val;

            ptr++;
            cur = *ptr;
        }
        if (cur == ';')
            ptr++;
    } else if  ((cur == '&') && (ptr[1] == '#')){
        /* decimal form */
        ptr += 2;
        cur = *ptr;
        while (cur != ';') { /* Non input consuming loops */
            if ((cur >= '0') && (cur <= '9'))
                val = val * 10 + (cur - '0');
            else {
                /* also reached on the terminating 0 of the string */
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            if (val > 0x10FFFF)
                outofrange = val;

            ptr++;
            cur = *ptr;
        }
        if (cur == ';')
            ptr++;
    } else {
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
        return(0);
    }
    /* report how far we got, on success and on digit errors alike */
    *str = ptr;

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseStringCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}

/**
 * xmlNewBlanksWrapperInputStream:
 * @ctxt:  an XML parser context
 * @entity:  an Entity pointer
 *
 * Create a new input stream for wrapping
 * blanks around a PEReference: the stream content is the string
 * " %name; " built in a freshly allocated buffer, which is released
 * through the stream's free callback (deallocblankswrapper).
 *
 * Returns the new input stream or NULL
 */

/* free callback of blanks-wrapper streams; also used to recognize them */
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}

static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr input;
    xmlChar *buffer;
    size_t length;
    if (entity == NULL) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "xmlNewBlanksWrapperInputStream entity\n");
        return(NULL);
    }
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "new blanks wrapper for entity: %s\n", entity->name);
    input = xmlNewInputStream(ctxt);
    if (input == NULL) {
        return(NULL);
    }
    /* ' ' + '%' + name + ';' + ' ' + terminating 0 */
    length = xmlStrlen(entity->name) + 5;
    buffer = xmlMallocAtomic(length);
    if (buffer == NULL) {
        xmlErrMemory(ctxt, NULL);
        xmlFree(input);
        return(NULL);
    }
    buffer [0] = ' ';
    buffer [1] = '%';
    buffer [length-3] = ';';
    buffer [length-2] = ' ';
    buffer [length-1] = 0;
    memcpy(buffer + 2, entity->name, length - 5);
    input->free = deallocblankswrapper;
    input->base = buffer;
    input->cur = buffer;
    /*
     * NOTE(review): length includes the terminating 0, so end points one
     * byte past the terminator rather than at it -- confirm this is what
     * the input consumers expect.
     */
    input->length = length;
    input->end = &buffer[length];
    return(input);
}

/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
2459 * - Included in literal in entity values 2460 * - Included as Parameter Entity reference within DTDs 2461 */ 2462void 2463xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2464 const xmlChar *name; 2465 xmlEntityPtr entity = NULL; 2466 xmlParserInputPtr input; 2467 2468 if (RAW != '%') return; 2469 switch(ctxt->instate) { 2470 case XML_PARSER_CDATA_SECTION: 2471 return; 2472 case XML_PARSER_COMMENT: 2473 return; 2474 case XML_PARSER_START_TAG: 2475 return; 2476 case XML_PARSER_END_TAG: 2477 return; 2478 case XML_PARSER_EOF: 2479 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2480 return; 2481 case XML_PARSER_PROLOG: 2482 case XML_PARSER_START: 2483 case XML_PARSER_MISC: 2484 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2485 return; 2486 case XML_PARSER_ENTITY_DECL: 2487 case XML_PARSER_CONTENT: 2488 case XML_PARSER_ATTRIBUTE_VALUE: 2489 case XML_PARSER_PI: 2490 case XML_PARSER_SYSTEM_LITERAL: 2491 case XML_PARSER_PUBLIC_LITERAL: 2492 /* we just ignore it there */ 2493 return; 2494 case XML_PARSER_EPILOG: 2495 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2496 return; 2497 case XML_PARSER_ENTITY_VALUE: 2498 /* 2499 * NOTE: in the case of entity values, we don't do the 2500 * substitution here since we need the literal 2501 * entity value to be able to save the internal 2502 * subset of the document. 2503 * This will be handled by xmlStringDecodeEntities 2504 */ 2505 return; 2506 case XML_PARSER_DTD: 2507 /* 2508 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2509 * In the internal DTD subset, parameter-entity references 2510 * can occur only where markup declarations can occur, not 2511 * within markup declarations. 
2512 * In that case this is handled in xmlParseMarkupDecl 2513 */ 2514 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2515 return; 2516 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2517 return; 2518 break; 2519 case XML_PARSER_IGNORE: 2520 return; 2521 } 2522 2523 NEXT; 2524 name = xmlParseName(ctxt); 2525 if (xmlParserDebugEntities) 2526 xmlGenericError(xmlGenericErrorContext, 2527 "PEReference: %s\n", name); 2528 if (name == NULL) { 2529 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2530 } else { 2531 if (RAW == ';') { 2532 NEXT; 2533 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2534 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2535 if (entity == NULL) { 2536 2537 /* 2538 * [ WFC: Entity Declared ] 2539 * In a document without any DTD, a document with only an 2540 * internal DTD subset which contains no parameter entity 2541 * references, or a document with "standalone='yes'", ... 2542 * ... The declaration of a parameter entity must precede 2543 * any reference to it... 2544 */ 2545 if ((ctxt->standalone == 1) || 2546 ((ctxt->hasExternalSubset == 0) && 2547 (ctxt->hasPErefs == 0))) { 2548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2549 "PEReference: %%%s; not found\n", name); 2550 } else { 2551 /* 2552 * [ VC: Entity Declared ] 2553 * In a document with an external subset or external 2554 * parameter entities with "standalone='no'", ... 2555 * ... The declaration of a parameter entity must precede 2556 * any reference to it... 
2557 */ 2558 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2559 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2560 "PEReference: %%%s; not found\n", 2561 name, NULL); 2562 } else 2563 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2564 "PEReference: %%%s; not found\n", 2565 name, NULL); 2566 ctxt->valid = 0; 2567 } 2568 } else if (ctxt->input->free != deallocblankswrapper) { 2569 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2570 if (xmlPushInput(ctxt, input) < 0) 2571 return; 2572 } else { 2573 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2574 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2575 xmlChar start[4]; 2576 xmlCharEncoding enc; 2577 2578 /* 2579 * handle the extra spaces added before and after 2580 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2581 * this is done independently. 2582 */ 2583 input = xmlNewEntityInputStream(ctxt, entity); 2584 if (xmlPushInput(ctxt, input) < 0) 2585 return; 2586 2587 /* 2588 * Get the 4 first bytes and decode the charset 2589 * if enc != XML_CHAR_ENCODING_NONE 2590 * plug some encoding conversion routines. 2591 * Note that, since we may have some non-UTF8 2592 * encoding (like UTF16, bug 135229), the 'length' 2593 * is not known, but we can calculate based upon 2594 * the amount of data in the buffer. 
2595 */ 2596 GROW 2597 if (ctxt->instate == XML_PARSER_EOF) 2598 return; 2599 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2600 start[0] = RAW; 2601 start[1] = NXT(1); 2602 start[2] = NXT(2); 2603 start[3] = NXT(3); 2604 enc = xmlDetectCharEncoding(start, 4); 2605 if (enc != XML_CHAR_ENCODING_NONE) { 2606 xmlSwitchEncoding(ctxt, enc); 2607 } 2608 } 2609 2610 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2611 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2612 (IS_BLANK_CH(NXT(5)))) { 2613 xmlParseTextDecl(ctxt); 2614 } 2615 } else { 2616 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2617 "PEReference: %s is not a parameter entity\n", 2618 name); 2619 } 2620 } 2621 } else { 2622 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2623 } 2624 } 2625} 2626 2627/* 2628 * Macro used to grow the current buffer. 2629 * buffer##_size is expected to be a size_t 2630 * mem_error: is expected to handle memory allocation failures 2631 */ 2632#define growBuffer(buffer, n) { \ 2633 xmlChar *tmp; \ 2634 size_t new_size = buffer##_size * 2 + n; \ 2635 if (new_size < buffer##_size) goto mem_error; \ 2636 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2637 if (tmp == NULL) goto mem_error; \ 2638 buffer = tmp; \ 2639 buffer##_size = new_size; \ 2640} 2641 2642/** 2643 * xmlStringLenDecodeEntities: 2644 * @ctxt: the parser context 2645 * @str: the input string 2646 * @len: the string length 2647 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2648 * @end: an end marker xmlChar, 0 if none 2649 * @end2: an end marker xmlChar, 0 if none 2650 * @end3: an end marker xmlChar, 0 if none 2651 * 2652 * Takes a entity string content and process to do the adequate substitutions. 2653 * 2654 * [67] Reference ::= EntityRef | CharRef 2655 * 2656 * [69] PEReference ::= '%' Name ';' 2657 * 2658 * Returns A newly allocated string with the substitution done. The caller 2659 * must deallocate it ! 
 *      NULL is returned if @ctxt or @str is NULL, if @len is negative,
 *      if the entity nesting depth limit is exceeded, on allocation
 *      failure, or when an entity loop / internal error was recorded
 *      while resolving a reference.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * This function recurses through xmlStringDecodeEntities() for nested
     * entities; ctxt->depth tracks that nesting.  The limit is 40, or
     * 1024 when XML_PARSE_HUGE is set.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        /* redundant with the loop condition above */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: always expanded, whatever @what says */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                COPY_BUF(0,buffer,nbchars,val);
            }
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* general entity reference */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy its single replacement char */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* entity with content: expand it recursively, then append */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            /* the expansion check runs only when growing */
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: emit the reference "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* parameter entity reference */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                /*
                 * ent->content may still be NULL here; in that case
                 * xmlStringDecodeEntities() returns NULL and nothing is
                 * appended.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* ordinary character: copy it and step over its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* fetch the next character, or 0 once @len bytes were consumed */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    /* allocation failure (also the target of growBuffer) */
    xmlErrMemory(ctxt, NULL);
int_error:
    /* common cleanup: drop the pending expansion and the output buffer */
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * This is xmlStringLenDecodeEntities() applied to the whole 0-terminated
 * string @str.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !  NULL if @ctxt or @str is NULL or on error.
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
                        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?
* 2850 * * 2851 ************************************************************************/ 2852 2853/** 2854 * areBlanks: 2855 * @ctxt: an XML parser context 2856 * @str: a xmlChar * 2857 * @len: the size of @str 2858 * @blank_chars: we know the chars are blanks 2859 * 2860 * Is this a sequence of blank chars that one can ignore ? 2861 * 2862 * Returns 1 if ignorable 0 otherwise. 2863 */ 2864 2865static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2866 int blank_chars) { 2867 int i, ret; 2868 xmlNodePtr lastChild; 2869 2870 /* 2871 * Don't spend time trying to differentiate them, the same callback is 2872 * used ! 2873 */ 2874 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2875 return(0); 2876 2877 /* 2878 * Check for xml:space value. 2879 */ 2880 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2881 (*(ctxt->space) == -2)) 2882 return(0); 2883 2884 /* 2885 * Check that the string is made of blanks 2886 */ 2887 if (blank_chars == 0) { 2888 for (i = 0;i < len;i++) 2889 if (!(IS_BLANK_CH(str[i]))) return(0); 2890 } 2891 2892 /* 2893 * Look if the element is mixed content in the DTD if available 2894 */ 2895 if (ctxt->node == NULL) return(0); 2896 if (ctxt->myDoc != NULL) { 2897 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2898 if (ret == 0) return(1); 2899 if (ret == 1) return(0); 2900 } 2901 2902 /* 2903 * Otherwise, heuristic :-\ 2904 */ 2905 if ((RAW != '<') && (RAW != 0xD)) return(0); 2906 if ((ctxt->node->children == NULL) && 2907 (RAW == '<') && (NXT(1) == '/')) return(0); 2908 2909 lastChild = xmlGetLastChild(ctxt->node); 2910 if (lastChild == NULL) { 2911 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2912 (ctxt->node->content != NULL)) return(0); 2913 } else if (xmlNodeIsText(lastChild)) 2914 return(0); 2915 else if ((ctxt->node->children != NULL) && 2916 (xmlNodeIsText(ctxt->node->children))) 2917 return(0); 2918 return(1); 2919} 2920 
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/

/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is returned whole, with *@prefix set to NULL, when it has no
 * ':', starts with ':', ends with its first ':', or (unless
 * XML_XML_NAMESPACE is defined) starts with "xml:".  Both the returned
 * string and *@prefix are newly allocated.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any.  NULL is returned if @prefix or @name is
 *   NULL or on allocation failure; on a failure met while copying the
 *   local part, *@prefix is still set to the already extracted prefix.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * First part: copy bytes up to the first ':' (or the end) into the
     * stack buffer.  c always holds the byte just read, cur points to
     * the one after it.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* keep some slack, the terminating 0 is stored below */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* "prefix:" with nothing after the colon: not split */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    /*
     * ret is the whole name if there was no ':', the prefix otherwise.
     * Ownership of a heap buffer moves to ret, and the stack buffer limit
     * is restored for the second part.
     */
    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* Second part: what was collected is the prefix */
        c = *cur;
        *prefix = ret;
        /*
         * NOTE(review): an empty local part was already handled by the
         * (*cur == 0) test above, so this looks unreachable -- confirm.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* only reported: the split goes on regardless */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        /* copy the local part, same two-stage scheme as for the prefix */
        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}

/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/* call counters of the name parsing routines, debug builds only */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
3127 * We still keep compatibility to pre-revision5 parsing semantic if the 3128 * new XML_PARSE_OLD10 option is given to the parser. 3129 */ 3130static int 3131xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3132 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3133 /* 3134 * Use the new checks of production [4] [4a] amd [5] of the 3135 * Update 5 of XML-1.0 3136 */ 3137 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3138 (((c >= 'a') && (c <= 'z')) || 3139 ((c >= 'A') && (c <= 'Z')) || 3140 (c == '_') || (c == ':') || 3141 ((c >= 0xC0) && (c <= 0xD6)) || 3142 ((c >= 0xD8) && (c <= 0xF6)) || 3143 ((c >= 0xF8) && (c <= 0x2FF)) || 3144 ((c >= 0x370) && (c <= 0x37D)) || 3145 ((c >= 0x37F) && (c <= 0x1FFF)) || 3146 ((c >= 0x200C) && (c <= 0x200D)) || 3147 ((c >= 0x2070) && (c <= 0x218F)) || 3148 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3149 ((c >= 0x3001) && (c <= 0xD7FF)) || 3150 ((c >= 0xF900) && (c <= 0xFDCF)) || 3151 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3152 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3153 return(1); 3154 } else { 3155 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3156 return(1); 3157 } 3158 return(0); 3159} 3160 3161static int 3162xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3163 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3164 /* 3165 * Use the new checks of production [4] [4a] amd [5] of the 3166 * Update 5 of XML-1.0 3167 */ 3168 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3169 (((c >= 'a') && (c <= 'z')) || 3170 ((c >= 'A') && (c <= 'Z')) || 3171 ((c >= '0') && (c <= '9')) || /* !start */ 3172 (c == '_') || (c == ':') || 3173 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3174 ((c >= 0xC0) && (c <= 0xD6)) || 3175 ((c >= 0xD8) && (c <= 0xF6)) || 3176 ((c >= 0xF8) && (c <= 0x2FF)) || 3177 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3178 ((c >= 0x370) && (c <= 0x37D)) || 3179 ((c >= 0x37F) && (c <= 0x1FFF)) || 3180 ((c >= 0x200C) && (c <= 0x200D)) || 3181 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 
3182 ((c >= 0x2070) && (c <= 0x218F)) || 3183 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3184 ((c >= 0x3001) && (c <= 0xD7FF)) || 3185 ((c >= 0xF900) && (c <= 0xFDCF)) || 3186 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3187 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3188 return(1); 3189 } else { 3190 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3191 (c == '.') || (c == '-') || 3192 (c == '_') || (c == ':') || 3193 (IS_COMBINING(c)) || 3194 (IS_EXTENDER(c))) 3195 return(1); 3196 } 3197 return(0); 3198} 3199 3200static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3201 int *len, int *alloc, int normalize); 3202 3203static const xmlChar * 3204xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3205 int len = 0, l; 3206 int c; 3207 int count = 0; 3208 3209#ifdef DEBUG 3210 nbParseNameComplex++; 3211#endif 3212 3213 /* 3214 * Handler for more complex cases 3215 */ 3216 GROW; 3217 if (ctxt->instate == XML_PARSER_EOF) 3218 return(NULL); 3219 c = CUR_CHAR(l); 3220 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3221 /* 3222 * Use the new checks of production [4] [4a] amd [5] of the 3223 * Update 5 of XML-1.0 3224 */ 3225 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3226 (!(((c >= 'a') && (c <= 'z')) || 3227 ((c >= 'A') && (c <= 'Z')) || 3228 (c == '_') || (c == ':') || 3229 ((c >= 0xC0) && (c <= 0xD6)) || 3230 ((c >= 0xD8) && (c <= 0xF6)) || 3231 ((c >= 0xF8) && (c <= 0x2FF)) || 3232 ((c >= 0x370) && (c <= 0x37D)) || 3233 ((c >= 0x37F) && (c <= 0x1FFF)) || 3234 ((c >= 0x200C) && (c <= 0x200D)) || 3235 ((c >= 0x2070) && (c <= 0x218F)) || 3236 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3237 ((c >= 0x3001) && (c <= 0xD7FF)) || 3238 ((c >= 0xF900) && (c <= 0xFDCF)) || 3239 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3240 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3241 return(NULL); 3242 } 3243 len += l; 3244 NEXTL(l); 3245 c = CUR_CHAR(l); 3246 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3247 (((c >= 'a') && (c <= 'z')) || 3248 ((c >= 'A') && (c <= 'Z')) || 3249 ((c >= '0') 
&& (c <= '9')) || /* !start */ 3250 (c == '_') || (c == ':') || 3251 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3252 ((c >= 0xC0) && (c <= 0xD6)) || 3253 ((c >= 0xD8) && (c <= 0xF6)) || 3254 ((c >= 0xF8) && (c <= 0x2FF)) || 3255 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3256 ((c >= 0x370) && (c <= 0x37D)) || 3257 ((c >= 0x37F) && (c <= 0x1FFF)) || 3258 ((c >= 0x200C) && (c <= 0x200D)) || 3259 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3260 ((c >= 0x2070) && (c <= 0x218F)) || 3261 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3262 ((c >= 0x3001) && (c <= 0xD7FF)) || 3263 ((c >= 0xF900) && (c <= 0xFDCF)) || 3264 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3265 ((c >= 0x10000) && (c <= 0xEFFFF)) 3266 )) { 3267 if (count++ > XML_PARSER_CHUNK_SIZE) { 3268 count = 0; 3269 GROW; 3270 if (ctxt->instate == XML_PARSER_EOF) 3271 return(NULL); 3272 } 3273 len += l; 3274 NEXTL(l); 3275 c = CUR_CHAR(l); 3276 } 3277 } else { 3278 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3279 (!IS_LETTER(c) && (c != '_') && 3280 (c != ':'))) { 3281 return(NULL); 3282 } 3283 len += l; 3284 NEXTL(l); 3285 c = CUR_CHAR(l); 3286 3287 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3288 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3289 (c == '.') || (c == '-') || 3290 (c == '_') || (c == ':') || 3291 (IS_COMBINING(c)) || 3292 (IS_EXTENDER(c)))) { 3293 if (count++ > XML_PARSER_CHUNK_SIZE) { 3294 count = 0; 3295 GROW; 3296 if (ctxt->instate == XML_PARSER_EOF) 3297 return(NULL); 3298 } 3299 len += l; 3300 NEXTL(l); 3301 c = CUR_CHAR(l); 3302 if (c == 0) { 3303 count = 0; 3304 GROW; 3305 if (ctxt->instate == XML_PARSER_EOF) 3306 return(NULL); 3307 c = CUR_CHAR(l); 3308 } 3309 } 3310 } 3311 if ((len > XML_MAX_NAME_LENGTH) && 3312 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3313 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3314 return(NULL); 3315 } 3316 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3317 return(xmlDictLookup(ctxt->dict, 
ctxt->input->cur - (len + 1), len)); 3318 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3319} 3320 3321/** 3322 * xmlParseName: 3323 * @ctxt: an XML parser context 3324 * 3325 * parse an XML name. 3326 * 3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3328 * CombiningChar | Extender 3329 * 3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3331 * 3332 * [6] Names ::= Name (#x20 Name)* 3333 * 3334 * Returns the Name parsed or NULL 3335 */ 3336 3337const xmlChar * 3338xmlParseName(xmlParserCtxtPtr ctxt) { 3339 const xmlChar *in; 3340 const xmlChar *ret; 3341 int count = 0; 3342 3343 GROW; 3344 3345#ifdef DEBUG 3346 nbParseName++; 3347#endif 3348 3349 /* 3350 * Accelerator for simple ASCII names 3351 */ 3352 in = ctxt->input->cur; 3353 if (((*in >= 0x61) && (*in <= 0x7A)) || 3354 ((*in >= 0x41) && (*in <= 0x5A)) || 3355 (*in == '_') || (*in == ':')) { 3356 in++; 3357 while (((*in >= 0x61) && (*in <= 0x7A)) || 3358 ((*in >= 0x41) && (*in <= 0x5A)) || 3359 ((*in >= 0x30) && (*in <= 0x39)) || 3360 (*in == '_') || (*in == '-') || 3361 (*in == ':') || (*in == '.')) 3362 in++; 3363 if ((*in > 0) && (*in < 0x80)) { 3364 count = in - ctxt->input->cur; 3365 if ((count > XML_MAX_NAME_LENGTH) && 3366 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3367 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3368 return(NULL); 3369 } 3370 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3371 ctxt->input->cur = in; 3372 ctxt->nbChars += count; 3373 ctxt->input->col += count; 3374 if (ret == NULL) 3375 xmlErrMemory(ctxt, NULL); 3376 return(ret); 3377 } 3378 } 3379 /* accelerator for special cases */ 3380 return(xmlParseNameComplex(ctxt)); 3381} 3382 3383static const xmlChar * 3384xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3385 int len = 0, l; 3386 int c; 3387 int count = 0; 3388 3389#ifdef DEBUG 3390 nbParseNCNameComplex++; 3391#endif 3392 3393 /* 3394 * Handler for more complex cases 3395 */ 3396 GROW; 3397 c = CUR_CHAR(l); 3398 if ((c == ' 
') || (c == '>') || (c == '/') || /* accelerators */ 3399 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3400 return(NULL); 3401 } 3402 3403 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3404 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3405 if (count++ > XML_PARSER_CHUNK_SIZE) { 3406 if ((len > XML_MAX_NAME_LENGTH) && 3407 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3408 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3409 return(NULL); 3410 } 3411 count = 0; 3412 GROW; 3413 if (ctxt->instate == XML_PARSER_EOF) 3414 return(NULL); 3415 } 3416 len += l; 3417 NEXTL(l); 3418 c = CUR_CHAR(l); 3419 if (c == 0) { 3420 count = 0; 3421 GROW; 3422 if (ctxt->instate == XML_PARSER_EOF) 3423 return(NULL); 3424 c = CUR_CHAR(l); 3425 } 3426 } 3427 if ((len > XML_MAX_NAME_LENGTH) && 3428 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3429 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3430 return(NULL); 3431 } 3432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3433} 3434 3435/** 3436 * xmlParseNCName: 3437 * @ctxt: an XML parser context 3438 * @len: lenght of the string parsed 3439 * 3440 * parse an XML name. 3441 * 3442 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3443 * CombiningChar | Extender 3444 * 3445 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3446 * 3447 * Returns the Name parsed or NULL 3448 */ 3449 3450static const xmlChar * 3451xmlParseNCName(xmlParserCtxtPtr ctxt) { 3452 const xmlChar *in; 3453 const xmlChar *ret; 3454 int count = 0; 3455 3456#ifdef DEBUG 3457 nbParseNCName++; 3458#endif 3459 3460 /* 3461 * Accelerator for simple ASCII names 3462 */ 3463 in = ctxt->input->cur; 3464 if (((*in >= 0x61) && (*in <= 0x7A)) || 3465 ((*in >= 0x41) && (*in <= 0x5A)) || 3466 (*in == '_')) { 3467 in++; 3468 while (((*in >= 0x61) && (*in <= 0x7A)) || 3469 ((*in >= 0x41) && (*in <= 0x5A)) || 3470 ((*in >= 0x30) && (*in <= 0x39)) || 3471 (*in == '_') || (*in == '-') || 3472 (*in == '.')) 3473 in++; 3474 if ((*in > 0) && (*in < 0x80)) { 3475 count = in - ctxt->input->cur; 3476 if ((count > XML_MAX_NAME_LENGTH) && 3477 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3478 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3479 return(NULL); 3480 } 3481 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3482 ctxt->input->cur = in; 3483 ctxt->nbChars += count; 3484 ctxt->input->col += count; 3485 if (ret == NULL) { 3486 xmlErrMemory(ctxt, NULL); 3487 } 3488 return(ret); 3489 } 3490 } 3491 return(xmlParseNCNameComplex(ctxt)); 3492} 3493 3494/** 3495 * xmlParseNameAndCompare: 3496 * @ctxt: an XML parser context 3497 * 3498 * parse an XML name and compares for match 3499 * (specialized for endtag parsing) 3500 * 3501 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3502 * and the name for mismatch 3503 */ 3504 3505static const xmlChar * 3506xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3507 register const xmlChar *cmp = other; 3508 register const xmlChar *in; 3509 const xmlChar *ret; 3510 3511 GROW; 3512 if (ctxt->instate == XML_PARSER_EOF) 3513 return(NULL); 3514 3515 in = ctxt->input->cur; 3516 while (*in != 0 && *in == *cmp) { 3517 ++in; 3518 ++cmp; 3519 ctxt->input->col++; 
3520 } 3521 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3522 /* success */ 3523 ctxt->input->cur = in; 3524 return (const xmlChar*) 1; 3525 } 3526 /* failure (or end of input buffer), check with full function */ 3527 ret = xmlParseName (ctxt); 3528 /* strings coming from the dictionnary direct compare possible */ 3529 if (ret == other) { 3530 return (const xmlChar*) 1; 3531 } 3532 return ret; 3533} 3534 3535/** 3536 * xmlParseStringName: 3537 * @ctxt: an XML parser context 3538 * @str: a pointer to the string pointer (IN/OUT) 3539 * 3540 * parse an XML name. 3541 * 3542 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3543 * CombiningChar | Extender 3544 * 3545 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3546 * 3547 * [6] Names ::= Name (#x20 Name)* 3548 * 3549 * Returns the Name parsed or NULL. The @str pointer 3550 * is updated to the current location in the string. 3551 */ 3552 3553static xmlChar * 3554xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3555 xmlChar buf[XML_MAX_NAMELEN + 5]; 3556 const xmlChar *cur = *str; 3557 int len = 0, l; 3558 int c; 3559 3560#ifdef DEBUG 3561 nbParseStringName++; 3562#endif 3563 3564 c = CUR_SCHAR(cur, l); 3565 if (!xmlIsNameStartChar(ctxt, c)) { 3566 return(NULL); 3567 } 3568 3569 COPY_BUF(l,buf,len,c); 3570 cur += l; 3571 c = CUR_SCHAR(cur, l); 3572 while (xmlIsNameChar(ctxt, c)) { 3573 COPY_BUF(l,buf,len,c); 3574 cur += l; 3575 c = CUR_SCHAR(cur, l); 3576 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3577 /* 3578 * Okay someone managed to make a huge name, so he's ready to pay 3579 * for the processing speed. 
3580 */ 3581 xmlChar *buffer; 3582 int max = len * 2; 3583 3584 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3585 if (buffer == NULL) { 3586 xmlErrMemory(ctxt, NULL); 3587 return(NULL); 3588 } 3589 memcpy(buffer, buf, len); 3590 while (xmlIsNameChar(ctxt, c)) { 3591 if (len + 10 > max) { 3592 xmlChar *tmp; 3593 3594 if ((len > XML_MAX_NAME_LENGTH) && 3595 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3596 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3597 xmlFree(buffer); 3598 return(NULL); 3599 } 3600 max *= 2; 3601 tmp = (xmlChar *) xmlRealloc(buffer, 3602 max * sizeof(xmlChar)); 3603 if (tmp == NULL) { 3604 xmlErrMemory(ctxt, NULL); 3605 xmlFree(buffer); 3606 return(NULL); 3607 } 3608 buffer = tmp; 3609 } 3610 COPY_BUF(l,buffer,len,c); 3611 cur += l; 3612 c = CUR_SCHAR(cur, l); 3613 } 3614 buffer[len] = 0; 3615 *str = cur; 3616 return(buffer); 3617 } 3618 } 3619 if ((len > XML_MAX_NAME_LENGTH) && 3620 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3621 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3622 return(NULL); 3623 } 3624 *str = cur; 3625 return(xmlStrndup(buf, len)); 3626} 3627 3628/** 3629 * xmlParseNmtoken: 3630 * @ctxt: an XML parser context 3631 * 3632 * parse an XML Nmtoken. 
3633 * 3634 * [7] Nmtoken ::= (NameChar)+ 3635 * 3636 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3637 * 3638 * Returns the Nmtoken parsed or NULL 3639 */ 3640 3641xmlChar * 3642xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3643 xmlChar buf[XML_MAX_NAMELEN + 5]; 3644 int len = 0, l; 3645 int c; 3646 int count = 0; 3647 3648#ifdef DEBUG 3649 nbParseNmToken++; 3650#endif 3651 3652 GROW; 3653 if (ctxt->instate == XML_PARSER_EOF) 3654 return(NULL); 3655 c = CUR_CHAR(l); 3656 3657 while (xmlIsNameChar(ctxt, c)) { 3658 if (count++ > XML_PARSER_CHUNK_SIZE) { 3659 count = 0; 3660 GROW; 3661 } 3662 COPY_BUF(l,buf,len,c); 3663 NEXTL(l); 3664 c = CUR_CHAR(l); 3665 if (c == 0) { 3666 count = 0; 3667 GROW; 3668 if (ctxt->instate == XML_PARSER_EOF) 3669 return(NULL); 3670 c = CUR_CHAR(l); 3671 } 3672 if (len >= XML_MAX_NAMELEN) { 3673 /* 3674 * Okay someone managed to make a huge token, so he's ready to pay 3675 * for the processing speed. 3676 */ 3677 xmlChar *buffer; 3678 int max = len * 2; 3679 3680 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3681 if (buffer == NULL) { 3682 xmlErrMemory(ctxt, NULL); 3683 return(NULL); 3684 } 3685 memcpy(buffer, buf, len); 3686 while (xmlIsNameChar(ctxt, c)) { 3687 if (count++ > XML_PARSER_CHUNK_SIZE) { 3688 count = 0; 3689 GROW; 3690 if (ctxt->instate == XML_PARSER_EOF) { 3691 xmlFree(buffer); 3692 return(NULL); 3693 } 3694 } 3695 if (len + 10 > max) { 3696 xmlChar *tmp; 3697 3698 if ((max > XML_MAX_NAME_LENGTH) && 3699 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3700 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3701 xmlFree(buffer); 3702 return(NULL); 3703 } 3704 max *= 2; 3705 tmp = (xmlChar *) xmlRealloc(buffer, 3706 max * sizeof(xmlChar)); 3707 if (tmp == NULL) { 3708 xmlErrMemory(ctxt, NULL); 3709 xmlFree(buffer); 3710 return(NULL); 3711 } 3712 buffer = tmp; 3713 } 3714 COPY_BUF(l,buffer,len,c); 3715 NEXTL(l); 3716 c = CUR_CHAR(l); 3717 } 3718 buffer[len] = 0; 3719 return(buffer); 3720 } 3721 } 3722 if (len == 0) 3723 
return(NULL); 3724 if ((len > XML_MAX_NAME_LENGTH) && 3725 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3726 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3727 return(NULL); 3728 } 3729 return(xmlStrndup(buf, len)); 3730} 3731 3732/** 3733 * xmlParseEntityValue: 3734 * @ctxt: an XML parser context 3735 * @orig: if non-NULL store a copy of the original entity value 3736 * 3737 * parse a value for ENTITY declarations 3738 * 3739 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3740 * "'" ([^%&'] | PEReference | Reference)* "'" 3741 * 3742 * Returns the EntityValue parsed with reference substituted or NULL 3743 */ 3744 3745xmlChar * 3746xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3747 xmlChar *buf = NULL; 3748 int len = 0; 3749 int size = XML_PARSER_BUFFER_SIZE; 3750 int c, l; 3751 xmlChar stop; 3752 xmlChar *ret = NULL; 3753 const xmlChar *cur = NULL; 3754 xmlParserInputPtr input; 3755 3756 if (RAW == '"') stop = '"'; 3757 else if (RAW == '\'') stop = '\''; 3758 else { 3759 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3760 return(NULL); 3761 } 3762 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3763 if (buf == NULL) { 3764 xmlErrMemory(ctxt, NULL); 3765 return(NULL); 3766 } 3767 3768 /* 3769 * The content of the entity definition is copied in a buffer. 3770 */ 3771 3772 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3773 input = ctxt->input; 3774 GROW; 3775 if (ctxt->instate == XML_PARSER_EOF) { 3776 xmlFree(buf); 3777 return(NULL); 3778 } 3779 NEXT; 3780 c = CUR_CHAR(l); 3781 /* 3782 * NOTE: 4.4.5 Included in Literal 3783 * When a parameter entity reference appears in a literal entity 3784 * value, ... a single or double quote character in the replacement 3785 * text is always treated as a normal data character and will not 3786 * terminate the literal. 
3787 * In practice it means we stop the loop only when back at parsing 3788 * the initial entity and the quote is found 3789 */ 3790 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3791 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3792 if (len + 5 >= size) { 3793 xmlChar *tmp; 3794 3795 size *= 2; 3796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3797 if (tmp == NULL) { 3798 xmlErrMemory(ctxt, NULL); 3799 xmlFree(buf); 3800 return(NULL); 3801 } 3802 buf = tmp; 3803 } 3804 COPY_BUF(l,buf,len,c); 3805 NEXTL(l); 3806 /* 3807 * Pop-up of finished entities. 3808 */ 3809 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3810 xmlPopInput(ctxt); 3811 3812 GROW; 3813 c = CUR_CHAR(l); 3814 if (c == 0) { 3815 GROW; 3816 c = CUR_CHAR(l); 3817 } 3818 } 3819 buf[len] = 0; 3820 if (ctxt->instate == XML_PARSER_EOF) { 3821 xmlFree(buf); 3822 return(NULL); 3823 } 3824 3825 /* 3826 * Raise problem w.r.t. '&' and '%' being used in non-entities 3827 * reference constructs. Note Charref will be handled in 3828 * xmlStringDecodeEntities() 3829 */ 3830 cur = buf; 3831 while (*cur != 0) { /* non input consuming */ 3832 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3833 xmlChar *name; 3834 xmlChar tmp = *cur; 3835 3836 cur++; 3837 name = xmlParseStringName(ctxt, &cur); 3838 if ((name == NULL) || (*cur != ';')) { 3839 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3840 "EntityValue: '%c' forbidden except for entities references\n", 3841 tmp); 3842 } 3843 if ((tmp == '%') && (ctxt->inSubset == 1) && 3844 (ctxt->inputNr == 1)) { 3845 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3846 } 3847 if (name != NULL) 3848 xmlFree(name); 3849 if (*cur == 0) 3850 break; 3851 } 3852 cur++; 3853 } 3854 3855 /* 3856 * Then PEReference entities are substituted. 
3857 */ 3858 if (c != stop) { 3859 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3860 xmlFree(buf); 3861 } else { 3862 NEXT; 3863 /* 3864 * NOTE: 4.4.7 Bypassed 3865 * When a general entity reference appears in the EntityValue in 3866 * an entity declaration, it is bypassed and left as is. 3867 * so XML_SUBSTITUTE_REF is not set here. 3868 */ 3869 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3870 0, 0, 0); 3871 if (orig != NULL) 3872 *orig = buf; 3873 else 3874 xmlFree(buf); 3875 } 3876 3877 return(ret); 3878} 3879 3880/** 3881 * xmlParseAttValueComplex: 3882 * @ctxt: an XML parser context 3883 * @len: the resulting attribute len 3884 * @normalize: wether to apply the inner normalization 3885 * 3886 * parse a value for an attribute, this is the fallback function 3887 * of xmlParseAttValue() when the attribute parsing requires handling 3888 * of non-ASCII characters, or normalization compaction. 3889 * 3890 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3891 */ 3892static xmlChar * 3893xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3894 xmlChar limit = 0; 3895 xmlChar *buf = NULL; 3896 xmlChar *rep = NULL; 3897 size_t len = 0; 3898 size_t buf_size = 0; 3899 int c, l, in_space = 0; 3900 xmlChar *current = NULL; 3901 xmlEntityPtr ent; 3902 3903 if (NXT(0) == '"') { 3904 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3905 limit = '"'; 3906 NEXT; 3907 } else if (NXT(0) == '\'') { 3908 limit = '\''; 3909 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3910 NEXT; 3911 } else { 3912 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3913 return(NULL); 3914 } 3915 3916 /* 3917 * allocate a translation buffer. 3918 */ 3919 buf_size = XML_PARSER_BUFFER_SIZE; 3920 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3921 if (buf == NULL) goto mem_error; 3922 3923 /* 3924 * OK loop until we reach one of the ending char or a size limit. 
3925 */ 3926 c = CUR_CHAR(l); 3927 while (((NXT(0) != limit) && /* checked */ 3928 (IS_CHAR(c)) && (c != '<')) && 3929 (ctxt->instate != XML_PARSER_EOF)) { 3930 /* 3931 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3932 * special option is given 3933 */ 3934 if ((len > XML_MAX_TEXT_LENGTH) && 3935 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3936 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3937 "AttValue lenght too long\n"); 3938 goto mem_error; 3939 } 3940 if (c == 0) break; 3941 if (c == '&') { 3942 in_space = 0; 3943 if (NXT(1) == '#') { 3944 int val = xmlParseCharRef(ctxt); 3945 3946 if (val == '&') { 3947 if (ctxt->replaceEntities) { 3948 if (len + 10 > buf_size) { 3949 growBuffer(buf, 10); 3950 } 3951 buf[len++] = '&'; 3952 } else { 3953 /* 3954 * The reparsing will be done in xmlStringGetNodeList() 3955 * called by the attribute() function in SAX.c 3956 */ 3957 if (len + 10 > buf_size) { 3958 growBuffer(buf, 10); 3959 } 3960 buf[len++] = '&'; 3961 buf[len++] = '#'; 3962 buf[len++] = '3'; 3963 buf[len++] = '8'; 3964 buf[len++] = ';'; 3965 } 3966 } else if (val != 0) { 3967 if (len + 10 > buf_size) { 3968 growBuffer(buf, 10); 3969 } 3970 len += xmlCopyChar(0, &buf[len], val); 3971 } 3972 } else { 3973 ent = xmlParseEntityRef(ctxt); 3974 ctxt->nbentities++; 3975 if (ent != NULL) 3976 ctxt->nbentities += ent->owner; 3977 if ((ent != NULL) && 3978 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3979 if (len + 10 > buf_size) { 3980 growBuffer(buf, 10); 3981 } 3982 if ((ctxt->replaceEntities == 0) && 3983 (ent->content[0] == '&')) { 3984 buf[len++] = '&'; 3985 buf[len++] = '#'; 3986 buf[len++] = '3'; 3987 buf[len++] = '8'; 3988 buf[len++] = ';'; 3989 } else { 3990 buf[len++] = ent->content[0]; 3991 } 3992 } else if ((ent != NULL) && 3993 (ctxt->replaceEntities != 0)) { 3994 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3995 rep = xmlStringDecodeEntities(ctxt, ent->content, 3996 XML_SUBSTITUTE_REF, 3997 0, 0, 0); 3998 if (rep != 
NULL) { 3999 current = rep; 4000 while (*current != 0) { /* non input consuming */ 4001 if ((*current == 0xD) || (*current == 0xA) || 4002 (*current == 0x9)) { 4003 buf[len++] = 0x20; 4004 current++; 4005 } else 4006 buf[len++] = *current++; 4007 if (len + 10 > buf_size) { 4008 growBuffer(buf, 10); 4009 } 4010 } 4011 xmlFree(rep); 4012 rep = NULL; 4013 } 4014 } else { 4015 if (len + 10 > buf_size) { 4016 growBuffer(buf, 10); 4017 } 4018 if (ent->content != NULL) 4019 buf[len++] = ent->content[0]; 4020 } 4021 } else if (ent != NULL) { 4022 int i = xmlStrlen(ent->name); 4023 const xmlChar *cur = ent->name; 4024 4025 /* 4026 * This may look absurd but is needed to detect 4027 * entities problems 4028 */ 4029 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4030 (ent->content != NULL)) { 4031 rep = xmlStringDecodeEntities(ctxt, ent->content, 4032 XML_SUBSTITUTE_REF, 0, 0, 0); 4033 if (rep != NULL) { 4034 xmlFree(rep); 4035 rep = NULL; 4036 } 4037 } 4038 4039 /* 4040 * Just output the reference 4041 */ 4042 buf[len++] = '&'; 4043 while (len + i + 10 > buf_size) { 4044 growBuffer(buf, i + 10); 4045 } 4046 for (;i > 0;i--) 4047 buf[len++] = *cur++; 4048 buf[len++] = ';'; 4049 } 4050 } 4051 } else { 4052 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4053 if ((len != 0) || (!normalize)) { 4054 if ((!normalize) || (!in_space)) { 4055 COPY_BUF(l,buf,len,0x20); 4056 while (len + 10 > buf_size) { 4057 growBuffer(buf, 10); 4058 } 4059 } 4060 in_space = 1; 4061 } 4062 } else { 4063 in_space = 0; 4064 COPY_BUF(l,buf,len,c); 4065 if (len + 10 > buf_size) { 4066 growBuffer(buf, 10); 4067 } 4068 } 4069 NEXTL(l); 4070 } 4071 GROW; 4072 c = CUR_CHAR(l); 4073 } 4074 if (ctxt->instate == XML_PARSER_EOF) 4075 goto error; 4076 4077 if ((in_space) && (normalize)) { 4078 while (buf[len - 1] == 0x20) len--; 4079 } 4080 buf[len] = 0; 4081 if (RAW == '<') { 4082 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4083 } else if (RAW != limit) { 4084 if ((c != 0) && 
(!IS_CHAR(c))) { 4085 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4086 "invalid character in attribute value\n"); 4087 } else { 4088 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4089 "AttValue: ' expected\n"); 4090 } 4091 } else 4092 NEXT; 4093 4094 /* 4095 * There we potentially risk an overflow, don't allow attribute value of 4096 * lenght more than INT_MAX it is a very reasonnable assumption ! 4097 */ 4098 if (len >= INT_MAX) { 4099 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4100 "AttValue lenght too long\n"); 4101 goto mem_error; 4102 } 4103 4104 if (attlen != NULL) *attlen = (int) len; 4105 return(buf); 4106 4107mem_error: 4108 xmlErrMemory(ctxt, NULL); 4109error: 4110 if (buf != NULL) 4111 xmlFree(buf); 4112 if (rep != NULL) 4113 xmlFree(rep); 4114 return(NULL); 4115} 4116 4117/** 4118 * xmlParseAttValue: 4119 * @ctxt: an XML parser context 4120 * 4121 * parse a value for an attribute 4122 * Note: the parser won't do substitution of entities here, this 4123 * will be handled later in xmlStringGetNodeList 4124 * 4125 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4126 * "'" ([^<&'] | Reference)* "'" 4127 * 4128 * 3.3.3 Attribute-Value Normalization: 4129 * Before the value of an attribute is passed to the application or 4130 * checked for validity, the XML processor must normalize it as follows: 4131 * - a character reference is processed by appending the referenced 4132 * character to the attribute value 4133 * - an entity reference is processed by recursively processing the 4134 * replacement text of the entity 4135 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4136 * appending #x20 to the normalized value, except that only a single 4137 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4138 * parsed entity or the literal entity value of an internal parsed entity 4139 * - other characters are processed by appending them to the normalized value 4140 * If the declared value is not CDATA, then the 
XML processor must further 4141 * process the normalized attribute value by discarding any leading and 4142 * trailing space (#x20) characters, and by replacing sequences of space 4143 * (#x20) characters by a single space (#x20) character. 4144 * All attributes for which no declaration has been read should be treated 4145 * by a non-validating parser as if declared CDATA. 4146 * 4147 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4148 */ 4149 4150 4151xmlChar * 4152xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4153 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4154 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4155} 4156 4157/** 4158 * xmlParseSystemLiteral: 4159 * @ctxt: an XML parser context 4160 * 4161 * parse an XML Literal 4162 * 4163 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4164 * 4165 * Returns the SystemLiteral parsed or NULL 4166 */ 4167 4168xmlChar * 4169xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4170 xmlChar *buf = NULL; 4171 int len = 0; 4172 int size = XML_PARSER_BUFFER_SIZE; 4173 int cur, l; 4174 xmlChar stop; 4175 int state = ctxt->instate; 4176 int count = 0; 4177 4178 SHRINK; 4179 if (RAW == '"') { 4180 NEXT; 4181 stop = '"'; 4182 } else if (RAW == '\'') { 4183 NEXT; 4184 stop = '\''; 4185 } else { 4186 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4187 return(NULL); 4188 } 4189 4190 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4191 if (buf == NULL) { 4192 xmlErrMemory(ctxt, NULL); 4193 return(NULL); 4194 } 4195 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4196 cur = CUR_CHAR(l); 4197 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4198 if (len + 5 >= size) { 4199 xmlChar *tmp; 4200 4201 if ((size > XML_MAX_NAME_LENGTH) && 4202 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4203 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4204 xmlFree(buf); 4205 ctxt->instate = (xmlParserInputState) state; 4206 return(NULL); 4207 } 4208 size *= 2; 4209 
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4210 if (tmp == NULL) { 4211 xmlFree(buf); 4212 xmlErrMemory(ctxt, NULL); 4213 ctxt->instate = (xmlParserInputState) state; 4214 return(NULL); 4215 } 4216 buf = tmp; 4217 } 4218 count++; 4219 if (count > 50) { 4220 GROW; 4221 count = 0; 4222 if (ctxt->instate == XML_PARSER_EOF) { 4223 xmlFree(buf); 4224 return(NULL); 4225 } 4226 } 4227 COPY_BUF(l,buf,len,cur); 4228 NEXTL(l); 4229 cur = CUR_CHAR(l); 4230 if (cur == 0) { 4231 GROW; 4232 SHRINK; 4233 cur = CUR_CHAR(l); 4234 } 4235 } 4236 buf[len] = 0; 4237 ctxt->instate = (xmlParserInputState) state; 4238 if (!IS_CHAR(cur)) { 4239 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4240 } else { 4241 NEXT; 4242 } 4243 return(buf); 4244} 4245 4246/** 4247 * xmlParsePubidLiteral: 4248 * @ctxt: an XML parser context 4249 * 4250 * parse an XML public literal 4251 * 4252 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4253 * 4254 * Returns the PubidLiteral parsed or NULL. 
4255 */ 4256 4257xmlChar * 4258xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4259 xmlChar *buf = NULL; 4260 int len = 0; 4261 int size = XML_PARSER_BUFFER_SIZE; 4262 xmlChar cur; 4263 xmlChar stop; 4264 int count = 0; 4265 xmlParserInputState oldstate = ctxt->instate; 4266 4267 SHRINK; 4268 if (RAW == '"') { 4269 NEXT; 4270 stop = '"'; 4271 } else if (RAW == '\'') { 4272 NEXT; 4273 stop = '\''; 4274 } else { 4275 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4276 return(NULL); 4277 } 4278 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4279 if (buf == NULL) { 4280 xmlErrMemory(ctxt, NULL); 4281 return(NULL); 4282 } 4283 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4284 cur = CUR; 4285 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4286 if (len + 1 >= size) { 4287 xmlChar *tmp; 4288 4289 if ((size > XML_MAX_NAME_LENGTH) && 4290 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4291 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4292 xmlFree(buf); 4293 return(NULL); 4294 } 4295 size *= 2; 4296 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4297 if (tmp == NULL) { 4298 xmlErrMemory(ctxt, NULL); 4299 xmlFree(buf); 4300 return(NULL); 4301 } 4302 buf = tmp; 4303 } 4304 buf[len++] = cur; 4305 count++; 4306 if (count > 50) { 4307 GROW; 4308 count = 0; 4309 if (ctxt->instate == XML_PARSER_EOF) { 4310 xmlFree(buf); 4311 return(NULL); 4312 } 4313 } 4314 NEXT; 4315 cur = CUR; 4316 if (cur == 0) { 4317 GROW; 4318 SHRINK; 4319 cur = CUR; 4320 } 4321 } 4322 buf[len] = 0; 4323 if (cur != stop) { 4324 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4325 } else { 4326 NEXT; 4327 } 4328 ctxt->instate = oldstate; 4329 return(buf); 4330} 4331 4332static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4333 4334/* 4335 * used for the test in the inner loop of the char data testing 4336 */ 4337static const unsigned char test_char_data[256] = { 4338 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4339 0x00, 0x09, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4340 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4341 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4342 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4343 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4344 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4345 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4346 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4347 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4348 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4349 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4350 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4351 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4352 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4353 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4354 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4355 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4356 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4358 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4359 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4360 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4361 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4362 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4367 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4368 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4369 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4370}; 4371 4372/** 4373 * xmlParseCharData: 4374 * @ctxt: an XML parser context 4375 * @cdata: int indicating whether we are within a CDATA section 4376 * 4377 * parse a CharData section. 4378 * if we are within a CDATA section ']]>' marks an end of section. 
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * runs of plain ASCII text directly to the SAX callbacks; anything it
 * cannot handle is left to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position restored before falling back to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * The text up to the markup is made of spaces and line
                 * feeds only: deliver it and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged as plain data in test_char_data */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the run of text scanned so far */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * For CR LF the current position is moved onto the LF
                 * (the CR is skipped) and the scan goes on; a lone CR is
                 * left for the complex parser.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* restore the position recorded after the last delivered run */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
@ctxt: an XML parser context 4533 * @cdata: int indicating whether we are within a CDATA section 4534 * 4535 * parse a CharData section.this is the fallback function 4536 * of xmlParseCharData() when the parsing requires handling 4537 * of non-ASCII characters. 4538 */ 4539static void 4540xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4541 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4542 int nbchar = 0; 4543 int cur, l; 4544 int count = 0; 4545 4546 SHRINK; 4547 GROW; 4548 cur = CUR_CHAR(l); 4549 while ((cur != '<') && /* checked */ 4550 (cur != '&') && 4551 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4552 if ((cur == ']') && (NXT(1) == ']') && 4553 (NXT(2) == '>')) { 4554 if (cdata) break; 4555 else { 4556 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4557 } 4558 } 4559 COPY_BUF(l,buf,nbchar,cur); 4560 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4561 buf[nbchar] = 0; 4562 4563 /* 4564 * OK the segment is to be consumed as chars. 4565 */ 4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4567 if (areBlanks(ctxt, buf, nbchar, 0)) { 4568 if (ctxt->sax->ignorableWhitespace != NULL) 4569 ctxt->sax->ignorableWhitespace(ctxt->userData, 4570 buf, nbchar); 4571 } else { 4572 if (ctxt->sax->characters != NULL) 4573 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4574 if ((ctxt->sax->characters != 4575 ctxt->sax->ignorableWhitespace) && 4576 (*ctxt->space == -1)) 4577 *ctxt->space = -2; 4578 } 4579 } 4580 nbchar = 0; 4581 /* something really bad happened in the SAX callback */ 4582 if (ctxt->instate != XML_PARSER_CONTENT) 4583 return; 4584 } 4585 count++; 4586 if (count > 50) { 4587 GROW; 4588 count = 0; 4589 if (ctxt->instate == XML_PARSER_EOF) 4590 return; 4591 } 4592 NEXTL(l); 4593 cur = CUR_CHAR(l); 4594 } 4595 if (nbchar != 0) { 4596 buf[nbchar] = 0; 4597 /* 4598 * OK the segment is to be consumed as chars. 
4599 */ 4600 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4601 if (areBlanks(ctxt, buf, nbchar, 0)) { 4602 if (ctxt->sax->ignorableWhitespace != NULL) 4603 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4604 } else { 4605 if (ctxt->sax->characters != NULL) 4606 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4607 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4608 (*ctxt->space == -1)) 4609 *ctxt->space = -2; 4610 } 4611 } 4612 } 4613 if ((cur != 0) && (!IS_CHAR(cur))) { 4614 /* Generate the error and skip the offending character */ 4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4616 "PCDATA invalid Char value %d\n", 4617 cur); 4618 NEXTL(l); 4619 } 4620} 4621 4622/** 4623 * xmlParseExternalID: 4624 * @ctxt: an XML parser context 4625 * @publicID: a xmlChar** receiving PubidLiteral 4626 * @strict: indicate whether we should restrict parsing to only 4627 * production [75], see NOTE below 4628 * 4629 * Parse an External ID or a Public ID 4630 * 4631 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4632 * 'PUBLIC' S PubidLiteral S SystemLiteral 4633 * 4634 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4635 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4636 * 4637 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4638 * 4639 * Returns the function returns SystemLiteral and in the second 4640 * case publicID receives PubidLiteral, is strict is off 4641 * it is possible to return NULL and have publicID set. 
4642 */ 4643 4644xmlChar * 4645xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4646 xmlChar *URI = NULL; 4647 4648 SHRINK; 4649 4650 *publicID = NULL; 4651 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4652 SKIP(6); 4653 if (!IS_BLANK_CH(CUR)) { 4654 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4655 "Space required after 'SYSTEM'\n"); 4656 } 4657 SKIP_BLANKS; 4658 URI = xmlParseSystemLiteral(ctxt); 4659 if (URI == NULL) { 4660 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4661 } 4662 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4663 SKIP(6); 4664 if (!IS_BLANK_CH(CUR)) { 4665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4666 "Space required after 'PUBLIC'\n"); 4667 } 4668 SKIP_BLANKS; 4669 *publicID = xmlParsePubidLiteral(ctxt); 4670 if (*publicID == NULL) { 4671 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4672 } 4673 if (strict) { 4674 /* 4675 * We don't handle [83] so "S SystemLiteral" is required. 4676 */ 4677 if (!IS_BLANK_CH(CUR)) { 4678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4679 "Space required after the Public Identifier\n"); 4680 } 4681 } else { 4682 /* 4683 * We handle [83] so we return immediately, if 4684 * "S SystemLiteral" is not detected. From a purely parsing 4685 * point of view that's a nice mess. 4686 */ 4687 const xmlChar *ptr; 4688 GROW; 4689 4690 ptr = CUR_PTR; 4691 if (!IS_BLANK_CH(*ptr)) return(NULL); 4692 4693 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4694 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4695 } 4696 SKIP_BLANKS; 4697 URI = xmlParseSystemLiteral(ctxt); 4698 if (URI == NULL) { 4699 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4700 } 4701 } 4702 return(URI); 4703} 4704 4705/** 4706 * xmlParseCommentComplex: 4707 * @ctxt: an XML parser context 4708 * @buf: the already parsed part of the buffer 4709 * @len: number of bytes filles in the buffer 4710 * @size: allocated size of the buffer 4711 * 4712 * Skip an XML (SGML) comment <!-- .... 
--> 4713 * The spec says that "For compatibility, the string "--" (double-hyphen) 4714 * must not occur within comments. " 4715 * This is the slow routine in case the accelerator for ascii didn't work 4716 * 4717 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4718 */ 4719static void 4720xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4721 size_t len, size_t size) { 4722 int q, ql; 4723 int r, rl; 4724 int cur, l; 4725 size_t count = 0; 4726 int inputid; 4727 4728 inputid = ctxt->input->id; 4729 4730 if (buf == NULL) { 4731 len = 0; 4732 size = XML_PARSER_BUFFER_SIZE; 4733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4734 if (buf == NULL) { 4735 xmlErrMemory(ctxt, NULL); 4736 return; 4737 } 4738 } 4739 GROW; /* Assure there's enough input data */ 4740 q = CUR_CHAR(ql); 4741 if (q == 0) 4742 goto not_terminated; 4743 if (!IS_CHAR(q)) { 4744 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4745 "xmlParseComment: invalid xmlChar value %d\n", 4746 q); 4747 xmlFree (buf); 4748 return; 4749 } 4750 NEXTL(ql); 4751 r = CUR_CHAR(rl); 4752 if (r == 0) 4753 goto not_terminated; 4754 if (!IS_CHAR(r)) { 4755 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4756 "xmlParseComment: invalid xmlChar value %d\n", 4757 q); 4758 xmlFree (buf); 4759 return; 4760 } 4761 NEXTL(rl); 4762 cur = CUR_CHAR(l); 4763 if (cur == 0) 4764 goto not_terminated; 4765 while (IS_CHAR(cur) && /* checked */ 4766 ((cur != '>') || 4767 (r != '-') || (q != '-'))) { 4768 if ((r == '-') && (q == '-')) { 4769 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4770 } 4771 if ((len > XML_MAX_TEXT_LENGTH) && 4772 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4774 "Comment too big found", NULL); 4775 xmlFree (buf); 4776 return; 4777 } 4778 if (len + 5 >= size) { 4779 xmlChar *new_buf; 4780 size_t new_size; 4781 4782 new_size = size * 2; 4783 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4784 if (new_buf == NULL) { 
4785 xmlFree (buf); 4786 xmlErrMemory(ctxt, NULL); 4787 return; 4788 } 4789 buf = new_buf; 4790 size = new_size; 4791 } 4792 COPY_BUF(ql,buf,len,q); 4793 q = r; 4794 ql = rl; 4795 r = cur; 4796 rl = l; 4797 4798 count++; 4799 if (count > 50) { 4800 GROW; 4801 count = 0; 4802 if (ctxt->instate == XML_PARSER_EOF) { 4803 xmlFree(buf); 4804 return; 4805 } 4806 } 4807 NEXTL(l); 4808 cur = CUR_CHAR(l); 4809 if (cur == 0) { 4810 SHRINK; 4811 GROW; 4812 cur = CUR_CHAR(l); 4813 } 4814 } 4815 buf[len] = 0; 4816 if (cur == 0) { 4817 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4818 "Comment not terminated \n<!--%.50s\n", buf); 4819 } else if (!IS_CHAR(cur)) { 4820 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4821 "xmlParseComment: invalid xmlChar value %d\n", 4822 cur); 4823 } else { 4824 if (inputid != ctxt->input->id) { 4825 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4826 "Comment doesn't start and stop in the same entity\n"); 4827 } 4828 NEXT; 4829 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4830 (!ctxt->disableSAX)) 4831 ctxt->sax->comment(ctxt->userData, buf); 4832 } 4833 xmlFree(buf); 4834 return; 4835not_terminated: 4836 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4837 "Comment not terminated\n", NULL); 4838 xmlFree(buf); 4839 return; 4840} 4841 4842/** 4843 * xmlParseComment: 4844 * @ctxt: an XML parser context 4845 * 4846 * Skip an XML (SGML) comment <!-- .... --> 4847 * The spec says that "For compatibility, the string "--" (double-hyphen) 4848 * must not occur within comments. " 4849 * 4850 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4851 */ 4852void 4853xmlParseComment(xmlParserCtxtPtr ctxt) { 4854 xmlChar *buf = NULL; 4855 size_t size = XML_PARSER_BUFFER_SIZE; 4856 size_t len = 0; 4857 xmlParserInputState state; 4858 const xmlChar *in; 4859 size_t nbchar = 0; 4860 int ccol; 4861 int inputid; 4862 4863 /* 4864 * Check that there is a comment right here. 
4865 */ 4866 if ((RAW != '<') || (NXT(1) != '!') || 4867 (NXT(2) != '-') || (NXT(3) != '-')) return; 4868 state = ctxt->instate; 4869 ctxt->instate = XML_PARSER_COMMENT; 4870 inputid = ctxt->input->id; 4871 SKIP(4); 4872 SHRINK; 4873 GROW; 4874 4875 /* 4876 * Accelerated common case where input don't need to be 4877 * modified before passing it to the handler. 4878 */ 4879 in = ctxt->input->cur; 4880 do { 4881 if (*in == 0xA) { 4882 do { 4883 ctxt->input->line++; ctxt->input->col = 1; 4884 in++; 4885 } while (*in == 0xA); 4886 } 4887get_more: 4888 ccol = ctxt->input->col; 4889 while (((*in > '-') && (*in <= 0x7F)) || 4890 ((*in >= 0x20) && (*in < '-')) || 4891 (*in == 0x09)) { 4892 in++; 4893 ccol++; 4894 } 4895 ctxt->input->col = ccol; 4896 if (*in == 0xA) { 4897 do { 4898 ctxt->input->line++; ctxt->input->col = 1; 4899 in++; 4900 } while (*in == 0xA); 4901 goto get_more; 4902 } 4903 nbchar = in - ctxt->input->cur; 4904 /* 4905 * save current set of data 4906 */ 4907 if (nbchar > 0) { 4908 if ((ctxt->sax != NULL) && 4909 (ctxt->sax->comment != NULL)) { 4910 if (buf == NULL) { 4911 if ((*in == '-') && (in[1] == '-')) 4912 size = nbchar + 1; 4913 else 4914 size = XML_PARSER_BUFFER_SIZE + nbchar; 4915 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4916 if (buf == NULL) { 4917 xmlErrMemory(ctxt, NULL); 4918 ctxt->instate = state; 4919 return; 4920 } 4921 len = 0; 4922 } else if (len + nbchar + 1 >= size) { 4923 xmlChar *new_buf; 4924 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4925 new_buf = (xmlChar *) xmlRealloc(buf, 4926 size * sizeof(xmlChar)); 4927 if (new_buf == NULL) { 4928 xmlFree (buf); 4929 xmlErrMemory(ctxt, NULL); 4930 ctxt->instate = state; 4931 return; 4932 } 4933 buf = new_buf; 4934 } 4935 memcpy(&buf[len], ctxt->input->cur, nbchar); 4936 len += nbchar; 4937 buf[len] = 0; 4938 } 4939 } 4940 if ((len > XML_MAX_TEXT_LENGTH) && 4941 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4942 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4943 
"Comment too big found", NULL); 4944 xmlFree (buf); 4945 return; 4946 } 4947 ctxt->input->cur = in; 4948 if (*in == 0xA) { 4949 in++; 4950 ctxt->input->line++; ctxt->input->col = 1; 4951 } 4952 if (*in == 0xD) { 4953 in++; 4954 if (*in == 0xA) { 4955 ctxt->input->cur = in; 4956 in++; 4957 ctxt->input->line++; ctxt->input->col = 1; 4958 continue; /* while */ 4959 } 4960 in--; 4961 } 4962 SHRINK; 4963 GROW; 4964 if (ctxt->instate == XML_PARSER_EOF) { 4965 xmlFree(buf); 4966 return; 4967 } 4968 in = ctxt->input->cur; 4969 if (*in == '-') { 4970 if (in[1] == '-') { 4971 if (in[2] == '>') { 4972 if (ctxt->input->id != inputid) { 4973 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4974 "comment doesn't start and stop in the same entity\n"); 4975 } 4976 SKIP(3); 4977 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4978 (!ctxt->disableSAX)) { 4979 if (buf != NULL) 4980 ctxt->sax->comment(ctxt->userData, buf); 4981 else 4982 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4983 } 4984 if (buf != NULL) 4985 xmlFree(buf); 4986 ctxt->instate = state; 4987 return; 4988 } 4989 if (buf != NULL) { 4990 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4991 "Double hyphen within comment: " 4992 "<!--%.50s\n", 4993 buf); 4994 } else 4995 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4996 "Double hyphen within comment\n", NULL); 4997 in++; 4998 ctxt->input->col++; 4999 } 5000 in++; 5001 ctxt->input->col++; 5002 goto get_more; 5003 } 5004 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5005 xmlParseCommentComplex(ctxt, buf, len, size); 5006 ctxt->instate = state; 5007 return; 5008} 5009 5010 5011/** 5012 * xmlParsePITarget: 5013 * @ctxt: an XML parser context 5014 * 5015 * parse the name of a PI 5016 * 5017 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5018 * 5019 * Returns the PITarget name or NULL 5020 */ 5021 5022const xmlChar * 5023xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5024 const xmlChar *name; 5025 5026 name = 
xmlParseName(ctxt); 5027 if ((name != NULL) && 5028 ((name[0] == 'x') || (name[0] == 'X')) && 5029 ((name[1] == 'm') || (name[1] == 'M')) && 5030 ((name[2] == 'l') || (name[2] == 'L'))) { 5031 int i; 5032 if ((name[0] == 'x') && (name[1] == 'm') && 5033 (name[2] == 'l') && (name[3] == 0)) { 5034 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5035 "XML declaration allowed only at the start of the document\n"); 5036 return(name); 5037 } else if (name[3] == 0) { 5038 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5039 return(name); 5040 } 5041 for (i = 0;;i++) { 5042 if (xmlW3CPIs[i] == NULL) break; 5043 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5044 return(name); 5045 } 5046 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5047 "xmlParsePITarget: invalid name prefix 'xml'\n", 5048 NULL, NULL); 5049 } 5050 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5051 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5052 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 5053 } 5054 return(name); 5055} 5056 5057#ifdef LIBXML_CATALOG_ENABLED 5058/** 5059 * xmlParseCatalogPI: 5060 * @ctxt: an XML parser context 5061 * @catalog: the PI value string 5062 * 5063 * parse an XML Catalog Processing Instruction. 
5064 * 5065 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5066 * 5067 * Occurs only if allowed by the user and if happening in the Misc 5068 * part of the document before any doctype informations 5069 * This will add the given catalog to the parsing context in order 5070 * to be used if there is a resolution need further down in the document 5071 */ 5072 5073static void 5074xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5075 xmlChar *URL = NULL; 5076 const xmlChar *tmp, *base; 5077 xmlChar marker; 5078 5079 tmp = catalog; 5080 while (IS_BLANK_CH(*tmp)) tmp++; 5081 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5082 goto error; 5083 tmp += 7; 5084 while (IS_BLANK_CH(*tmp)) tmp++; 5085 if (*tmp != '=') { 5086 return; 5087 } 5088 tmp++; 5089 while (IS_BLANK_CH(*tmp)) tmp++; 5090 marker = *tmp; 5091 if ((marker != '\'') && (marker != '"')) 5092 goto error; 5093 tmp++; 5094 base = tmp; 5095 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5096 if (*tmp == 0) 5097 goto error; 5098 URL = xmlStrndup(base, tmp - base); 5099 tmp++; 5100 while (IS_BLANK_CH(*tmp)) tmp++; 5101 if (*tmp != 0) 5102 goto error; 5103 5104 if (URL != NULL) { 5105 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5106 xmlFree(URL); 5107 } 5108 return; 5109 5110error: 5111 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5112 "Catalog PI syntax error: %s\n", 5113 catalog, NULL); 5114 if (URL != NULL) 5115 xmlFree(URL); 5116} 5117#endif 5118 5119/** 5120 * xmlParsePI: 5121 * @ctxt: an XML parser context 5122 * 5123 * parse an XML Processing Instruction. 5124 * 5125 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5126 * 5127 * The processing is transfered to SAX once parsed. 
5128 */ 5129 5130void 5131xmlParsePI(xmlParserCtxtPtr ctxt) { 5132 xmlChar *buf = NULL; 5133 size_t len = 0; 5134 size_t size = XML_PARSER_BUFFER_SIZE; 5135 int cur, l; 5136 const xmlChar *target; 5137 xmlParserInputState state; 5138 int count = 0; 5139 5140 if ((RAW == '<') && (NXT(1) == '?')) { 5141 xmlParserInputPtr input = ctxt->input; 5142 state = ctxt->instate; 5143 ctxt->instate = XML_PARSER_PI; 5144 /* 5145 * this is a Processing Instruction. 5146 */ 5147 SKIP(2); 5148 SHRINK; 5149 5150 /* 5151 * Parse the target name and check for special support like 5152 * namespace. 5153 */ 5154 target = xmlParsePITarget(ctxt); 5155 if (target != NULL) { 5156 if ((RAW == '?') && (NXT(1) == '>')) { 5157 if (input != ctxt->input) { 5158 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5159 "PI declaration doesn't start and stop in the same entity\n"); 5160 } 5161 SKIP(2); 5162 5163 /* 5164 * SAX: PI detected. 5165 */ 5166 if ((ctxt->sax) && (!ctxt->disableSAX) && 5167 (ctxt->sax->processingInstruction != NULL)) 5168 ctxt->sax->processingInstruction(ctxt->userData, 5169 target, NULL); 5170 if (ctxt->instate != XML_PARSER_EOF) 5171 ctxt->instate = state; 5172 return; 5173 } 5174 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5175 if (buf == NULL) { 5176 xmlErrMemory(ctxt, NULL); 5177 ctxt->instate = state; 5178 return; 5179 } 5180 cur = CUR; 5181 if (!IS_BLANK(cur)) { 5182 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5183 "ParsePI: PI %s space expected\n", target); 5184 } 5185 SKIP_BLANKS; 5186 cur = CUR_CHAR(l); 5187 while (IS_CHAR(cur) && /* checked */ 5188 ((cur != '?') || (NXT(1) != '>'))) { 5189 if (len + 5 >= size) { 5190 xmlChar *tmp; 5191 size_t new_size = size * 2; 5192 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5193 if (tmp == NULL) { 5194 xmlErrMemory(ctxt, NULL); 5195 xmlFree(buf); 5196 ctxt->instate = state; 5197 return; 5198 } 5199 buf = tmp; 5200 size = new_size; 5201 } 5202 count++; 5203 if (count > 50) { 5204 GROW; 5205 if (ctxt->instate == 
XML_PARSER_EOF) { 5206 xmlFree(buf); 5207 return; 5208 } 5209 count = 0; 5210 if ((len > XML_MAX_TEXT_LENGTH) && 5211 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5212 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5213 "PI %s too big found", target); 5214 xmlFree(buf); 5215 ctxt->instate = state; 5216 return; 5217 } 5218 } 5219 COPY_BUF(l,buf,len,cur); 5220 NEXTL(l); 5221 cur = CUR_CHAR(l); 5222 if (cur == 0) { 5223 SHRINK; 5224 GROW; 5225 cur = CUR_CHAR(l); 5226 } 5227 } 5228 if ((len > XML_MAX_TEXT_LENGTH) && 5229 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5230 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5231 "PI %s too big found", target); 5232 xmlFree(buf); 5233 ctxt->instate = state; 5234 return; 5235 } 5236 buf[len] = 0; 5237 if (cur != '?') { 5238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5239 "ParsePI: PI %s never end ...\n", target); 5240 } else { 5241 if (input != ctxt->input) { 5242 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5243 "PI declaration doesn't start and stop in the same entity\n"); 5244 } 5245 SKIP(2); 5246 5247#ifdef LIBXML_CATALOG_ENABLED 5248 if (((state == XML_PARSER_MISC) || 5249 (state == XML_PARSER_START)) && 5250 (xmlStrEqual(target, XML_CATALOG_PI))) { 5251 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5252 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5253 (allow == XML_CATA_ALLOW_ALL)) 5254 xmlParseCatalogPI(ctxt, buf); 5255 } 5256#endif 5257 5258 5259 /* 5260 * SAX: PI detected. 
5261 */ 5262 if ((ctxt->sax) && (!ctxt->disableSAX) && 5263 (ctxt->sax->processingInstruction != NULL)) 5264 ctxt->sax->processingInstruction(ctxt->userData, 5265 target, buf); 5266 } 5267 xmlFree(buf); 5268 } else { 5269 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5270 } 5271 if (ctxt->instate != XML_PARSER_EOF) 5272 ctxt->instate = state; 5273 } 5274} 5275 5276/** 5277 * xmlParseNotationDecl: 5278 * @ctxt: an XML parser context 5279 * 5280 * parse a notation declaration 5281 * 5282 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5283 * 5284 * Hence there is actually 3 choices: 5285 * 'PUBLIC' S PubidLiteral 5286 * 'PUBLIC' S PubidLiteral S SystemLiteral 5287 * and 'SYSTEM' S SystemLiteral 5288 * 5289 * See the NOTE on xmlParseExternalID(). 5290 */ 5291 5292void 5293xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5294 const xmlChar *name; 5295 xmlChar *Pubid; 5296 xmlChar *Systemid; 5297 5298 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5299 xmlParserInputPtr input = ctxt->input; 5300 SHRINK; 5301 SKIP(10); 5302 if (!IS_BLANK_CH(CUR)) { 5303 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5304 "Space required after '<!NOTATION'\n"); 5305 return; 5306 } 5307 SKIP_BLANKS; 5308 5309 name = xmlParseName(ctxt); 5310 if (name == NULL) { 5311 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5312 return; 5313 } 5314 if (!IS_BLANK_CH(CUR)) { 5315 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5316 "Space required after the NOTATION name'\n"); 5317 return; 5318 } 5319 if (xmlStrchr(name, ':') != NULL) { 5320 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5321 "colon are forbidden from notation names '%s'\n", 5322 name, NULL, NULL); 5323 } 5324 SKIP_BLANKS; 5325 5326 /* 5327 * Parse the IDs. 
5328 */ 5329 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5330 SKIP_BLANKS; 5331 5332 if (RAW == '>') { 5333 if (input != ctxt->input) { 5334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5335 "Notation declaration doesn't start and stop in the same entity\n"); 5336 } 5337 NEXT; 5338 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5339 (ctxt->sax->notationDecl != NULL)) 5340 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5341 } else { 5342 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5343 } 5344 if (Systemid != NULL) xmlFree(Systemid); 5345 if (Pubid != NULL) xmlFree(Pubid); 5346 } 5347} 5348 5349/** 5350 * xmlParseEntityDecl: 5351 * @ctxt: an XML parser context 5352 * 5353 * parse <!ENTITY declarations 5354 * 5355 * [70] EntityDecl ::= GEDecl | PEDecl 5356 * 5357 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5358 * 5359 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5360 * 5361 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5362 * 5363 * [74] PEDef ::= EntityValue | ExternalID 5364 * 5365 * [76] NDataDecl ::= S 'NDATA' S Name 5366 * 5367 * [ VC: Notation Declared ] 5368 * The Name must match the declared name of a notation. 
5369 */ 5370 5371void 5372xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5373 const xmlChar *name = NULL; 5374 xmlChar *value = NULL; 5375 xmlChar *URI = NULL, *literal = NULL; 5376 const xmlChar *ndata = NULL; 5377 int isParameter = 0; 5378 xmlChar *orig = NULL; 5379 int skipped; 5380 5381 /* GROW; done in the caller */ 5382 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5383 xmlParserInputPtr input = ctxt->input; 5384 SHRINK; 5385 SKIP(8); 5386 skipped = SKIP_BLANKS; 5387 if (skipped == 0) { 5388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5389 "Space required after '<!ENTITY'\n"); 5390 } 5391 5392 if (RAW == '%') { 5393 NEXT; 5394 skipped = SKIP_BLANKS; 5395 if (skipped == 0) { 5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5397 "Space required after '%'\n"); 5398 } 5399 isParameter = 1; 5400 } 5401 5402 name = xmlParseName(ctxt); 5403 if (name == NULL) { 5404 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5405 "xmlParseEntityDecl: no name\n"); 5406 return; 5407 } 5408 if (xmlStrchr(name, ':') != NULL) { 5409 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5410 "colon are forbidden from entities names '%s'\n", 5411 name, NULL, NULL); 5412 } 5413 skipped = SKIP_BLANKS; 5414 if (skipped == 0) { 5415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5416 "Space required after the entity name\n"); 5417 } 5418 5419 ctxt->instate = XML_PARSER_ENTITY_DECL; 5420 /* 5421 * handle the various case of definitions... 
5422 */ 5423 if (isParameter) { 5424 if ((RAW == '"') || (RAW == '\'')) { 5425 value = xmlParseEntityValue(ctxt, &orig); 5426 if (value) { 5427 if ((ctxt->sax != NULL) && 5428 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5429 ctxt->sax->entityDecl(ctxt->userData, name, 5430 XML_INTERNAL_PARAMETER_ENTITY, 5431 NULL, NULL, value); 5432 } 5433 } else { 5434 URI = xmlParseExternalID(ctxt, &literal, 1); 5435 if ((URI == NULL) && (literal == NULL)) { 5436 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5437 } 5438 if (URI) { 5439 xmlURIPtr uri; 5440 5441 uri = xmlParseURI((const char *) URI); 5442 if (uri == NULL) { 5443 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5444 "Invalid URI: %s\n", URI); 5445 /* 5446 * This really ought to be a well formedness error 5447 * but the XML Core WG decided otherwise c.f. issue 5448 * E26 of the XML erratas. 5449 */ 5450 } else { 5451 if (uri->fragment != NULL) { 5452 /* 5453 * Okay this is foolish to block those but not 5454 * invalid URIs. 5455 */ 5456 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5457 } else { 5458 if ((ctxt->sax != NULL) && 5459 (!ctxt->disableSAX) && 5460 (ctxt->sax->entityDecl != NULL)) 5461 ctxt->sax->entityDecl(ctxt->userData, name, 5462 XML_EXTERNAL_PARAMETER_ENTITY, 5463 literal, URI, NULL); 5464 } 5465 xmlFreeURI(uri); 5466 } 5467 } 5468 } 5469 } else { 5470 if ((RAW == '"') || (RAW == '\'')) { 5471 value = xmlParseEntityValue(ctxt, &orig); 5472 if ((ctxt->sax != NULL) && 5473 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5474 ctxt->sax->entityDecl(ctxt->userData, name, 5475 XML_INTERNAL_GENERAL_ENTITY, 5476 NULL, NULL, value); 5477 /* 5478 * For expat compatibility in SAX mode. 
5479 */ 5480 if ((ctxt->myDoc == NULL) || 5481 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5482 if (ctxt->myDoc == NULL) { 5483 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5484 if (ctxt->myDoc == NULL) { 5485 xmlErrMemory(ctxt, "New Doc failed"); 5486 return; 5487 } 5488 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5489 } 5490 if (ctxt->myDoc->intSubset == NULL) 5491 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5492 BAD_CAST "fake", NULL, NULL); 5493 5494 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5495 NULL, NULL, value); 5496 } 5497 } else { 5498 URI = xmlParseExternalID(ctxt, &literal, 1); 5499 if ((URI == NULL) && (literal == NULL)) { 5500 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5501 } 5502 if (URI) { 5503 xmlURIPtr uri; 5504 5505 uri = xmlParseURI((const char *)URI); 5506 if (uri == NULL) { 5507 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5508 "Invalid URI: %s\n", URI); 5509 /* 5510 * This really ought to be a well formedness error 5511 * but the XML Core WG decided otherwise c.f. issue 5512 * E26 of the XML erratas. 5513 */ 5514 } else { 5515 if (uri->fragment != NULL) { 5516 /* 5517 * Okay this is foolish to block those but not 5518 * invalid URIs. 
5519 */ 5520 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5521 } 5522 xmlFreeURI(uri); 5523 } 5524 } 5525 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5526 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5527 "Space required before 'NDATA'\n"); 5528 } 5529 SKIP_BLANKS; 5530 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5531 SKIP(5); 5532 if (!IS_BLANK_CH(CUR)) { 5533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5534 "Space required after 'NDATA'\n"); 5535 } 5536 SKIP_BLANKS; 5537 ndata = xmlParseName(ctxt); 5538 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5539 (ctxt->sax->unparsedEntityDecl != NULL)) 5540 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5541 literal, URI, ndata); 5542 } else { 5543 if ((ctxt->sax != NULL) && 5544 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5545 ctxt->sax->entityDecl(ctxt->userData, name, 5546 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5547 literal, URI, NULL); 5548 /* 5549 * For expat compatibility in SAX mode. 5550 * assuming the entity repalcement was asked for 5551 */ 5552 if ((ctxt->replaceEntities != 0) && 5553 ((ctxt->myDoc == NULL) || 5554 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5555 if (ctxt->myDoc == NULL) { 5556 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5557 if (ctxt->myDoc == NULL) { 5558 xmlErrMemory(ctxt, "New Doc failed"); 5559 return; 5560 } 5561 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5562 } 5563 5564 if (ctxt->myDoc->intSubset == NULL) 5565 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5566 BAD_CAST "fake", NULL, NULL); 5567 xmlSAX2EntityDecl(ctxt, name, 5568 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5569 literal, URI, NULL); 5570 } 5571 } 5572 } 5573 } 5574 SKIP_BLANKS; 5575 if (RAW != '>') { 5576 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5577 "xmlParseEntityDecl: entity %s not terminated\n", name); 5578 } else { 5579 if (input != ctxt->input) { 5580 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5581 "Entity declaration doesn't start and stop in the same entity\n"); 5582 } 
5583 NEXT; 5584 } 5585 if (orig != NULL) { 5586 /* 5587 * Ugly mechanism to save the raw entity value. 5588 */ 5589 xmlEntityPtr cur = NULL; 5590 5591 if (isParameter) { 5592 if ((ctxt->sax != NULL) && 5593 (ctxt->sax->getParameterEntity != NULL)) 5594 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5595 } else { 5596 if ((ctxt->sax != NULL) && 5597 (ctxt->sax->getEntity != NULL)) 5598 cur = ctxt->sax->getEntity(ctxt->userData, name); 5599 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5600 cur = xmlSAX2GetEntity(ctxt, name); 5601 } 5602 } 5603 if (cur != NULL) { 5604 if (cur->orig != NULL) 5605 xmlFree(orig); 5606 else 5607 cur->orig = orig; 5608 } else 5609 xmlFree(orig); 5610 } 5611 if (value != NULL) xmlFree(value); 5612 if (URI != NULL) xmlFree(URI); 5613 if (literal != NULL) xmlFree(literal); 5614 } 5615} 5616 5617/** 5618 * xmlParseDefaultDecl: 5619 * @ctxt: an XML parser context 5620 * @value: Receive a possible fixed default value for the attribute 5621 * 5622 * Parse an attribute default declaration 5623 * 5624 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5625 * 5626 * [ VC: Required Attribute ] 5627 * if the default declaration is the keyword #REQUIRED, then the 5628 * attribute must be specified for all elements of the type in the 5629 * attribute-list declaration. 5630 * 5631 * [ VC: Attribute Default Legal ] 5632 * The declared default value must meet the lexical constraints of 5633 * the declared attribute type c.f. xmlValidateAttributeDecl() 5634 * 5635 * [ VC: Fixed Attribute Default ] 5636 * if an attribute has a default value declared with the #FIXED 5637 * keyword, instances of that attribute must match the default value. 5638 * 5639 * [ WFC: No < in Attribute Values ] 5640 * handled in xmlParseAttValue() 5641 * 5642 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5643 * or XML_ATTRIBUTE_FIXED. 
5644 */ 5645 5646int 5647xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5648 int val; 5649 xmlChar *ret; 5650 5651 *value = NULL; 5652 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5653 SKIP(9); 5654 return(XML_ATTRIBUTE_REQUIRED); 5655 } 5656 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5657 SKIP(8); 5658 return(XML_ATTRIBUTE_IMPLIED); 5659 } 5660 val = XML_ATTRIBUTE_NONE; 5661 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5662 SKIP(6); 5663 val = XML_ATTRIBUTE_FIXED; 5664 if (!IS_BLANK_CH(CUR)) { 5665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5666 "Space required after '#FIXED'\n"); 5667 } 5668 SKIP_BLANKS; 5669 } 5670 ret = xmlParseAttValue(ctxt); 5671 ctxt->instate = XML_PARSER_DTD; 5672 if (ret == NULL) { 5673 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5674 "Attribute default value declaration error\n"); 5675 } else 5676 *value = ret; 5677 return(val); 5678} 5679 5680/** 5681 * xmlParseNotationType: 5682 * @ctxt: an XML parser context 5683 * 5684 * parse an Notation attribute type. 5685 * 5686 * Note: the leading 'NOTATION' S part has already being parsed... 5687 * 5688 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5689 * 5690 * [ VC: Notation Attributes ] 5691 * Values of this type must match one of the notation names included 5692 * in the declaration; all notation names in the declaration must be declared. 
5693 * 5694 * Returns: the notation attribute tree built while parsing 5695 */ 5696 5697xmlEnumerationPtr 5698xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5699 const xmlChar *name; 5700 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5701 5702 if (RAW != '(') { 5703 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5704 return(NULL); 5705 } 5706 SHRINK; 5707 do { 5708 NEXT; 5709 SKIP_BLANKS; 5710 name = xmlParseName(ctxt); 5711 if (name == NULL) { 5712 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5713 "Name expected in NOTATION declaration\n"); 5714 xmlFreeEnumeration(ret); 5715 return(NULL); 5716 } 5717 tmp = ret; 5718 while (tmp != NULL) { 5719 if (xmlStrEqual(name, tmp->name)) { 5720 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5721 "standalone: attribute notation value token %s duplicated\n", 5722 name, NULL); 5723 if (!xmlDictOwns(ctxt->dict, name)) 5724 xmlFree((xmlChar *) name); 5725 break; 5726 } 5727 tmp = tmp->next; 5728 } 5729 if (tmp == NULL) { 5730 cur = xmlCreateEnumeration(name); 5731 if (cur == NULL) { 5732 xmlFreeEnumeration(ret); 5733 return(NULL); 5734 } 5735 if (last == NULL) ret = last = cur; 5736 else { 5737 last->next = cur; 5738 last = cur; 5739 } 5740 } 5741 SKIP_BLANKS; 5742 } while (RAW == '|'); 5743 if (RAW != ')') { 5744 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5745 xmlFreeEnumeration(ret); 5746 return(NULL); 5747 } 5748 NEXT; 5749 return(ret); 5750} 5751 5752/** 5753 * xmlParseEnumerationType: 5754 * @ctxt: an XML parser context 5755 * 5756 * parse an Enumeration attribute type. 5757 * 5758 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5759 * 5760 * [ VC: Enumeration ] 5761 * Values of this type must match one of the Nmtoken tokens in 5762 * the declaration 5763 * 5764 * Returns: the enumeration attribute tree built while parsing 5765 */ 5766 5767xmlEnumerationPtr 5768xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5769 xmlChar *name; 5770 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5771 5772 if (RAW != '(') { 5773 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5774 return(NULL); 5775 } 5776 SHRINK; 5777 do { 5778 NEXT; 5779 SKIP_BLANKS; 5780 name = xmlParseNmtoken(ctxt); 5781 if (name == NULL) { 5782 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5783 return(ret); 5784 } 5785 tmp = ret; 5786 while (tmp != NULL) { 5787 if (xmlStrEqual(name, tmp->name)) { 5788 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5789 "standalone: attribute enumeration value token %s duplicated\n", 5790 name, NULL); 5791 if (!xmlDictOwns(ctxt->dict, name)) 5792 xmlFree(name); 5793 break; 5794 } 5795 tmp = tmp->next; 5796 } 5797 if (tmp == NULL) { 5798 cur = xmlCreateEnumeration(name); 5799 if (!xmlDictOwns(ctxt->dict, name)) 5800 xmlFree(name); 5801 if (cur == NULL) { 5802 xmlFreeEnumeration(ret); 5803 return(NULL); 5804 } 5805 if (last == NULL) ret = last = cur; 5806 else { 5807 last->next = cur; 5808 last = cur; 5809 } 5810 } 5811 SKIP_BLANKS; 5812 } while (RAW == '|'); 5813 if (RAW != ')') { 5814 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5815 return(ret); 5816 } 5817 NEXT; 5818 return(ret); 5819} 5820 5821/** 5822 * xmlParseEnumeratedType: 5823 * @ctxt: an XML parser context 5824 * @tree: the enumeration tree built while parsing 5825 * 5826 * parse an Enumerated attribute type. 5827 * 5828 * [57] EnumeratedType ::= NotationType | Enumeration 5829 * 5830 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5831 * 5832 * 5833 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5834 */ 5835 5836int 5837xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5838 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5839 SKIP(8); 5840 if (!IS_BLANK_CH(CUR)) { 5841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5842 "Space required after 'NOTATION'\n"); 5843 return(0); 5844 } 5845 SKIP_BLANKS; 5846 *tree = xmlParseNotationType(ctxt); 5847 if (*tree == NULL) return(0); 5848 return(XML_ATTRIBUTE_NOTATION); 5849 } 5850 *tree = xmlParseEnumerationType(ctxt); 5851 if (*tree == NULL) return(0); 5852 return(XML_ATTRIBUTE_ENUMERATION); 5853} 5854 5855/** 5856 * xmlParseAttributeType: 5857 * @ctxt: an XML parser context 5858 * @tree: the enumeration tree built while parsing 5859 * 5860 * parse the Attribute list def for an element 5861 * 5862 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5863 * 5864 * [55] StringType ::= 'CDATA' 5865 * 5866 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5867 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5868 * 5869 * Validity constraints for attribute values syntax are checked in 5870 * xmlValidateAttributeValue() 5871 * 5872 * [ VC: ID ] 5873 * Values of type ID must match the Name production. A name must not 5874 * appear more than once in an XML document as a value of this type; 5875 * i.e., ID values must uniquely identify the elements which bear them. 5876 * 5877 * [ VC: One ID per Element Type ] 5878 * No element type may have more than one ID attribute specified. 5879 * 5880 * [ VC: ID Attribute Default ] 5881 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5882 * 5883 * [ VC: IDREF ] 5884 * Values of type IDREF must match the Name production, and values 5885 * of type IDREFS must match Names; each IDREF Name must match the value 5886 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5887 * values must match the value of some ID attribute. 5888 * 5889 * [ VC: Entity Name ] 5890 * Values of type ENTITY must match the Name production, values 5891 * of type ENTITIES must match Names; each Entity Name must match the 5892 * name of an unparsed entity declared in the DTD. 5893 * 5894 * [ VC: Name Token ] 5895 * Values of type NMTOKEN must match the Nmtoken production; values 5896 * of type NMTOKENS must match Nmtokens. 5897 * 5898 * Returns the attribute type 5899 */ 5900int 5901xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5902 SHRINK; 5903 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5904 SKIP(5); 5905 return(XML_ATTRIBUTE_CDATA); 5906 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5907 SKIP(6); 5908 return(XML_ATTRIBUTE_IDREFS); 5909 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5910 SKIP(5); 5911 return(XML_ATTRIBUTE_IDREF); 5912 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5913 SKIP(2); 5914 return(XML_ATTRIBUTE_ID); 5915 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5916 SKIP(6); 5917 return(XML_ATTRIBUTE_ENTITY); 5918 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5919 SKIP(8); 5920 return(XML_ATTRIBUTE_ENTITIES); 5921 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5922 SKIP(8); 5923 return(XML_ATTRIBUTE_NMTOKENS); 5924 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5925 SKIP(7); 5926 return(XML_ATTRIBUTE_NMTOKEN); 5927 } 5928 return(xmlParseEnumeratedType(ctxt, tree)); 5929} 5930 5931/** 5932 * xmlParseAttributeListDecl: 5933 * @ctxt: an XML parser context 5934 * 5935 * : parse the Attribute list def for an element 5936 * 5937 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5938 * 5939 * [53] AttDef ::= S Name S AttType S DefaultDecl 5940 * 5941 */ 5942void 5943xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5944 const xmlChar *elemName; 5945 const xmlChar *attrName; 5946 xmlEnumerationPtr tree; 5947 5948 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5949 xmlParserInputPtr input = ctxt->input; 5950 5951 SKIP(9); 5952 if (!IS_BLANK_CH(CUR)) { 5953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5954 "Space required after '<!ATTLIST'\n"); 5955 } 5956 SKIP_BLANKS; 5957 elemName = xmlParseName(ctxt); 5958 if (elemName == NULL) { 5959 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5960 "ATTLIST: no name for Element\n"); 5961 return; 5962 } 5963 SKIP_BLANKS; 5964 GROW; 5965 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5966 const xmlChar *check = CUR_PTR; 5967 int type; 5968 int def; 5969 xmlChar *defaultValue = NULL; 5970 5971 GROW; 5972 tree = NULL; 5973 attrName = xmlParseName(ctxt); 5974 if (attrName == NULL) { 5975 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5976 "ATTLIST: no name for Attribute\n"); 5977 break; 5978 } 5979 GROW; 5980 if (!IS_BLANK_CH(CUR)) { 5981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5982 "Space required after the attribute name\n"); 5983 break; 5984 } 5985 SKIP_BLANKS; 5986 5987 type = xmlParseAttributeType(ctxt, &tree); 5988 if (type <= 0) { 5989 break; 5990 } 5991 5992 GROW; 5993 if (!IS_BLANK_CH(CUR)) { 5994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5995 "Space required after the attribute type\n"); 5996 if (tree != NULL) 5997 xmlFreeEnumeration(tree); 5998 break; 5999 } 6000 SKIP_BLANKS; 6001 6002 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6003 if (def <= 0) { 6004 if (defaultValue != NULL) 6005 xmlFree(defaultValue); 6006 if (tree != NULL) 6007 xmlFreeEnumeration(tree); 6008 break; 6009 } 6010 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6011 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6012 6013 GROW; 6014 if (RAW != '>') { 6015 if 
(!IS_BLANK_CH(CUR)) { 6016 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6017 "Space required after the attribute default value\n"); 6018 if (defaultValue != NULL) 6019 xmlFree(defaultValue); 6020 if (tree != NULL) 6021 xmlFreeEnumeration(tree); 6022 break; 6023 } 6024 SKIP_BLANKS; 6025 } 6026 if (check == CUR_PTR) { 6027 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6028 "in xmlParseAttributeListDecl\n"); 6029 if (defaultValue != NULL) 6030 xmlFree(defaultValue); 6031 if (tree != NULL) 6032 xmlFreeEnumeration(tree); 6033 break; 6034 } 6035 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6036 (ctxt->sax->attributeDecl != NULL)) 6037 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6038 type, def, defaultValue, tree); 6039 else if (tree != NULL) 6040 xmlFreeEnumeration(tree); 6041 6042 if ((ctxt->sax2) && (defaultValue != NULL) && 6043 (def != XML_ATTRIBUTE_IMPLIED) && 6044 (def != XML_ATTRIBUTE_REQUIRED)) { 6045 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6046 } 6047 if (ctxt->sax2) { 6048 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6049 } 6050 if (defaultValue != NULL) 6051 xmlFree(defaultValue); 6052 GROW; 6053 } 6054 if (RAW == '>') { 6055 if (input != ctxt->input) { 6056 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6057 "Attribute list declaration doesn't start and stop in the same entity\n", 6058 NULL, NULL); 6059 } 6060 NEXT; 6061 } 6062 } 6063} 6064 6065/** 6066 * xmlParseElementMixedContentDecl: 6067 * @ctxt: an XML parser context 6068 * @inputchk: the input used for the current entity, needed for boundary checks 6069 * 6070 * parse the declaration for a Mixed Element content 6071 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6072 * 6073 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6074 * '(' S? '#PCDATA' S? 
')' 6075 * 6076 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6077 * 6078 * [ VC: No Duplicate Types ] 6079 * The same name must not appear more than once in a single 6080 * mixed-content declaration. 6081 * 6082 * returns: the list of the xmlElementContentPtr describing the element choices 6083 */ 6084xmlElementContentPtr 6085xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6086 xmlElementContentPtr ret = NULL, cur = NULL, n; 6087 const xmlChar *elem = NULL; 6088 6089 GROW; 6090 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6091 SKIP(7); 6092 SKIP_BLANKS; 6093 SHRINK; 6094 if (RAW == ')') { 6095 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6096 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6097"Element content declaration doesn't start and stop in the same entity\n", 6098 NULL, NULL); 6099 } 6100 NEXT; 6101 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6102 if (ret == NULL) 6103 return(NULL); 6104 if (RAW == '*') { 6105 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6106 NEXT; 6107 } 6108 return(ret); 6109 } 6110 if ((RAW == '(') || (RAW == '|')) { 6111 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6112 if (ret == NULL) return(NULL); 6113 } 6114 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6115 NEXT; 6116 if (elem == NULL) { 6117 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6118 if (ret == NULL) return(NULL); 6119 ret->c1 = cur; 6120 if (cur != NULL) 6121 cur->parent = ret; 6122 cur = ret; 6123 } else { 6124 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6125 if (n == NULL) return(NULL); 6126 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6127 if (n->c1 != NULL) 6128 n->c1->parent = n; 6129 cur->c2 = n; 6130 if (n != NULL) 6131 n->parent = cur; 6132 cur = n; 6133 } 6134 SKIP_BLANKS; 6135 elem = xmlParseName(ctxt); 6136 if (elem == NULL) { 
6137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6138 "xmlParseElementMixedContentDecl : Name expected\n"); 6139 xmlFreeDocElementContent(ctxt->myDoc, cur); 6140 return(NULL); 6141 } 6142 SKIP_BLANKS; 6143 GROW; 6144 } 6145 if ((RAW == ')') && (NXT(1) == '*')) { 6146 if (elem != NULL) { 6147 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6148 XML_ELEMENT_CONTENT_ELEMENT); 6149 if (cur->c2 != NULL) 6150 cur->c2->parent = cur; 6151 } 6152 if (ret != NULL) 6153 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6156"Element content declaration doesn't start and stop in the same entity\n", 6157 NULL, NULL); 6158 } 6159 SKIP(2); 6160 } else { 6161 xmlFreeDocElementContent(ctxt->myDoc, ret); 6162 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6163 return(NULL); 6164 } 6165 6166 } else { 6167 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6168 } 6169 return(ret); 6170} 6171 6172/** 6173 * xmlParseElementChildrenContentDeclPriv: 6174 * @ctxt: an XML parser context 6175 * @inputchk: the input used for the current entity, needed for boundary checks 6176 * @depth: the level of recursion 6177 * 6178 * parse the declaration for a Mixed Element content 6179 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6180 * 6181 * 6182 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6183 * 6184 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6185 * 6186 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6187 * 6188 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6189 * 6190 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6191 * TODO Parameter-entity replacement text must be properly nested 6192 * with parenthesized groups. 
That is to say, if either of the 6193 * opening or closing parentheses in a choice, seq, or Mixed 6194 * construct is contained in the replacement text for a parameter 6195 * entity, both must be contained in the same replacement text. For 6196 * interoperability, if a parameter-entity reference appears in a 6197 * choice, seq, or Mixed construct, its replacement text should not 6198 * be empty, and neither the first nor last non-blank character of 6199 * the replacement text should be a connector (| or ,). 6200 * 6201 * Returns the tree of xmlElementContentPtr describing the element 6202 * hierarchy. 6203 */ 6204static xmlElementContentPtr 6205xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6206 int depth) { 6207 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6208 const xmlChar *elem; 6209 xmlChar type = 0; 6210 6211 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6212 (depth > 2048)) { 6213 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6214"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6215 depth); 6216 return(NULL); 6217 } 6218 SKIP_BLANKS; 6219 GROW; 6220 if (RAW == '(') { 6221 int inputid = ctxt->input->id; 6222 6223 /* Recurse on first child */ 6224 NEXT; 6225 SKIP_BLANKS; 6226 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6227 depth + 1); 6228 SKIP_BLANKS; 6229 GROW; 6230 } else { 6231 elem = xmlParseName(ctxt); 6232 if (elem == NULL) { 6233 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6234 return(NULL); 6235 } 6236 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6237 if (cur == NULL) { 6238 xmlErrMemory(ctxt, NULL); 6239 return(NULL); 6240 } 6241 GROW; 6242 if (RAW == '?') { 6243 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6244 NEXT; 6245 } else if (RAW == '*') { 6246 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6247 NEXT; 6248 } else if (RAW == '+') { 6249 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6250 NEXT; 6251 } else { 6252 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6253 } 6254 GROW; 6255 } 6256 SKIP_BLANKS; 6257 SHRINK; 6258 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6259 /* 6260 * Each loop we parse one separator and one element. 6261 */ 6262 if (RAW == ',') { 6263 if (type == 0) type = CUR; 6264 6265 /* 6266 * Detect "Name | Name , Name" error 6267 */ 6268 else if (type != CUR) { 6269 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6270 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6271 type); 6272 if ((last != NULL) && (last != ret)) 6273 xmlFreeDocElementContent(ctxt->myDoc, last); 6274 if (ret != NULL) 6275 xmlFreeDocElementContent(ctxt->myDoc, ret); 6276 return(NULL); 6277 } 6278 NEXT; 6279 6280 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6281 if (op == NULL) { 6282 if ((last != NULL) && (last != ret)) 6283 xmlFreeDocElementContent(ctxt->myDoc, last); 6284 xmlFreeDocElementContent(ctxt->myDoc, ret); 6285 return(NULL); 6286 } 6287 if (last == NULL) { 6288 op->c1 = ret; 6289 if (ret != NULL) 6290 ret->parent = op; 6291 ret = cur = op; 6292 } else { 6293 cur->c2 = op; 6294 if (op != NULL) 6295 op->parent = cur; 6296 op->c1 = last; 6297 if (last != NULL) 6298 last->parent = op; 6299 cur =op; 6300 last = NULL; 6301 } 6302 } else if (RAW == '|') { 6303 if (type == 0) type = CUR; 6304 6305 /* 6306 * Detect "Name , Name | Name" error 6307 */ 6308 else if (type != CUR) { 6309 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6310 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6311 type); 6312 if ((last != NULL) && (last != ret)) 6313 xmlFreeDocElementContent(ctxt->myDoc, last); 6314 if (ret != NULL) 6315 xmlFreeDocElementContent(ctxt->myDoc, ret); 6316 return(NULL); 6317 } 6318 NEXT; 6319 6320 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6321 if (op == NULL) { 6322 if ((last != NULL) && (last != ret)) 6323 
xmlFreeDocElementContent(ctxt->myDoc, last); 6324 if (ret != NULL) 6325 xmlFreeDocElementContent(ctxt->myDoc, ret); 6326 return(NULL); 6327 } 6328 if (last == NULL) { 6329 op->c1 = ret; 6330 if (ret != NULL) 6331 ret->parent = op; 6332 ret = cur = op; 6333 } else { 6334 cur->c2 = op; 6335 if (op != NULL) 6336 op->parent = cur; 6337 op->c1 = last; 6338 if (last != NULL) 6339 last->parent = op; 6340 cur =op; 6341 last = NULL; 6342 } 6343 } else { 6344 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6345 if ((last != NULL) && (last != ret)) 6346 xmlFreeDocElementContent(ctxt->myDoc, last); 6347 if (ret != NULL) 6348 xmlFreeDocElementContent(ctxt->myDoc, ret); 6349 return(NULL); 6350 } 6351 GROW; 6352 SKIP_BLANKS; 6353 GROW; 6354 if (RAW == '(') { 6355 int inputid = ctxt->input->id; 6356 /* Recurse on second child */ 6357 NEXT; 6358 SKIP_BLANKS; 6359 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6360 depth + 1); 6361 SKIP_BLANKS; 6362 } else { 6363 elem = xmlParseName(ctxt); 6364 if (elem == NULL) { 6365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6366 if (ret != NULL) 6367 xmlFreeDocElementContent(ctxt->myDoc, ret); 6368 return(NULL); 6369 } 6370 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6371 if (last == NULL) { 6372 if (ret != NULL) 6373 xmlFreeDocElementContent(ctxt->myDoc, ret); 6374 return(NULL); 6375 } 6376 if (RAW == '?') { 6377 last->ocur = XML_ELEMENT_CONTENT_OPT; 6378 NEXT; 6379 } else if (RAW == '*') { 6380 last->ocur = XML_ELEMENT_CONTENT_MULT; 6381 NEXT; 6382 } else if (RAW == '+') { 6383 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6384 NEXT; 6385 } else { 6386 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6387 } 6388 } 6389 SKIP_BLANKS; 6390 GROW; 6391 } 6392 if ((cur != NULL) && (last != NULL)) { 6393 cur->c2 = last; 6394 if (last != NULL) 6395 last->parent = cur; 6396 } 6397 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6398 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 
6399"Element content declaration doesn't start and stop in the same entity\n", 6400 NULL, NULL); 6401 } 6402 NEXT; 6403 if (RAW == '?') { 6404 if (ret != NULL) { 6405 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6406 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6407 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6408 else 6409 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6410 } 6411 NEXT; 6412 } else if (RAW == '*') { 6413 if (ret != NULL) { 6414 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6415 cur = ret; 6416 /* 6417 * Some normalization: 6418 * (a | b* | c?)* == (a | b | c)* 6419 */ 6420 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6421 if ((cur->c1 != NULL) && 6422 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6423 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6424 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6425 if ((cur->c2 != NULL) && 6426 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6427 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6428 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6429 cur = cur->c2; 6430 } 6431 } 6432 NEXT; 6433 } else if (RAW == '+') { 6434 if (ret != NULL) { 6435 int found = 0; 6436 6437 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6439 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6440 else 6441 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6442 /* 6443 * Some normalization: 6444 * (a | b*)+ == (a | b)* 6445 * (a | b?)+ == (a | b)* 6446 */ 6447 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6448 if ((cur->c1 != NULL) && 6449 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6450 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6451 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6452 found = 1; 6453 } 6454 if ((cur->c2 != NULL) && 6455 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6456 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6457 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6458 found = 1; 6459 } 6460 cur = cur->c2; 6461 } 6462 if (found) 6463 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6464 } 6465 NEXT; 
6466 } 6467 return(ret); 6468} 6469 6470/** 6471 * xmlParseElementChildrenContentDecl: 6472 * @ctxt: an XML parser context 6473 * @inputchk: the input used for the current entity, needed for boundary checks 6474 * 6475 * parse the declaration for a Mixed Element content 6476 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6477 * 6478 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6479 * 6480 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6481 * 6482 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6483 * 6484 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6485 * 6486 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6487 * TODO Parameter-entity replacement text must be properly nested 6488 * with parenthesized groups. That is to say, if either of the 6489 * opening or closing parentheses in a choice, seq, or Mixed 6490 * construct is contained in the replacement text for a parameter 6491 * entity, both must be contained in the same replacement text. For 6492 * interoperability, if a parameter-entity reference appears in a 6493 * choice, seq, or Mixed construct, its replacement text should not 6494 * be empty, and neither the first nor last non-blank character of 6495 * the replacement text should be a connector (| or ,). 6496 * 6497 * Returns the tree of xmlElementContentPtr describing the element 6498 * hierarchy. 6499 */ 6500xmlElementContentPtr 6501xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6502 /* stub left for API/ABI compat */ 6503 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6504} 6505 6506/** 6507 * xmlParseElementContentDecl: 6508 * @ctxt: an XML parser context 6509 * @name: the name of the element being defined. 
6510 * @result: the Element Content pointer will be stored here if any 6511 * 6512 * parse the declaration for an Element content either Mixed or Children, 6513 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6514 * 6515 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6516 * 6517 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6518 */ 6519 6520int 6521xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6522 xmlElementContentPtr *result) { 6523 6524 xmlElementContentPtr tree = NULL; 6525 int inputid = ctxt->input->id; 6526 int res; 6527 6528 *result = NULL; 6529 6530 if (RAW != '(') { 6531 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6532 "xmlParseElementContentDecl : %s '(' expected\n", name); 6533 return(-1); 6534 } 6535 NEXT; 6536 GROW; 6537 if (ctxt->instate == XML_PARSER_EOF) 6538 return(-1); 6539 SKIP_BLANKS; 6540 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6541 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6542 res = XML_ELEMENT_TYPE_MIXED; 6543 } else { 6544 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6545 res = XML_ELEMENT_TYPE_ELEMENT; 6546 } 6547 SKIP_BLANKS; 6548 *result = tree; 6549 return(res); 6550} 6551 6552/** 6553 * xmlParseElementDecl: 6554 * @ctxt: an XML parser context 6555 * 6556 * parse an Element declaration. 6557 * 6558 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6559 * 6560 * [ VC: Unique Element Type Declaration ] 6561 * No element type may be declared more than once 6562 * 6563 * Returns the type of the element, or -1 in case of error 6564 */ 6565int 6566xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6567 const xmlChar *name; 6568 int ret = -1; 6569 xmlElementContentPtr content = NULL; 6570 6571 /* GROW; done in the caller */ 6572 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6573 xmlParserInputPtr input = ctxt->input; 6574 6575 SKIP(9); 6576 if (!IS_BLANK_CH(CUR)) { 6577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6578 "Space required after 'ELEMENT'\n"); 6579 } 6580 SKIP_BLANKS; 6581 name = xmlParseName(ctxt); 6582 if (name == NULL) { 6583 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6584 "xmlParseElementDecl: no name for Element\n"); 6585 return(-1); 6586 } 6587 while ((RAW == 0) && (ctxt->inputNr > 1)) 6588 xmlPopInput(ctxt); 6589 if (!IS_BLANK_CH(CUR)) { 6590 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6591 "Space required after the element name\n"); 6592 } 6593 SKIP_BLANKS; 6594 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6595 SKIP(5); 6596 /* 6597 * Element must always be empty. 6598 */ 6599 ret = XML_ELEMENT_TYPE_EMPTY; 6600 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6601 (NXT(2) == 'Y')) { 6602 SKIP(3); 6603 /* 6604 * Element is a generic container. 6605 */ 6606 ret = XML_ELEMENT_TYPE_ANY; 6607 } else if (RAW == '(') { 6608 ret = xmlParseElementContentDecl(ctxt, name, &content); 6609 } else { 6610 /* 6611 * [ WFC: PEs in Internal Subset ] error handling. 
6612 */ 6613 if ((RAW == '%') && (ctxt->external == 0) && 6614 (ctxt->inputNr == 1)) { 6615 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6616 "PEReference: forbidden within markup decl in internal subset\n"); 6617 } else { 6618 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6619 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6620 } 6621 return(-1); 6622 } 6623 6624 SKIP_BLANKS; 6625 /* 6626 * Pop-up of finished entities. 6627 */ 6628 while ((RAW == 0) && (ctxt->inputNr > 1)) 6629 xmlPopInput(ctxt); 6630 SKIP_BLANKS; 6631 6632 if (RAW != '>') { 6633 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6634 if (content != NULL) { 6635 xmlFreeDocElementContent(ctxt->myDoc, content); 6636 } 6637 } else { 6638 if (input != ctxt->input) { 6639 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6640 "Element declaration doesn't start and stop in the same entity\n"); 6641 } 6642 6643 NEXT; 6644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6645 (ctxt->sax->elementDecl != NULL)) { 6646 if (content != NULL) 6647 content->parent = NULL; 6648 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6649 content); 6650 if ((content != NULL) && (content->parent == NULL)) { 6651 /* 6652 * this is a trick: if xmlAddElementDecl is called, 6653 * instead of copying the full tree it is plugged directly 6654 * if called from the parser. Avoid duplicating the 6655 * interfaces or change the API/ABI 6656 */ 6657 xmlFreeDocElementContent(ctxt->myDoc, content); 6658 } 6659 } else if (content != NULL) { 6660 xmlFreeDocElementContent(ctxt->myDoc, content); 6661 } 6662 } 6663 } 6664 return(ret); 6665} 6666 6667/** 6668 * xmlParseConditionalSections 6669 * @ctxt: an XML parser context 6670 * 6671 * [61] conditionalSect ::= includeSect | ignoreSect 6672 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6673 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6674 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6675 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6676 */ 6677 6678static void 6679xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6680 int id = ctxt->input->id; 6681 6682 SKIP(3); 6683 SKIP_BLANKS; 6684 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6685 SKIP(7); 6686 SKIP_BLANKS; 6687 if (RAW != '[') { 6688 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6689 } else { 6690 if (ctxt->input->id != id) { 6691 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6692 "All markup of the conditional section is not in the same entity\n", 6693 NULL, NULL); 6694 } 6695 NEXT; 6696 } 6697 if (xmlParserDebugEntities) { 6698 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6699 xmlGenericError(xmlGenericErrorContext, 6700 "%s(%d): ", ctxt->input->filename, 6701 ctxt->input->line); 6702 xmlGenericError(xmlGenericErrorContext, 6703 "Entering INCLUDE Conditional Section\n"); 6704 } 6705 6706 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6707 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6708 const xmlChar *check = CUR_PTR; 6709 unsigned int cons = ctxt->input->consumed; 6710 6711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6712 xmlParseConditionalSections(ctxt); 6713 } else if (IS_BLANK_CH(CUR)) { 6714 NEXT; 6715 } else if (RAW == '%') { 6716 xmlParsePEReference(ctxt); 6717 } else 6718 xmlParseMarkupDecl(ctxt); 6719 6720 /* 6721 * Pop-up of finished entities. 
6722 */ 6723 while ((RAW == 0) && (ctxt->inputNr > 1)) 6724 xmlPopInput(ctxt); 6725 6726 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6727 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6728 break; 6729 } 6730 } 6731 if (xmlParserDebugEntities) { 6732 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6733 xmlGenericError(xmlGenericErrorContext, 6734 "%s(%d): ", ctxt->input->filename, 6735 ctxt->input->line); 6736 xmlGenericError(xmlGenericErrorContext, 6737 "Leaving INCLUDE Conditional Section\n"); 6738 } 6739 6740 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6741 int state; 6742 xmlParserInputState instate; 6743 int depth = 0; 6744 6745 SKIP(6); 6746 SKIP_BLANKS; 6747 if (RAW != '[') { 6748 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6749 } else { 6750 if (ctxt->input->id != id) { 6751 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6752 "All markup of the conditional section is not in the same entity\n", 6753 NULL, NULL); 6754 } 6755 NEXT; 6756 } 6757 if (xmlParserDebugEntities) { 6758 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6759 xmlGenericError(xmlGenericErrorContext, 6760 "%s(%d): ", ctxt->input->filename, 6761 ctxt->input->line); 6762 xmlGenericError(xmlGenericErrorContext, 6763 "Entering IGNORE Conditional Section\n"); 6764 } 6765 6766 /* 6767 * Parse up to the end of the conditional section 6768 * But disable SAX event generating DTD building in the meantime 6769 */ 6770 state = ctxt->disableSAX; 6771 instate = ctxt->instate; 6772 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6773 ctxt->instate = XML_PARSER_IGNORE; 6774 6775 while (((depth >= 0) && (RAW != 0)) && 6776 (ctxt->instate != XML_PARSER_EOF)) { 6777 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6778 depth++; 6779 SKIP(3); 6780 continue; 6781 } 6782 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6783 if (--depth >= 0) SKIP(3); 6784 continue; 6785 } 6786 NEXT; 6787 continue; 6788 } 6789 6790 ctxt->disableSAX = 
state; 6791 ctxt->instate = instate; 6792 6793 if (xmlParserDebugEntities) { 6794 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6795 xmlGenericError(xmlGenericErrorContext, 6796 "%s(%d): ", ctxt->input->filename, 6797 ctxt->input->line); 6798 xmlGenericError(xmlGenericErrorContext, 6799 "Leaving IGNORE Conditional Section\n"); 6800 } 6801 6802 } else { 6803 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6804 } 6805 6806 if (RAW == 0) 6807 SHRINK; 6808 6809 if (RAW == 0) { 6810 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6811 } else { 6812 if (ctxt->input->id != id) { 6813 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6814 "All markup of the conditional section is not in the same entity\n", 6815 NULL, NULL); 6816 } 6817 SKIP(3); 6818 } 6819} 6820 6821/** 6822 * xmlParseMarkupDecl: 6823 * @ctxt: an XML parser context 6824 * 6825 * parse Markup declarations 6826 * 6827 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6828 * NotationDecl | PI | Comment 6829 * 6830 * [ VC: Proper Declaration/PE Nesting ] 6831 * Parameter-entity replacement text must be properly nested with 6832 * markup declarations. That is to say, if either the first character 6833 * or the last character of a markup declaration (markupdecl above) is 6834 * contained in the replacement text for a parameter-entity reference, 6835 * both must be contained in the same replacement text. 6836 * 6837 * [ WFC: PEs in Internal Subset ] 6838 * In the internal DTD subset, parameter-entity references can occur 6839 * only where markup declarations can occur, not within markup declarations. 6840 * (This does not apply to references that occur in external parameter 6841 * entities or to the external subset.) 
6842 */ 6843void 6844xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6845 GROW; 6846 if (CUR == '<') { 6847 if (NXT(1) == '!') { 6848 switch (NXT(2)) { 6849 case 'E': 6850 if (NXT(3) == 'L') 6851 xmlParseElementDecl(ctxt); 6852 else if (NXT(3) == 'N') 6853 xmlParseEntityDecl(ctxt); 6854 break; 6855 case 'A': 6856 xmlParseAttributeListDecl(ctxt); 6857 break; 6858 case 'N': 6859 xmlParseNotationDecl(ctxt); 6860 break; 6861 case '-': 6862 xmlParseComment(ctxt); 6863 break; 6864 default: 6865 /* there is an error but it will be detected later */ 6866 break; 6867 } 6868 } else if (NXT(1) == '?') { 6869 xmlParsePI(ctxt); 6870 } 6871 } 6872 /* 6873 * This is only for internal subset. On external entities, 6874 * the replacement is done before parsing stage 6875 */ 6876 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6877 xmlParsePEReference(ctxt); 6878 6879 /* 6880 * Conditional sections are allowed from entities included 6881 * by PE References in the internal subset. 6882 */ 6883 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6884 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6885 xmlParseConditionalSections(ctxt); 6886 } 6887 } 6888 6889 ctxt->instate = XML_PARSER_DTD; 6890} 6891 6892/** 6893 * xmlParseTextDecl: 6894 * @ctxt: an XML parser context 6895 * 6896 * parse an XML declaration header for external entities 6897 * 6898 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6899 */ 6900 6901void 6902xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6903 xmlChar *version; 6904 const xmlChar *encoding; 6905 6906 /* 6907 * We know that '<?xml' is here. 6908 */ 6909 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6910 SKIP(5); 6911 } else { 6912 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6913 return; 6914 } 6915 6916 if (!IS_BLANK_CH(CUR)) { 6917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6918 "Space needed after '<?xml'\n"); 6919 } 6920 SKIP_BLANKS; 6921 6922 /* 6923 * We may have the VersionInfo here. 
6924 */ 6925 version = xmlParseVersionInfo(ctxt); 6926 if (version == NULL) 6927 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6928 else { 6929 if (!IS_BLANK_CH(CUR)) { 6930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6931 "Space needed here\n"); 6932 } 6933 } 6934 ctxt->input->version = version; 6935 6936 /* 6937 * We must have the encoding declaration 6938 */ 6939 encoding = xmlParseEncodingDecl(ctxt); 6940 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6941 /* 6942 * The XML REC instructs us to stop parsing right here 6943 */ 6944 return; 6945 } 6946 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6947 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6948 "Missing encoding in text declaration\n"); 6949 } 6950 6951 SKIP_BLANKS; 6952 if ((RAW == '?') && (NXT(1) == '>')) { 6953 SKIP(2); 6954 } else if (RAW == '>') { 6955 /* Deprecated old WD ... */ 6956 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6957 NEXT; 6958 } else { 6959 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6960 MOVETO_ENDTAG(CUR_PTR); 6961 NEXT; 6962 } 6963} 6964 6965/** 6966 * xmlParseExternalSubset: 6967 * @ctxt: an XML parser context 6968 * @ExternalID: the external identifier 6969 * @SystemID: the system identifier (or URL) 6970 * 6971 * parse Markup declarations from an external subset 6972 * 6973 * [30] extSubset ::= textDecl? 
extSubsetDecl 6974 * 6975 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6976 */ 6977void 6978xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6979 const xmlChar *SystemID) { 6980 xmlDetectSAX2(ctxt); 6981 GROW; 6982 6983 if ((ctxt->encoding == NULL) && 6984 (ctxt->input->end - ctxt->input->cur >= 4)) { 6985 xmlChar start[4]; 6986 xmlCharEncoding enc; 6987 6988 start[0] = RAW; 6989 start[1] = NXT(1); 6990 start[2] = NXT(2); 6991 start[3] = NXT(3); 6992 enc = xmlDetectCharEncoding(start, 4); 6993 if (enc != XML_CHAR_ENCODING_NONE) 6994 xmlSwitchEncoding(ctxt, enc); 6995 } 6996 6997 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6998 xmlParseTextDecl(ctxt); 6999 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7000 /* 7001 * The XML REC instructs us to stop parsing right here 7002 */ 7003 ctxt->instate = XML_PARSER_EOF; 7004 return; 7005 } 7006 } 7007 if (ctxt->myDoc == NULL) { 7008 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7009 if (ctxt->myDoc == NULL) { 7010 xmlErrMemory(ctxt, "New Doc failed"); 7011 return; 7012 } 7013 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7014 } 7015 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7016 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7017 7018 ctxt->instate = XML_PARSER_DTD; 7019 ctxt->external = 1; 7020 while (((RAW == '<') && (NXT(1) == '?')) || 7021 ((RAW == '<') && (NXT(1) == '!')) || 7022 (RAW == '%') || IS_BLANK_CH(CUR)) { 7023 const xmlChar *check = CUR_PTR; 7024 unsigned int cons = ctxt->input->consumed; 7025 7026 GROW; 7027 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7028 xmlParseConditionalSections(ctxt); 7029 } else if (IS_BLANK_CH(CUR)) { 7030 NEXT; 7031 } else if (RAW == '%') { 7032 xmlParsePEReference(ctxt); 7033 } else 7034 xmlParseMarkupDecl(ctxt); 7035 7036 /* 7037 * Pop-up of finished entities. 
7038 */ 7039 while ((RAW == 0) && (ctxt->inputNr > 1)) 7040 xmlPopInput(ctxt); 7041 7042 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7043 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7044 break; 7045 } 7046 } 7047 7048 if (RAW != 0) { 7049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7050 } 7051 7052} 7053 7054/** 7055 * xmlParseReference: 7056 * @ctxt: an XML parser context 7057 * 7058 * parse and handle entity references in content, depending on the SAX 7059 * interface, this may end-up in a call to character() if this is a 7060 * CharRef, a predefined entity, if there is no reference() callback. 7061 * or if the parser was asked to switch to that mode. 7062 * 7063 * [67] Reference ::= EntityRef | CharRef 7064 */ 7065void 7066xmlParseReference(xmlParserCtxtPtr ctxt) { 7067 xmlEntityPtr ent; 7068 xmlChar *val; 7069 int was_checked; 7070 xmlNodePtr list = NULL; 7071 xmlParserErrors ret = XML_ERR_OK; 7072 7073 7074 if (RAW != '&') 7075 return; 7076 7077 /* 7078 * Simple case of a CharRef 7079 */ 7080 if (NXT(1) == '#') { 7081 int i = 0; 7082 xmlChar out[10]; 7083 int hex = NXT(2); 7084 int value = xmlParseCharRef(ctxt); 7085 7086 if (value == 0) 7087 return; 7088 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7089 /* 7090 * So we are using non-UTF-8 buffers 7091 * Check that the char fit on 8bits, if not 7092 * generate a CharRef. 
7093 */ 7094 if (value <= 0xFF) { 7095 out[0] = value; 7096 out[1] = 0; 7097 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7098 (!ctxt->disableSAX)) 7099 ctxt->sax->characters(ctxt->userData, out, 1); 7100 } else { 7101 if ((hex == 'x') || (hex == 'X')) 7102 snprintf((char *)out, sizeof(out), "#x%X", value); 7103 else 7104 snprintf((char *)out, sizeof(out), "#%d", value); 7105 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7106 (!ctxt->disableSAX)) 7107 ctxt->sax->reference(ctxt->userData, out); 7108 } 7109 } else { 7110 /* 7111 * Just encode the value in UTF-8 7112 */ 7113 COPY_BUF(0 ,out, i, value); 7114 out[i] = 0; 7115 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7116 (!ctxt->disableSAX)) 7117 ctxt->sax->characters(ctxt->userData, out, i); 7118 } 7119 return; 7120 } 7121 7122 /* 7123 * We are seeing an entity reference 7124 */ 7125 ent = xmlParseEntityRef(ctxt); 7126 if (ent == NULL) return; 7127 if (!ctxt->wellFormed) 7128 return; 7129 was_checked = ent->checked; 7130 7131 /* special case of predefined entities */ 7132 if ((ent->name == NULL) || 7133 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7134 val = ent->content; 7135 if (val == NULL) return; 7136 /* 7137 * inline the entity. 7138 */ 7139 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7140 (!ctxt->disableSAX)) 7141 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7142 return; 7143 } 7144 7145 /* 7146 * The first reference to the entity trigger a parsing phase 7147 * where the ent->children is filled with the result from 7148 * the parsing. 7149 * Note: external parsed entities will not be loaded, it is not 7150 * required for a non-validating parser, unless the parsing option 7151 * of validating, or substituting entities were given. Doing so is 7152 * far more secure as the parser will only process data coming from 7153 * the document entity by default. 
7154 */ 7155 if ((ent->checked == 0) && 7156 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7157 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7158 unsigned long oldnbent = ctxt->nbentities; 7159 7160 /* 7161 * This is a bit hackish but this seems the best 7162 * way to make sure both SAX and DOM entity support 7163 * behaves okay. 7164 */ 7165 void *user_data; 7166 if (ctxt->userData == ctxt) 7167 user_data = NULL; 7168 else 7169 user_data = ctxt->userData; 7170 7171 /* 7172 * Check that this entity is well formed 7173 * 4.3.2: An internal general parsed entity is well-formed 7174 * if its replacement text matches the production labeled 7175 * content. 7176 */ 7177 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7178 ctxt->depth++; 7179 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7180 user_data, &list); 7181 ctxt->depth--; 7182 7183 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7184 ctxt->depth++; 7185 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7186 user_data, ctxt->depth, ent->URI, 7187 ent->ExternalID, &list); 7188 ctxt->depth--; 7189 } else { 7190 ret = XML_ERR_ENTITY_PE_INTERNAL; 7191 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7192 "invalid entity type found\n", NULL); 7193 } 7194 7195 /* 7196 * Store the number of entities needing parsing for this entity 7197 * content and do checkings 7198 */ 7199 ent->checked = ctxt->nbentities - oldnbent; 7200 if (ret == XML_ERR_ENTITY_LOOP) { 7201 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7202 xmlFreeNodeList(list); 7203 return; 7204 } 7205 if (xmlParserEntityCheck(ctxt, 0, ent)) { 7206 xmlFreeNodeList(list); 7207 return; 7208 } 7209 7210 if ((ret == XML_ERR_OK) && (list != NULL)) { 7211 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7212 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7213 (ent->children == NULL)) { 7214 ent->children = list; 7215 if (ctxt->replaceEntities) { 7216 /* 7217 * Prune it directly in the generated 
document 7218 * except for single text nodes. 7219 */ 7220 if (((list->type == XML_TEXT_NODE) && 7221 (list->next == NULL)) || 7222 (ctxt->parseMode == XML_PARSE_READER)) { 7223 list->parent = (xmlNodePtr) ent; 7224 list = NULL; 7225 ent->owner = 1; 7226 } else { 7227 ent->owner = 0; 7228 while (list != NULL) { 7229 list->parent = (xmlNodePtr) ctxt->node; 7230 list->doc = ctxt->myDoc; 7231 if (list->next == NULL) 7232 ent->last = list; 7233 list = list->next; 7234 } 7235 list = ent->children; 7236#ifdef LIBXML_LEGACY_ENABLED 7237 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7238 xmlAddEntityReference(ent, list, NULL); 7239#endif /* LIBXML_LEGACY_ENABLED */ 7240 } 7241 } else { 7242 ent->owner = 1; 7243 while (list != NULL) { 7244 list->parent = (xmlNodePtr) ent; 7245 xmlSetTreeDoc(list, ent->doc); 7246 if (list->next == NULL) 7247 ent->last = list; 7248 list = list->next; 7249 } 7250 } 7251 } else { 7252 xmlFreeNodeList(list); 7253 list = NULL; 7254 } 7255 } else if ((ret != XML_ERR_OK) && 7256 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7257 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7258 "Entity '%s' failed to parse\n", ent->name); 7259 } else if (list != NULL) { 7260 xmlFreeNodeList(list); 7261 list = NULL; 7262 } 7263 if (ent->checked == 0) 7264 ent->checked = 1; 7265 } else if (ent->checked != 1) { 7266 ctxt->nbentities += ent->checked; 7267 } 7268 7269 /* 7270 * Now that the entity content has been gathered 7271 * provide it to the application, this can take different forms based 7272 * on the parsing modes. 7273 */ 7274 if (ent->children == NULL) { 7275 /* 7276 * Probably running in SAX mode and the callbacks don't 7277 * build the entity content. 
So unless we already went 7278 * though parsing for first checking go though the entity 7279 * content to generate callbacks associated to the entity 7280 */ 7281 if (was_checked != 0) { 7282 void *user_data; 7283 /* 7284 * This is a bit hackish but this seems the best 7285 * way to make sure both SAX and DOM entity support 7286 * behaves okay. 7287 */ 7288 if (ctxt->userData == ctxt) 7289 user_data = NULL; 7290 else 7291 user_data = ctxt->userData; 7292 7293 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7294 ctxt->depth++; 7295 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7296 ent->content, user_data, NULL); 7297 ctxt->depth--; 7298 } else if (ent->etype == 7299 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7300 ctxt->depth++; 7301 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7302 ctxt->sax, user_data, ctxt->depth, 7303 ent->URI, ent->ExternalID, NULL); 7304 ctxt->depth--; 7305 } else { 7306 ret = XML_ERR_ENTITY_PE_INTERNAL; 7307 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7308 "invalid entity type found\n", NULL); 7309 } 7310 if (ret == XML_ERR_ENTITY_LOOP) { 7311 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7312 return; 7313 } 7314 } 7315 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7316 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7317 /* 7318 * Entity reference callback comes second, it's somewhat 7319 * superfluous but a compatibility to historical behaviour 7320 */ 7321 ctxt->sax->reference(ctxt->userData, ent->name); 7322 } 7323 return; 7324 } 7325 7326 /* 7327 * If we didn't get any children for the entity being built 7328 */ 7329 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7330 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7331 /* 7332 * Create a node. 
7333 */ 7334 ctxt->sax->reference(ctxt->userData, ent->name); 7335 return; 7336 } 7337 7338 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7339 /* 7340 * There is a problem on the handling of _private for entities 7341 * (bug 155816): Should we copy the content of the field from 7342 * the entity (possibly overwriting some value set by the user 7343 * when a copy is created), should we leave it alone, or should 7344 * we try to take care of different situations? The problem 7345 * is exacerbated by the usage of this field by the xmlReader. 7346 * To fix this bug, we look at _private on the created node 7347 * and, if it's NULL, we copy in whatever was in the entity. 7348 * If it's not NULL we leave it alone. This is somewhat of a 7349 * hack - maybe we should have further tests to determine 7350 * what to do. 7351 */ 7352 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7353 /* 7354 * Seems we are generating the DOM content, do 7355 * a simple tree copy for all references except the first 7356 * In the first occurrence list contains the replacement. 7357 */ 7358 if (((list == NULL) && (ent->owner == 0)) || 7359 (ctxt->parseMode == XML_PARSE_READER)) { 7360 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7361 7362 /* 7363 * when operating on a reader, the entities definitions 7364 * are always owning the entities subtree. 
7365 if (ctxt->parseMode == XML_PARSE_READER) 7366 ent->owner = 1; 7367 */ 7368 7369 cur = ent->children; 7370 while (cur != NULL) { 7371 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7372 if (nw != NULL) { 7373 if (nw->_private == NULL) 7374 nw->_private = cur->_private; 7375 if (firstChild == NULL){ 7376 firstChild = nw; 7377 } 7378 nw = xmlAddChild(ctxt->node, nw); 7379 } 7380 if (cur == ent->last) { 7381 /* 7382 * needed to detect some strange empty 7383 * node cases in the reader tests 7384 */ 7385 if ((ctxt->parseMode == XML_PARSE_READER) && 7386 (nw != NULL) && 7387 (nw->type == XML_ELEMENT_NODE) && 7388 (nw->children == NULL)) 7389 nw->extra = 1; 7390 7391 break; 7392 } 7393 cur = cur->next; 7394 } 7395#ifdef LIBXML_LEGACY_ENABLED 7396 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7397 xmlAddEntityReference(ent, firstChild, nw); 7398#endif /* LIBXML_LEGACY_ENABLED */ 7399 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7400 xmlNodePtr nw = NULL, cur, next, last, 7401 firstChild = NULL; 7402 /* 7403 * Copy the entity child list and make it the new 7404 * entity child list. The goal is to make sure any 7405 * ID or REF referenced will be the one from the 7406 * document content and not the entity copy. 
7407 */ 7408 cur = ent->children; 7409 ent->children = NULL; 7410 last = ent->last; 7411 ent->last = NULL; 7412 while (cur != NULL) { 7413 next = cur->next; 7414 cur->next = NULL; 7415 cur->parent = NULL; 7416 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7417 if (nw != NULL) { 7418 if (nw->_private == NULL) 7419 nw->_private = cur->_private; 7420 if (firstChild == NULL){ 7421 firstChild = cur; 7422 } 7423 xmlAddChild((xmlNodePtr) ent, nw); 7424 xmlAddChild(ctxt->node, cur); 7425 } 7426 if (cur == last) 7427 break; 7428 cur = next; 7429 } 7430 if (ent->owner == 0) 7431 ent->owner = 1; 7432#ifdef LIBXML_LEGACY_ENABLED 7433 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7434 xmlAddEntityReference(ent, firstChild, nw); 7435#endif /* LIBXML_LEGACY_ENABLED */ 7436 } else { 7437 const xmlChar *nbktext; 7438 7439 /* 7440 * the name change is to avoid coalescing of the 7441 * node with a possible previous text one which 7442 * would make ent->children a dangling pointer 7443 */ 7444 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7445 -1); 7446 if (ent->children->type == XML_TEXT_NODE) 7447 ent->children->name = nbktext; 7448 if ((ent->last != ent->children) && 7449 (ent->last->type == XML_TEXT_NODE)) 7450 ent->last->name = nbktext; 7451 xmlAddChildList(ctxt->node, ent->children); 7452 } 7453 7454 /* 7455 * This is to avoid a nasty side effect, see 7456 * characters() in SAX.c 7457 */ 7458 ctxt->nodemem = 0; 7459 ctxt->nodelen = 0; 7460 return; 7461 } 7462 } 7463} 7464 7465/** 7466 * xmlParseEntityRef: 7467 * @ctxt: an XML parser context 7468 * 7469 * parse ENTITY references declarations 7470 * 7471 * [68] EntityRef ::= '&' Name ';' 7472 * 7473 * [ WFC: Entity Declared ] 7474 * In a document without any DTD, a document with only an internal DTD 7475 * subset which contains no parameter entity references, or a document 7476 * with "standalone='yes'", the Name given in the entity reference 7477 * must match that in an entity declaration, except that well-formed 
7478 * documents need not declare any of the following entities: amp, lt, 7479 * gt, apos, quot. The declaration of a parameter entity must precede 7480 * any reference to it. Similarly, the declaration of a general entity 7481 * must precede any reference to it which appears in a default value in an 7482 * attribute-list declaration. Note that if entities are declared in the 7483 * external subset or in external parameter entities, a non-validating 7484 * processor is not obligated to read and process their declarations; 7485 * for such documents, the rule that an entity must be declared is a 7486 * well-formedness constraint only if standalone='yes'. 7487 * 7488 * [ WFC: Parsed Entity ] 7489 * An entity reference must not contain the name of an unparsed entity 7490 * 7491 * Returns the xmlEntityPtr if found, or NULL otherwise. 7492 */ 7493xmlEntityPtr 7494xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7495 const xmlChar *name; 7496 xmlEntityPtr ent = NULL; 7497 7498 GROW; 7499 if (ctxt->instate == XML_PARSER_EOF) 7500 return(NULL); 7501 7502 if (RAW != '&') 7503 return(NULL); 7504 NEXT; 7505 name = xmlParseName(ctxt); 7506 if (name == NULL) { 7507 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7508 "xmlParseEntityRef: no name\n"); 7509 return(NULL); 7510 } 7511 if (RAW != ';') { 7512 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7513 return(NULL); 7514 } 7515 NEXT; 7516 7517 /* 7518 * Predefined entites override any extra definition 7519 */ 7520 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7521 ent = xmlGetPredefinedEntity(name); 7522 if (ent != NULL) 7523 return(ent); 7524 } 7525 7526 /* 7527 * Increate the number of entity references parsed 7528 */ 7529 ctxt->nbentities++; 7530 7531 /* 7532 * Ask first SAX for entity resolution, otherwise try the 7533 * entities which may have stored in the parser context. 
7534 */ 7535 if (ctxt->sax != NULL) { 7536 if (ctxt->sax->getEntity != NULL) 7537 ent = ctxt->sax->getEntity(ctxt->userData, name); 7538 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7539 (ctxt->options & XML_PARSE_OLDSAX)) 7540 ent = xmlGetPredefinedEntity(name); 7541 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7542 (ctxt->userData==ctxt)) { 7543 ent = xmlSAX2GetEntity(ctxt, name); 7544 } 7545 } 7546 /* 7547 * [ WFC: Entity Declared ] 7548 * In a document without any DTD, a document with only an 7549 * internal DTD subset which contains no parameter entity 7550 * references, or a document with "standalone='yes'", the 7551 * Name given in the entity reference must match that in an 7552 * entity declaration, except that well-formed documents 7553 * need not declare any of the following entities: amp, lt, 7554 * gt, apos, quot. 7555 * The declaration of a parameter entity must precede any 7556 * reference to it. 7557 * Similarly, the declaration of a general entity must 7558 * precede any reference to it which appears in a default 7559 * value in an attribute-list declaration. Note that if 7560 * entities are declared in the external subset or in 7561 * external parameter entities, a non-validating processor 7562 * is not obligated to read and process their declarations; 7563 * for such documents, the rule that an entity must be 7564 * declared is a well-formedness constraint only if 7565 * standalone='yes'. 
7566 */ 7567 if (ent == NULL) { 7568 if ((ctxt->standalone == 1) || 7569 ((ctxt->hasExternalSubset == 0) && 7570 (ctxt->hasPErefs == 0))) { 7571 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7572 "Entity '%s' not defined\n", name); 7573 } else { 7574 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7575 "Entity '%s' not defined\n", name); 7576 if ((ctxt->inSubset == 0) && 7577 (ctxt->sax != NULL) && 7578 (ctxt->sax->reference != NULL)) { 7579 ctxt->sax->reference(ctxt->userData, name); 7580 } 7581 } 7582 ctxt->valid = 0; 7583 } 7584 7585 /* 7586 * [ WFC: Parsed Entity ] 7587 * An entity reference must not contain the name of an 7588 * unparsed entity 7589 */ 7590 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7591 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7592 "Entity reference to unparsed entity %s\n", name); 7593 } 7594 7595 /* 7596 * [ WFC: No External Entity References ] 7597 * Attribute values cannot contain direct or indirect 7598 * entity references to external entities. 7599 */ 7600 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7601 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7602 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7603 "Attribute references external entity '%s'\n", name); 7604 } 7605 /* 7606 * [ WFC: No < in Attribute Values ] 7607 * The replacement text of any entity referred to directly or 7608 * indirectly in an attribute value (other than "<") must 7609 * not contain a <. 7610 */ 7611 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7612 (ent != NULL) && (ent->content != NULL) && 7613 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7614 (xmlStrchr(ent->content, '<'))) { 7615 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7616 "'<' in entity '%s' is not allowed in attributes values\n", name); 7617 } 7618 7619 /* 7620 * Internal check, no parameter entities here ... 
7621 */ 7622 else { 7623 switch (ent->etype) { 7624 case XML_INTERNAL_PARAMETER_ENTITY: 7625 case XML_EXTERNAL_PARAMETER_ENTITY: 7626 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7627 "Attempt to reference the parameter entity '%s'\n", 7628 name); 7629 break; 7630 default: 7631 break; 7632 } 7633 } 7634 7635 /* 7636 * [ WFC: No Recursion ] 7637 * A parsed entity must not contain a recursive reference 7638 * to itself, either directly or indirectly. 7639 * Done somewhere else 7640 */ 7641 return(ent); 7642} 7643 7644/** 7645 * xmlParseStringEntityRef: 7646 * @ctxt: an XML parser context 7647 * @str: a pointer to an index in the string 7648 * 7649 * parse ENTITY references declarations, but this version parses it from 7650 * a string value. 7651 * 7652 * [68] EntityRef ::= '&' Name ';' 7653 * 7654 * [ WFC: Entity Declared ] 7655 * In a document without any DTD, a document with only an internal DTD 7656 * subset which contains no parameter entity references, or a document 7657 * with "standalone='yes'", the Name given in the entity reference 7658 * must match that in an entity declaration, except that well-formed 7659 * documents need not declare any of the following entities: amp, lt, 7660 * gt, apos, quot. The declaration of a parameter entity must precede 7661 * any reference to it. Similarly, the declaration of a general entity 7662 * must precede any reference to it which appears in a default value in an 7663 * attribute-list declaration. Note that if entities are declared in the 7664 * external subset or in external parameter entities, a non-validating 7665 * processor is not obligated to read and process their declarations; 7666 * for such documents, the rule that an entity must be declared is a 7667 * well-formedness constraint only if standalone='yes'. 7668 * 7669 * [ WFC: Parsed Entity ] 7670 * An entity reference must not contain the name of an unparsed entity 7671 * 7672 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7673 * is updated to the current location in the string. 7674 */ 7675static xmlEntityPtr 7676xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7677 xmlChar *name; 7678 const xmlChar *ptr; 7679 xmlChar cur; 7680 xmlEntityPtr ent = NULL; 7681 7682 if ((str == NULL) || (*str == NULL)) 7683 return(NULL); 7684 ptr = *str; 7685 cur = *ptr; 7686 if (cur != '&') 7687 return(NULL); 7688 7689 ptr++; 7690 name = xmlParseStringName(ctxt, &ptr); 7691 if (name == NULL) { 7692 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7693 "xmlParseStringEntityRef: no name\n"); 7694 *str = ptr; 7695 return(NULL); 7696 } 7697 if (*ptr != ';') { 7698 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7699 xmlFree(name); 7700 *str = ptr; 7701 return(NULL); 7702 } 7703 ptr++; 7704 7705 7706 /* 7707 * Predefined entites override any extra definition 7708 */ 7709 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7710 ent = xmlGetPredefinedEntity(name); 7711 if (ent != NULL) { 7712 xmlFree(name); 7713 *str = ptr; 7714 return(ent); 7715 } 7716 } 7717 7718 /* 7719 * Increate the number of entity references parsed 7720 */ 7721 ctxt->nbentities++; 7722 7723 /* 7724 * Ask first SAX for entity resolution, otherwise try the 7725 * entities which may have stored in the parser context. 
7726 */ 7727 if (ctxt->sax != NULL) { 7728 if (ctxt->sax->getEntity != NULL) 7729 ent = ctxt->sax->getEntity(ctxt->userData, name); 7730 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7731 ent = xmlGetPredefinedEntity(name); 7732 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7733 ent = xmlSAX2GetEntity(ctxt, name); 7734 } 7735 } 7736 7737 /* 7738 * [ WFC: Entity Declared ] 7739 * In a document without any DTD, a document with only an 7740 * internal DTD subset which contains no parameter entity 7741 * references, or a document with "standalone='yes'", the 7742 * Name given in the entity reference must match that in an 7743 * entity declaration, except that well-formed documents 7744 * need not declare any of the following entities: amp, lt, 7745 * gt, apos, quot. 7746 * The declaration of a parameter entity must precede any 7747 * reference to it. 7748 * Similarly, the declaration of a general entity must 7749 * precede any reference to it which appears in a default 7750 * value in an attribute-list declaration. Note that if 7751 * entities are declared in the external subset or in 7752 * external parameter entities, a non-validating processor 7753 * is not obligated to read and process their declarations; 7754 * for such documents, the rule that an entity must be 7755 * declared is a well-formedness constraint only if 7756 * standalone='yes'. 7757 */ 7758 if (ent == NULL) { 7759 if ((ctxt->standalone == 1) || 7760 ((ctxt->hasExternalSubset == 0) && 7761 (ctxt->hasPErefs == 0))) { 7762 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7763 "Entity '%s' not defined\n", name); 7764 } else { 7765 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7766 "Entity '%s' not defined\n", 7767 name); 7768 } 7769 /* TODO ? 
check regressions ctxt->valid = 0; */ 7770 } 7771 7772 /* 7773 * [ WFC: Parsed Entity ] 7774 * An entity reference must not contain the name of an 7775 * unparsed entity 7776 */ 7777 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7779 "Entity reference to unparsed entity %s\n", name); 7780 } 7781 7782 /* 7783 * [ WFC: No External Entity References ] 7784 * Attribute values cannot contain direct or indirect 7785 * entity references to external entities. 7786 */ 7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7788 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7789 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7790 "Attribute references external entity '%s'\n", name); 7791 } 7792 /* 7793 * [ WFC: No < in Attribute Values ] 7794 * The replacement text of any entity referred to directly or 7795 * indirectly in an attribute value (other than "<") must 7796 * not contain a <. 7797 */ 7798 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7799 (ent != NULL) && (ent->content != NULL) && 7800 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7801 (xmlStrchr(ent->content, '<'))) { 7802 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7803 "'<' in entity '%s' is not allowed in attributes values\n", 7804 name); 7805 } 7806 7807 /* 7808 * Internal check, no parameter entities here ... 7809 */ 7810 else { 7811 switch (ent->etype) { 7812 case XML_INTERNAL_PARAMETER_ENTITY: 7813 case XML_EXTERNAL_PARAMETER_ENTITY: 7814 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7815 "Attempt to reference the parameter entity '%s'\n", 7816 name); 7817 break; 7818 default: 7819 break; 7820 } 7821 } 7822 7823 /* 7824 * [ WFC: No Recursion ] 7825 * A parsed entity must not contain a recursive reference 7826 * to itself, either directly or indirectly. 
7827 * Done somewhere else 7828 */ 7829 7830 xmlFree(name); 7831 *str = ptr; 7832 return(ent); 7833} 7834 7835/** 7836 * xmlParsePEReference: 7837 * @ctxt: an XML parser context 7838 * 7839 * parse PEReference declarations 7840 * The entity content is handled directly by pushing it's content as 7841 * a new input stream. 7842 * 7843 * [69] PEReference ::= '%' Name ';' 7844 * 7845 * [ WFC: No Recursion ] 7846 * A parsed entity must not contain a recursive 7847 * reference to itself, either directly or indirectly. 7848 * 7849 * [ WFC: Entity Declared ] 7850 * In a document without any DTD, a document with only an internal DTD 7851 * subset which contains no parameter entity references, or a document 7852 * with "standalone='yes'", ... ... The declaration of a parameter 7853 * entity must precede any reference to it... 7854 * 7855 * [ VC: Entity Declared ] 7856 * In a document with an external subset or external parameter entities 7857 * with "standalone='no'", ... ... The declaration of a parameter entity 7858 * must precede any reference to it... 7859 * 7860 * [ WFC: In DTD ] 7861 * Parameter-entity references may only appear in the DTD. 7862 * NOTE: misleading but this is handled. 
7863 */ 7864void 7865xmlParsePEReference(xmlParserCtxtPtr ctxt) 7866{ 7867 const xmlChar *name; 7868 xmlEntityPtr entity = NULL; 7869 xmlParserInputPtr input; 7870 7871 if (RAW != '%') 7872 return; 7873 NEXT; 7874 name = xmlParseName(ctxt); 7875 if (name == NULL) { 7876 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7877 "xmlParsePEReference: no name\n"); 7878 return; 7879 } 7880 if (RAW != ';') { 7881 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7882 return; 7883 } 7884 7885 NEXT; 7886 7887 /* 7888 * Increate the number of entity references parsed 7889 */ 7890 ctxt->nbentities++; 7891 7892 /* 7893 * Request the entity from SAX 7894 */ 7895 if ((ctxt->sax != NULL) && 7896 (ctxt->sax->getParameterEntity != NULL)) 7897 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7898 name); 7899 if (entity == NULL) { 7900 /* 7901 * [ WFC: Entity Declared ] 7902 * In a document without any DTD, a document with only an 7903 * internal DTD subset which contains no parameter entity 7904 * references, or a document with "standalone='yes'", ... 7905 * ... The declaration of a parameter entity must precede 7906 * any reference to it... 7907 */ 7908 if ((ctxt->standalone == 1) || 7909 ((ctxt->hasExternalSubset == 0) && 7910 (ctxt->hasPErefs == 0))) { 7911 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7912 "PEReference: %%%s; not found\n", 7913 name); 7914 } else { 7915 /* 7916 * [ VC: Entity Declared ] 7917 * In a document with an external subset or external 7918 * parameter entities with "standalone='no'", ... 7919 * ... The declaration of a parameter entity must 7920 * precede any reference to it... 
7921 */ 7922 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7923 "PEReference: %%%s; not found\n", 7924 name, NULL); 7925 ctxt->valid = 0; 7926 } 7927 } else { 7928 /* 7929 * Internal checking in case the entity quest barfed 7930 */ 7931 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7932 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7933 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7934 "Internal: %%%s; is not a parameter entity\n", 7935 name, NULL); 7936 } else if (ctxt->input->free != deallocblankswrapper) { 7937 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7938 if (xmlPushInput(ctxt, input) < 0) 7939 return; 7940 } else { 7941 /* 7942 * TODO !!! 7943 * handle the extra spaces added before and after 7944 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7945 */ 7946 input = xmlNewEntityInputStream(ctxt, entity); 7947 if (xmlPushInput(ctxt, input) < 0) 7948 return; 7949 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7950 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7951 (IS_BLANK_CH(NXT(5)))) { 7952 xmlParseTextDecl(ctxt); 7953 if (ctxt->errNo == 7954 XML_ERR_UNSUPPORTED_ENCODING) { 7955 /* 7956 * The XML REC instructs us to stop parsing 7957 * right here 7958 */ 7959 ctxt->instate = XML_PARSER_EOF; 7960 return; 7961 } 7962 } 7963 } 7964 } 7965 ctxt->hasPErefs = 1; 7966} 7967 7968/** 7969 * xmlLoadEntityContent: 7970 * @ctxt: an XML parser context 7971 * @entity: an unloaded system entity 7972 * 7973 * Load the original content of the given system entity from the 7974 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7975 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7976 * 7977 * Returns 0 in case of success and -1 in case of failure 7978 */ 7979static int 7980xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7981 xmlParserInputPtr input; 7982 xmlBufferPtr buf; 7983 int l, c; 7984 int count = 0; 7985 7986 if ((ctxt == NULL) || (entity == NULL) || 7987 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7988 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7989 (entity->content != NULL)) { 7990 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7991 "xmlLoadEntityContent parameter error"); 7992 return(-1); 7993 } 7994 7995 if (xmlParserDebugEntities) 7996 xmlGenericError(xmlGenericErrorContext, 7997 "Reading %s entity content input\n", entity->name); 7998 7999 buf = xmlBufferCreate(); 8000 if (buf == NULL) { 8001 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8002 "xmlLoadEntityContent parameter error"); 8003 return(-1); 8004 } 8005 8006 input = xmlNewEntityInputStream(ctxt, entity); 8007 if (input == NULL) { 8008 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8009 "xmlLoadEntityContent input error"); 8010 xmlBufferFree(buf); 8011 return(-1); 8012 } 8013 8014 /* 8015 * Push the entity as the current input, read char by char 8016 * saving to the buffer until the end of the entity or an error 8017 */ 8018 if (xmlPushInput(ctxt, input) < 0) { 8019 xmlBufferFree(buf); 8020 return(-1); 8021 } 8022 8023 GROW; 8024 c = CUR_CHAR(l); 8025 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8026 (IS_CHAR(c))) { 8027 xmlBufferAdd(buf, ctxt->input->cur, l); 8028 if (count++ > XML_PARSER_CHUNK_SIZE) { 8029 count = 0; 8030 GROW; 8031 if (ctxt->instate == XML_PARSER_EOF) { 8032 xmlBufferFree(buf); 8033 return(-1); 8034 } 8035 } 8036 NEXTL(l); 8037 c = CUR_CHAR(l); 8038 if (c == 0) { 8039 count = 0; 8040 GROW; 8041 if (ctxt->instate == XML_PARSER_EOF) { 8042 xmlBufferFree(buf); 8043 
return(-1); 8044 } 8045 c = CUR_CHAR(l); 8046 } 8047 } 8048 8049 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8050 xmlPopInput(ctxt); 8051 } else if (!IS_CHAR(c)) { 8052 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8053 "xmlLoadEntityContent: invalid char value %d\n", 8054 c); 8055 xmlBufferFree(buf); 8056 return(-1); 8057 } 8058 entity->content = buf->content; 8059 buf->content = NULL; 8060 xmlBufferFree(buf); 8061 8062 return(0); 8063} 8064 8065/** 8066 * xmlParseStringPEReference: 8067 * @ctxt: an XML parser context 8068 * @str: a pointer to an index in the string 8069 * 8070 * parse PEReference declarations 8071 * 8072 * [69] PEReference ::= '%' Name ';' 8073 * 8074 * [ WFC: No Recursion ] 8075 * A parsed entity must not contain a recursive 8076 * reference to itself, either directly or indirectly. 8077 * 8078 * [ WFC: Entity Declared ] 8079 * In a document without any DTD, a document with only an internal DTD 8080 * subset which contains no parameter entity references, or a document 8081 * with "standalone='yes'", ... ... The declaration of a parameter 8082 * entity must precede any reference to it... 8083 * 8084 * [ VC: Entity Declared ] 8085 * In a document with an external subset or external parameter entities 8086 * with "standalone='no'", ... ... The declaration of a parameter entity 8087 * must precede any reference to it... 8088 * 8089 * [ WFC: In DTD ] 8090 * Parameter-entity references may only appear in the DTD. 8091 * NOTE: misleading but this is handled. 8092 * 8093 * Returns the string of the entity content. 
8094 * str is updated to the current value of the index 8095 */ 8096static xmlEntityPtr 8097xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8098 const xmlChar *ptr; 8099 xmlChar cur; 8100 xmlChar *name; 8101 xmlEntityPtr entity = NULL; 8102 8103 if ((str == NULL) || (*str == NULL)) return(NULL); 8104 ptr = *str; 8105 cur = *ptr; 8106 if (cur != '%') 8107 return(NULL); 8108 ptr++; 8109 name = xmlParseStringName(ctxt, &ptr); 8110 if (name == NULL) { 8111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8112 "xmlParseStringPEReference: no name\n"); 8113 *str = ptr; 8114 return(NULL); 8115 } 8116 cur = *ptr; 8117 if (cur != ';') { 8118 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8119 xmlFree(name); 8120 *str = ptr; 8121 return(NULL); 8122 } 8123 ptr++; 8124 8125 /* 8126 * Increate the number of entity references parsed 8127 */ 8128 ctxt->nbentities++; 8129 8130 /* 8131 * Request the entity from SAX 8132 */ 8133 if ((ctxt->sax != NULL) && 8134 (ctxt->sax->getParameterEntity != NULL)) 8135 entity = ctxt->sax->getParameterEntity(ctxt->userData, 8136 name); 8137 if (entity == NULL) { 8138 /* 8139 * [ WFC: Entity Declared ] 8140 * In a document without any DTD, a document with only an 8141 * internal DTD subset which contains no parameter entity 8142 * references, or a document with "standalone='yes'", ... 8143 * ... The declaration of a parameter entity must precede 8144 * any reference to it... 8145 */ 8146 if ((ctxt->standalone == 1) || 8147 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8148 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8149 "PEReference: %%%s; not found\n", name); 8150 } else { 8151 /* 8152 * [ VC: Entity Declared ] 8153 * In a document with an external subset or external 8154 * parameter entities with "standalone='no'", ... 8155 * ... The declaration of a parameter entity must 8156 * precede any reference to it... 
8157 */ 8158 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8159 "PEReference: %%%s; not found\n", 8160 name, NULL); 8161 ctxt->valid = 0; 8162 } 8163 } else { 8164 /* 8165 * Internal checking in case the entity quest barfed 8166 */ 8167 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8168 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8169 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8170 "%%%s; is not a parameter entity\n", 8171 name, NULL); 8172 } 8173 } 8174 ctxt->hasPErefs = 1; 8175 xmlFree(name); 8176 *str = ptr; 8177 return(entity); 8178} 8179 8180/** 8181 * xmlParseDocTypeDecl: 8182 * @ctxt: an XML parser context 8183 * 8184 * parse a DOCTYPE declaration 8185 * 8186 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8187 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8188 * 8189 * [ VC: Root Element Type ] 8190 * The Name in the document type declaration must match the element 8191 * type of the root element. 8192 */ 8193 8194void 8195xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8196 const xmlChar *name = NULL; 8197 xmlChar *ExternalID = NULL; 8198 xmlChar *URI = NULL; 8199 8200 /* 8201 * We know that '<!DOCTYPE' has been detected. 8202 */ 8203 SKIP(9); 8204 8205 SKIP_BLANKS; 8206 8207 /* 8208 * Parse the DOCTYPE name. 8209 */ 8210 name = xmlParseName(ctxt); 8211 if (name == NULL) { 8212 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8213 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8214 } 8215 ctxt->intSubName = name; 8216 8217 SKIP_BLANKS; 8218 8219 /* 8220 * Check for SystemID and ExternalID 8221 */ 8222 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8223 8224 if ((URI != NULL) || (ExternalID != NULL)) { 8225 ctxt->hasExternalSubset = 1; 8226 } 8227 ctxt->extSubURI = URI; 8228 ctxt->extSubSystem = ExternalID; 8229 8230 SKIP_BLANKS; 8231 8232 /* 8233 * Create and update the internal subset. 
8234 */ 8235 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8236 (!ctxt->disableSAX)) 8237 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8238 8239 /* 8240 * Is there any internal subset declarations ? 8241 * they are handled separately in xmlParseInternalSubset() 8242 */ 8243 if (RAW == '[') 8244 return; 8245 8246 /* 8247 * We should be at the end of the DOCTYPE declaration. 8248 */ 8249 if (RAW != '>') { 8250 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8251 } 8252 NEXT; 8253} 8254 8255/** 8256 * xmlParseInternalSubset: 8257 * @ctxt: an XML parser context 8258 * 8259 * parse the internal subset declaration 8260 * 8261 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8262 */ 8263 8264static void 8265xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8266 /* 8267 * Is there any DTD definition ? 8268 */ 8269 if (RAW == '[') { 8270 ctxt->instate = XML_PARSER_DTD; 8271 NEXT; 8272 /* 8273 * Parse the succession of Markup declarations and 8274 * PEReferences. 8275 * Subsequence (markupdecl | PEReference | S)* 8276 */ 8277 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8278 const xmlChar *check = CUR_PTR; 8279 unsigned int cons = ctxt->input->consumed; 8280 8281 SKIP_BLANKS; 8282 xmlParseMarkupDecl(ctxt); 8283 xmlParsePEReference(ctxt); 8284 8285 /* 8286 * Pop-up of finished entities. 8287 */ 8288 while ((RAW == 0) && (ctxt->inputNr > 1)) 8289 xmlPopInput(ctxt); 8290 8291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8293 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8294 break; 8295 } 8296 } 8297 if (RAW == ']') { 8298 NEXT; 8299 SKIP_BLANKS; 8300 } 8301 } 8302 8303 /* 8304 * We should be at the end of the DOCTYPE declaration. 
8305 */ 8306 if (RAW != '>') { 8307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8308 } 8309 NEXT; 8310} 8311 8312#ifdef LIBXML_SAX1_ENABLED 8313/** 8314 * xmlParseAttribute: 8315 * @ctxt: an XML parser context 8316 * @value: a xmlChar ** used to store the value of the attribute 8317 * 8318 * parse an attribute 8319 * 8320 * [41] Attribute ::= Name Eq AttValue 8321 * 8322 * [ WFC: No External Entity References ] 8323 * Attribute values cannot contain direct or indirect entity references 8324 * to external entities. 8325 * 8326 * [ WFC: No < in Attribute Values ] 8327 * The replacement text of any entity referred to directly or indirectly in 8328 * an attribute value (other than "<") must not contain a <. 8329 * 8330 * [ VC: Attribute Value Type ] 8331 * The attribute must have been declared; the value must be of the type 8332 * declared for it. 8333 * 8334 * [25] Eq ::= S? '=' S? 8335 * 8336 * With namespace: 8337 * 8338 * [NS 11] Attribute ::= QName Eq AttValue 8339 * 8340 * Also the case QName == xmlns:??? is handled independently as a namespace 8341 * definition. 8342 * 8343 * Returns the attribute name, and the value in *value. 
8344 */ 8345 8346const xmlChar * 8347xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8348 const xmlChar *name; 8349 xmlChar *val; 8350 8351 *value = NULL; 8352 GROW; 8353 name = xmlParseName(ctxt); 8354 if (name == NULL) { 8355 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8356 "error parsing attribute name\n"); 8357 return(NULL); 8358 } 8359 8360 /* 8361 * read the value 8362 */ 8363 SKIP_BLANKS; 8364 if (RAW == '=') { 8365 NEXT; 8366 SKIP_BLANKS; 8367 val = xmlParseAttValue(ctxt); 8368 ctxt->instate = XML_PARSER_CONTENT; 8369 } else { 8370 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8371 "Specification mandate value for attribute %s\n", name); 8372 return(NULL); 8373 } 8374 8375 /* 8376 * Check that xml:lang conforms to the specification 8377 * No more registered as an error, just generate a warning now 8378 * since this was deprecated in XML second edition 8379 */ 8380 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8381 if (!xmlCheckLanguageID(val)) { 8382 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8383 "Malformed value for xml:lang : %s\n", 8384 val, NULL); 8385 } 8386 } 8387 8388 /* 8389 * Check that xml:space conforms to the specification 8390 */ 8391 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8392 if (xmlStrEqual(val, BAD_CAST "default")) 8393 *(ctxt->space) = 0; 8394 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8395 *(ctxt->space) = 1; 8396 else { 8397 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8398"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8399 val, NULL); 8400 } 8401 } 8402 8403 *value = val; 8404 return(name); 8405} 8406 8407/** 8408 * xmlParseStartTag: 8409 * @ctxt: an XML parser context 8410 * 8411 * parse a start of tag either for rule element or 8412 * EmptyElement. In both case we don't parse the tag closing chars. 8413 * 8414 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8415 * 8416 * [ WFC: Unique Att Spec ] 8417 * No attribute name may appear more than once in the same start-tag or 8418 * empty-element tag. 8419 * 8420 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8421 * 8422 * [ WFC: Unique Att Spec ] 8423 * No attribute name may appear more than once in the same start-tag or 8424 * empty-element tag. 8425 * 8426 * With namespace: 8427 * 8428 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8429 * 8430 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8431 * 8432 * Returns the element name parsed 8433 */ 8434 8435const xmlChar * 8436xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8437 const xmlChar *name; 8438 const xmlChar *attname; 8439 xmlChar *attvalue; 8440 const xmlChar **atts = ctxt->atts; 8441 int nbatts = 0; 8442 int maxatts = ctxt->maxatts; 8443 int i; 8444 8445 if (RAW != '<') return(NULL); 8446 NEXT1; 8447 8448 name = xmlParseName(ctxt); 8449 if (name == NULL) { 8450 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8451 "xmlParseStartTag: invalid element name\n"); 8452 return(NULL); 8453 } 8454 8455 /* 8456 * Now parse the attributes, it ends up with the ending 8457 * 8458 * (S Attribute)* S? 8459 */ 8460 SKIP_BLANKS; 8461 GROW; 8462 8463 while (((RAW != '>') && 8464 ((RAW != '/') || (NXT(1) != '>')) && 8465 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8466 const xmlChar *q = CUR_PTR; 8467 unsigned int cons = ctxt->input->consumed; 8468 8469 attname = xmlParseAttribute(ctxt, &attvalue); 8470 if ((attname != NULL) && (attvalue != NULL)) { 8471 /* 8472 * [ WFC: Unique Att Spec ] 8473 * No attribute name may appear more than once in the same 8474 * start-tag or empty-element tag. 
8475 */ 8476 for (i = 0; i < nbatts;i += 2) { 8477 if (xmlStrEqual(atts[i], attname)) { 8478 xmlErrAttributeDup(ctxt, NULL, attname); 8479 xmlFree(attvalue); 8480 goto failed; 8481 } 8482 } 8483 /* 8484 * Add the pair to atts 8485 */ 8486 if (atts == NULL) { 8487 maxatts = 22; /* allow for 10 attrs by default */ 8488 atts = (const xmlChar **) 8489 xmlMalloc(maxatts * sizeof(xmlChar *)); 8490 if (atts == NULL) { 8491 xmlErrMemory(ctxt, NULL); 8492 if (attvalue != NULL) 8493 xmlFree(attvalue); 8494 goto failed; 8495 } 8496 ctxt->atts = atts; 8497 ctxt->maxatts = maxatts; 8498 } else if (nbatts + 4 > maxatts) { 8499 const xmlChar **n; 8500 8501 maxatts *= 2; 8502 n = (const xmlChar **) xmlRealloc((void *) atts, 8503 maxatts * sizeof(const xmlChar *)); 8504 if (n == NULL) { 8505 xmlErrMemory(ctxt, NULL); 8506 if (attvalue != NULL) 8507 xmlFree(attvalue); 8508 goto failed; 8509 } 8510 atts = n; 8511 ctxt->atts = atts; 8512 ctxt->maxatts = maxatts; 8513 } 8514 atts[nbatts++] = attname; 8515 atts[nbatts++] = attvalue; 8516 atts[nbatts] = NULL; 8517 atts[nbatts + 1] = NULL; 8518 } else { 8519 if (attvalue != NULL) 8520 xmlFree(attvalue); 8521 } 8522 8523failed: 8524 8525 GROW 8526 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8527 break; 8528 if (!IS_BLANK_CH(RAW)) { 8529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8530 "attributes construct error\n"); 8531 } 8532 SKIP_BLANKS; 8533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8534 (attname == NULL) && (attvalue == NULL)) { 8535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8536 "xmlParseStartTag: problem parsing attributes\n"); 8537 break; 8538 } 8539 SHRINK; 8540 GROW; 8541 } 8542 8543 /* 8544 * SAX: Start of Element ! 
8545 */ 8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8547 (!ctxt->disableSAX)) { 8548 if (nbatts > 0) 8549 ctxt->sax->startElement(ctxt->userData, name, atts); 8550 else 8551 ctxt->sax->startElement(ctxt->userData, name, NULL); 8552 } 8553 8554 if (atts != NULL) { 8555 /* Free only the content strings */ 8556 for (i = 1;i < nbatts;i+=2) 8557 if (atts[i] != NULL) 8558 xmlFree((xmlChar *) atts[i]); 8559 } 8560 return(name); 8561} 8562 8563/** 8564 * xmlParseEndTag1: 8565 * @ctxt: an XML parser context 8566 * @line: line of the start tag 8567 * @nsNr: number of namespaces on the start tag 8568 * 8569 * parse an end of tag 8570 * 8571 * [42] ETag ::= '</' Name S? '>' 8572 * 8573 * With namespace 8574 * 8575 * [NS 9] ETag ::= '</' QName S? '>' 8576 */ 8577 8578static void 8579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8580 const xmlChar *name; 8581 8582 GROW; 8583 if ((RAW != '<') || (NXT(1) != '/')) { 8584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8585 "xmlParseEndTag: '</' not found\n"); 8586 return; 8587 } 8588 SKIP(2); 8589 8590 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8591 8592 /* 8593 * We should definitely be at the ending "S? '>'" part 8594 */ 8595 GROW; 8596 SKIP_BLANKS; 8597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8599 } else 8600 NEXT1; 8601 8602 /* 8603 * [ WFC: Element Type Match ] 8604 * The Name in an element's end-tag must match the element type in the 8605 * start-tag. 
8606 * 8607 */ 8608 if (name != (xmlChar*)1) { 8609 if (name == NULL) name = BAD_CAST "unparseable"; 8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8611 "Opening and ending tag mismatch: %s line %d and %s\n", 8612 ctxt->name, line, name); 8613 } 8614 8615 /* 8616 * SAX: End of Tag 8617 */ 8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8619 (!ctxt->disableSAX)) 8620 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8621 8622 namePop(ctxt); 8623 spacePop(ctxt); 8624 return; 8625} 8626 8627/** 8628 * xmlParseEndTag: 8629 * @ctxt: an XML parser context 8630 * 8631 * parse an end of tag 8632 * 8633 * [42] ETag ::= '</' Name S? '>' 8634 * 8635 * With namespace 8636 * 8637 * [NS 9] ETag ::= '</' QName S? '>' 8638 */ 8639 8640void 8641xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8642 xmlParseEndTag1(ctxt, 0); 8643} 8644#endif /* LIBXML_SAX1_ENABLED */ 8645 8646/************************************************************************ 8647 * * 8648 * SAX 2 specific operations * 8649 * * 8650 ************************************************************************/ 8651 8652/* 8653 * xmlGetNamespace: 8654 * @ctxt: an XML parser context 8655 * @prefix: the prefix to lookup 8656 * 8657 * Lookup the namespace name for the @prefix (which ca be NULL) 8658 * The prefix must come from the @ctxt->dict dictionnary 8659 * 8660 * Returns the namespace name or NULL if not bound 8661 */ 8662static const xmlChar * 8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8664 int i; 8665 8666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8667 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8668 if (ctxt->nsTab[i] == prefix) { 8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8670 return(NULL); 8671 return(ctxt->nsTab[i + 1]); 8672 } 8673 return(NULL); 8674} 8675 8676/** 8677 * xmlParseQName: 8678 * @ctxt: an XML parser context 8679 * @prefix: pointer to store the prefix part 8680 * 8681 * parse an XML Namespace QName 8682 * 8683 * [6] QName ::= 
(Prefix ':')? LocalPart 8684 * [7] Prefix ::= NCName 8685 * [8] LocalPart ::= NCName 8686 * 8687 * Returns the Name parsed or NULL 8688 */ 8689 8690static const xmlChar * 8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8692 const xmlChar *l, *p; 8693 8694 GROW; 8695 8696 l = xmlParseNCName(ctxt); 8697 if (l == NULL) { 8698 if (CUR == ':') { 8699 l = xmlParseName(ctxt); 8700 if (l != NULL) { 8701 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8702 "Failed to parse QName '%s'\n", l, NULL, NULL); 8703 *prefix = NULL; 8704 return(l); 8705 } 8706 } 8707 return(NULL); 8708 } 8709 if (CUR == ':') { 8710 NEXT; 8711 p = l; 8712 l = xmlParseNCName(ctxt); 8713 if (l == NULL) { 8714 xmlChar *tmp; 8715 8716 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8717 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8718 l = xmlParseNmtoken(ctxt); 8719 if (l == NULL) 8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8721 else { 8722 tmp = xmlBuildQName(l, p, NULL, 0); 8723 xmlFree((char *)l); 8724 } 8725 p = xmlDictLookup(ctxt->dict, tmp, -1); 8726 if (tmp != NULL) xmlFree(tmp); 8727 *prefix = NULL; 8728 return(p); 8729 } 8730 if (CUR == ':') { 8731 xmlChar *tmp; 8732 8733 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8735 NEXT; 8736 tmp = (xmlChar *) xmlParseName(ctxt); 8737 if (tmp != NULL) { 8738 tmp = xmlBuildQName(tmp, l, NULL, 0); 8739 l = xmlDictLookup(ctxt->dict, tmp, -1); 8740 if (tmp != NULL) xmlFree(tmp); 8741 *prefix = p; 8742 return(l); 8743 } 8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8745 l = xmlDictLookup(ctxt->dict, tmp, -1); 8746 if (tmp != NULL) xmlFree(tmp); 8747 *prefix = p; 8748 return(l); 8749 } 8750 *prefix = p; 8751 } else 8752 *prefix = NULL; 8753 return(l); 8754} 8755 8756/** 8757 * xmlParseQNameAndCompare: 8758 * @ctxt: an XML parser context 8759 * @name: the localname 8760 * @prefix: the prefix, if any. 
8761 * 8762 * parse an XML name and compares for match 8763 * (specialized for endtag parsing) 8764 * 8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8766 * and the name for mismatch 8767 */ 8768 8769static const xmlChar * 8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8771 xmlChar const *prefix) { 8772 const xmlChar *cmp; 8773 const xmlChar *in; 8774 const xmlChar *ret; 8775 const xmlChar *prefix2; 8776 8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8778 8779 GROW; 8780 in = ctxt->input->cur; 8781 8782 cmp = prefix; 8783 while (*in != 0 && *in == *cmp) { 8784 ++in; 8785 ++cmp; 8786 } 8787 if ((*cmp == 0) && (*in == ':')) { 8788 in++; 8789 cmp = name; 8790 while (*in != 0 && *in == *cmp) { 8791 ++in; 8792 ++cmp; 8793 } 8794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8795 /* success */ 8796 ctxt->input->cur = in; 8797 return((const xmlChar*) 1); 8798 } 8799 } 8800 /* 8801 * all strings coms from the dictionary, equality can be done directly 8802 */ 8803 ret = xmlParseQName (ctxt, &prefix2); 8804 if ((ret == name) && (prefix == prefix2)) 8805 return((const xmlChar*) 1); 8806 return ret; 8807} 8808 8809/** 8810 * xmlParseAttValueInternal: 8811 * @ctxt: an XML parser context 8812 * @len: attribute len result 8813 * @alloc: whether the attribute was reallocated as a new string 8814 * @normalize: if 1 then further non-CDATA normalization must be done 8815 * 8816 * parse a value for an attribute. 8817 * NOTE: if no normalization is needed, the routine will return pointers 8818 * directly from the data buffer. 
8819 * 8820 * 3.3.3 Attribute-Value Normalization: 8821 * Before the value of an attribute is passed to the application or 8822 * checked for validity, the XML processor must normalize it as follows: 8823 * - a character reference is processed by appending the referenced 8824 * character to the attribute value 8825 * - an entity reference is processed by recursively processing the 8826 * replacement text of the entity 8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8828 * appending #x20 to the normalized value, except that only a single 8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8830 * parsed entity or the literal entity value of an internal parsed entity 8831 * - other characters are processed by appending them to the normalized value 8832 * If the declared value is not CDATA, then the XML processor must further 8833 * process the normalized attribute value by discarding any leading and 8834 * trailing space (#x20) characters, and by replacing sequences of space 8835 * (#x20) characters by a single space (#x20) character. 8836 * All attributes for which no declaration has been read should be treated 8837 * by a non-validating parser as if declared CDATA. 8838 * 8839 * Returns the AttValue parsed or NULL. The value has to be freed by the 8840 * caller if it was copied, this can be detected by val[*len] == 0. 
8841 */ 8842 8843static xmlChar * 8844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8845 int normalize) 8846{ 8847 xmlChar limit = 0; 8848 const xmlChar *in = NULL, *start, *end, *last; 8849 xmlChar *ret = NULL; 8850 8851 GROW; 8852 in = (xmlChar *) CUR_PTR; 8853 if (*in != '"' && *in != '\'') { 8854 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8855 return (NULL); 8856 } 8857 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8858 8859 /* 8860 * try to handle in this routine the most common case where no 8861 * allocation of a new string is required and where content is 8862 * pure ASCII. 8863 */ 8864 limit = *in++; 8865 end = ctxt->input->end; 8866 start = in; 8867 if (in >= end) { 8868 const xmlChar *oldbase = ctxt->input->base; 8869 GROW; 8870 if (oldbase != ctxt->input->base) { 8871 long delta = ctxt->input->base - oldbase; 8872 start = start + delta; 8873 in = in + delta; 8874 } 8875 end = ctxt->input->end; 8876 } 8877 if (normalize) { 8878 /* 8879 * Skip any leading spaces 8880 */ 8881 while ((in < end) && (*in != limit) && 8882 ((*in == 0x20) || (*in == 0x9) || 8883 (*in == 0xA) || (*in == 0xD))) { 8884 in++; 8885 start = in; 8886 if (in >= end) { 8887 const xmlChar *oldbase = ctxt->input->base; 8888 GROW; 8889 if (ctxt->instate == XML_PARSER_EOF) 8890 return(NULL); 8891 if (oldbase != ctxt->input->base) { 8892 long delta = ctxt->input->base - oldbase; 8893 start = start + delta; 8894 in = in + delta; 8895 } 8896 end = ctxt->input->end; 8897 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8898 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8899 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8900 "AttValue lenght too long\n"); 8901 return(NULL); 8902 } 8903 } 8904 } 8905 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8906 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8907 if ((*in++ == 0x20) && (*in == 0x20)) break; 8908 if (in >= end) { 8909 const xmlChar *oldbase = ctxt->input->base; 8910 GROW; 8911 if (ctxt->instate == 
XML_PARSER_EOF) 8912 return(NULL); 8913 if (oldbase != ctxt->input->base) { 8914 long delta = ctxt->input->base - oldbase; 8915 start = start + delta; 8916 in = in + delta; 8917 } 8918 end = ctxt->input->end; 8919 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8920 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8921 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8922 "AttValue lenght too long\n"); 8923 return(NULL); 8924 } 8925 } 8926 } 8927 last = in; 8928 /* 8929 * skip the trailing blanks 8930 */ 8931 while ((last[-1] == 0x20) && (last > start)) last--; 8932 while ((in < end) && (*in != limit) && 8933 ((*in == 0x20) || (*in == 0x9) || 8934 (*in == 0xA) || (*in == 0xD))) { 8935 in++; 8936 if (in >= end) { 8937 const xmlChar *oldbase = ctxt->input->base; 8938 GROW; 8939 if (ctxt->instate == XML_PARSER_EOF) 8940 return(NULL); 8941 if (oldbase != ctxt->input->base) { 8942 long delta = ctxt->input->base - oldbase; 8943 start = start + delta; 8944 in = in + delta; 8945 last = last + delta; 8946 } 8947 end = ctxt->input->end; 8948 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8949 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8950 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8951 "AttValue lenght too long\n"); 8952 return(NULL); 8953 } 8954 } 8955 } 8956 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8959 "AttValue lenght too long\n"); 8960 return(NULL); 8961 } 8962 if (*in != limit) goto need_complex; 8963 } else { 8964 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8965 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8966 in++; 8967 if (in >= end) { 8968 const xmlChar *oldbase = ctxt->input->base; 8969 GROW; 8970 if (ctxt->instate == XML_PARSER_EOF) 8971 return(NULL); 8972 if (oldbase != ctxt->input->base) { 8973 long delta = ctxt->input->base - oldbase; 8974 start = start + delta; 8975 in = in + delta; 8976 } 8977 end = ctxt->input->end; 8978 if (((in - 
start) > XML_MAX_TEXT_LENGTH) && 8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8981 "AttValue lenght too long\n"); 8982 return(NULL); 8983 } 8984 } 8985 } 8986 last = in; 8987 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8988 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8989 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8990 "AttValue lenght too long\n"); 8991 return(NULL); 8992 } 8993 if (*in != limit) goto need_complex; 8994 } 8995 in++; 8996 if (len != NULL) { 8997 *len = last - start; 8998 ret = (xmlChar *) start; 8999 } else { 9000 if (alloc) *alloc = 1; 9001 ret = xmlStrndup(start, last - start); 9002 } 9003 CUR_PTR = in; 9004 if (alloc) *alloc = 0; 9005 return ret; 9006need_complex: 9007 if (alloc) *alloc = 1; 9008 return xmlParseAttValueComplex(ctxt, len, normalize); 9009} 9010 9011/** 9012 * xmlParseAttribute2: 9013 * @ctxt: an XML parser context 9014 * @pref: the element prefix 9015 * @elem: the element name 9016 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9017 * @value: a xmlChar ** used to store the value of the attribute 9018 * @len: an int * to save the length of the attribute 9019 * @alloc: an int * to indicate if the attribute was allocated 9020 * 9021 * parse an attribute in the new SAX2 framework. 9022 * 9023 * Returns the attribute name, and the value in *value, . 
9024 */ 9025 9026static const xmlChar * 9027xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9028 const xmlChar * pref, const xmlChar * elem, 9029 const xmlChar ** prefix, xmlChar ** value, 9030 int *len, int *alloc) 9031{ 9032 const xmlChar *name; 9033 xmlChar *val, *internal_val = NULL; 9034 int normalize = 0; 9035 9036 *value = NULL; 9037 GROW; 9038 name = xmlParseQName(ctxt, prefix); 9039 if (name == NULL) { 9040 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9041 "error parsing attribute name\n"); 9042 return (NULL); 9043 } 9044 9045 /* 9046 * get the type if needed 9047 */ 9048 if (ctxt->attsSpecial != NULL) { 9049 int type; 9050 9051 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9052 pref, elem, *prefix, name); 9053 if (type != 0) 9054 normalize = 1; 9055 } 9056 9057 /* 9058 * read the value 9059 */ 9060 SKIP_BLANKS; 9061 if (RAW == '=') { 9062 NEXT; 9063 SKIP_BLANKS; 9064 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9065 if (normalize) { 9066 /* 9067 * Sometimes a second normalisation pass for spaces is needed 9068 * but that only happens if charrefs or entities refernces 9069 * have been used in the attribute value, i.e. the attribute 9070 * value have been extracted in an allocated string already. 
9071 */ 9072 if (*alloc) { 9073 const xmlChar *val2; 9074 9075 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9076 if ((val2 != NULL) && (val2 != val)) { 9077 xmlFree(val); 9078 val = (xmlChar *) val2; 9079 } 9080 } 9081 } 9082 ctxt->instate = XML_PARSER_CONTENT; 9083 } else { 9084 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9085 "Specification mandate value for attribute %s\n", 9086 name); 9087 return (NULL); 9088 } 9089 9090 if (*prefix == ctxt->str_xml) { 9091 /* 9092 * Check that xml:lang conforms to the specification 9093 * No more registered as an error, just generate a warning now 9094 * since this was deprecated in XML second edition 9095 */ 9096 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9097 internal_val = xmlStrndup(val, *len); 9098 if (!xmlCheckLanguageID(internal_val)) { 9099 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9100 "Malformed value for xml:lang : %s\n", 9101 internal_val, NULL); 9102 } 9103 } 9104 9105 /* 9106 * Check that xml:space conforms to the specification 9107 */ 9108 if (xmlStrEqual(name, BAD_CAST "space")) { 9109 internal_val = xmlStrndup(val, *len); 9110 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9111 *(ctxt->space) = 0; 9112 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9113 *(ctxt->space) = 1; 9114 else { 9115 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9116 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9117 internal_val, NULL); 9118 } 9119 } 9120 if (internal_val) { 9121 xmlFree(internal_val); 9122 } 9123 } 9124 9125 *value = val; 9126 return (name); 9127} 9128/** 9129 * xmlParseStartTag2: 9130 * @ctxt: an XML parser context 9131 * 9132 * parse a start of tag either for rule element or 9133 * EmptyElement. In both case we don't parse the tag closing chars. 9134 * This routine is called when running SAX2 parsing 9135 * 9136 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9137 * 9138 * [ WFC: Unique Att Spec ] 9139 * No attribute name may appear more than once in the same start-tag or 9140 * empty-element tag. 9141 * 9142 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9143 * 9144 * [ WFC: Unique Att Spec ] 9145 * No attribute name may appear more than once in the same start-tag or 9146 * empty-element tag. 9147 * 9148 * With namespace: 9149 * 9150 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9151 * 9152 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9153 * 9154 * Returns the element name parsed 9155 */ 9156 9157static const xmlChar * 9158xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9159 const xmlChar **URI, int *tlen) { 9160 const xmlChar *localname; 9161 const xmlChar *prefix; 9162 const xmlChar *attname; 9163 const xmlChar *aprefix; 9164 const xmlChar *nsname; 9165 xmlChar *attvalue; 9166 const xmlChar **atts = ctxt->atts; 9167 int maxatts = ctxt->maxatts; 9168 int nratts, nbatts, nbdef; 9169 int i, j, nbNs, attval, oldline, oldcol; 9170 const xmlChar *base; 9171 unsigned long cur; 9172 int nsNr = ctxt->nsNr; 9173 9174 if (RAW != '<') return(NULL); 9175 NEXT1; 9176 9177 /* 9178 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9179 * point since the attribute values may be stored as pointers to 9180 * the buffer and calling SHRINK would destroy them ! 9181 * The Shrinking is only possible once the full set of attribute 9182 * callbacks have been done. 9183 */ 9184reparse: 9185 SHRINK; 9186 base = ctxt->input->base; 9187 cur = ctxt->input->cur - ctxt->input->base; 9188 oldline = ctxt->input->line; 9189 oldcol = ctxt->input->col; 9190 nbatts = 0; 9191 nratts = 0; 9192 nbdef = 0; 9193 nbNs = 0; 9194 attval = 0; 9195 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9196 ctxt->nsNr = nsNr; 9197 9198 localname = xmlParseQName(ctxt, &prefix); 9199 if (localname == NULL) { 9200 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9201 "StartTag: invalid element name\n"); 9202 return(NULL); 9203 } 9204 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9205 9206 /* 9207 * Now parse the attributes, it ends up with the ending 9208 * 9209 * (S Attribute)* S? 9210 */ 9211 SKIP_BLANKS; 9212 GROW; 9213 if (ctxt->input->base != base) goto base_changed; 9214 9215 while (((RAW != '>') && 9216 ((RAW != '/') || (NXT(1) != '>')) && 9217 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9218 const xmlChar *q = CUR_PTR; 9219 unsigned int cons = ctxt->input->consumed; 9220 int len = -1, alloc = 0; 9221 9222 attname = xmlParseAttribute2(ctxt, prefix, localname, 9223 &aprefix, &attvalue, &len, &alloc); 9224 if (ctxt->input->base != base) { 9225 if ((attvalue != NULL) && (alloc != 0)) 9226 xmlFree(attvalue); 9227 attvalue = NULL; 9228 goto base_changed; 9229 } 9230 if ((attname != NULL) && (attvalue != NULL)) { 9231 if (len < 0) len = xmlStrlen(attvalue); 9232 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9233 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9234 xmlURIPtr uri; 9235 9236 if (*URL != 0) { 9237 uri = xmlParseURI((const char *) URL); 9238 if (uri == NULL) { 9239 xmlNsErr(ctxt, XML_WAR_NS_URI, 9240 "xmlns: '%s' is not a valid URI\n", 9241 URL, NULL, NULL); 9242 } else { 9243 if (uri->scheme == NULL) { 9244 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9245 "xmlns: URI %s is not absolute\n", 9246 URL, NULL, NULL); 9247 } 9248 xmlFreeURI(uri); 9249 } 9250 if (URL == ctxt->str_xml_ns) { 9251 if (attname != ctxt->str_xml) { 9252 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9253 "xml namespace URI cannot be the default namespace\n", 9254 NULL, NULL, NULL); 9255 } 9256 goto skip_default_ns; 9257 } 9258 if ((len == 29) && 9259 (xmlStrEqual(URL, 9260 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9261 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9262 "reuse of the xmlns namespace name is forbidden\n", 9263 NULL, NULL, NULL); 9264 goto skip_default_ns; 9265 } 9266 } 9267 /* 9268 * check that it's not a defined namespace 9269 */ 9270 for (j = 1;j <= nbNs;j++) 9271 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9272 break; 9273 if (j <= nbNs) 9274 xmlErrAttributeDup(ctxt, NULL, attname); 9275 else 9276 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9277skip_default_ns: 9278 if (alloc != 0) xmlFree(attvalue); 9279 SKIP_BLANKS; 9280 continue; 9281 } 9282 if (aprefix == ctxt->str_xmlns) { 9283 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9284 xmlURIPtr uri; 9285 9286 if (attname == ctxt->str_xml) { 9287 if (URL != ctxt->str_xml_ns) { 9288 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9289 "xml namespace prefix mapped to wrong URI\n", 9290 NULL, NULL, NULL); 9291 } 9292 /* 9293 * Do not keep a namespace definition node 9294 */ 9295 goto skip_ns; 9296 } 9297 if (URL == ctxt->str_xml_ns) { 9298 if (attname != ctxt->str_xml) { 9299 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9300 "xml namespace URI mapped to wrong prefix\n", 9301 NULL, NULL, NULL); 9302 } 9303 goto skip_ns; 9304 } 9305 if (attname == ctxt->str_xmlns) { 9306 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9307 "redefinition of the xmlns prefix is forbidden\n", 9308 NULL, NULL, NULL); 9309 goto skip_ns; 9310 } 9311 if ((len == 29) && 9312 (xmlStrEqual(URL, 9313 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9315 "reuse of the xmlns namespace name is forbidden\n", 9316 NULL, NULL, NULL); 9317 goto skip_ns; 9318 } 9319 if ((URL == NULL) || (URL[0] == 0)) { 9320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9321 "xmlns:%s: Empty XML namespace is not allowed\n", 9322 attname, NULL, NULL); 9323 goto skip_ns; 9324 } else { 9325 uri = xmlParseURI((const char *) URL); 9326 if (uri == NULL) { 9327 xmlNsErr(ctxt, XML_WAR_NS_URI, 9328 "xmlns:%s: '%s' is not a valid URI\n", 9329 attname, URL, NULL); 
9330 } else { 9331 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9332 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9333 "xmlns:%s: URI %s is not absolute\n", 9334 attname, URL, NULL); 9335 } 9336 xmlFreeURI(uri); 9337 } 9338 } 9339 9340 /* 9341 * check that it's not a defined namespace 9342 */ 9343 for (j = 1;j <= nbNs;j++) 9344 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9345 break; 9346 if (j <= nbNs) 9347 xmlErrAttributeDup(ctxt, aprefix, attname); 9348 else 9349 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9350skip_ns: 9351 if (alloc != 0) xmlFree(attvalue); 9352 SKIP_BLANKS; 9353 if (ctxt->input->base != base) goto base_changed; 9354 continue; 9355 } 9356 9357 /* 9358 * Add the pair to atts 9359 */ 9360 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9361 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9362 if (attvalue[len] == 0) 9363 xmlFree(attvalue); 9364 goto failed; 9365 } 9366 maxatts = ctxt->maxatts; 9367 atts = ctxt->atts; 9368 } 9369 ctxt->attallocs[nratts++] = alloc; 9370 atts[nbatts++] = attname; 9371 atts[nbatts++] = aprefix; 9372 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9373 atts[nbatts++] = attvalue; 9374 attvalue += len; 9375 atts[nbatts++] = attvalue; 9376 /* 9377 * tag if some deallocation is needed 9378 */ 9379 if (alloc != 0) attval = 1; 9380 } else { 9381 if ((attvalue != NULL) && (attvalue[len] == 0)) 9382 xmlFree(attvalue); 9383 } 9384 9385failed: 9386 9387 GROW 9388 if (ctxt->instate == XML_PARSER_EOF) 9389 break; 9390 if (ctxt->input->base != base) goto base_changed; 9391 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9392 break; 9393 if (!IS_BLANK_CH(RAW)) { 9394 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9395 "attributes construct error\n"); 9396 break; 9397 } 9398 SKIP_BLANKS; 9399 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9400 (attname == NULL) && (attvalue == NULL)) { 9401 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9402 "xmlParseStartTag: problem parsing attributes\n"); 9403 break; 9404 
} 9405 GROW; 9406 if (ctxt->input->base != base) goto base_changed; 9407 } 9408 9409 /* 9410 * The attributes defaulting 9411 */ 9412 if (ctxt->attsDefault != NULL) { 9413 xmlDefAttrsPtr defaults; 9414 9415 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9416 if (defaults != NULL) { 9417 for (i = 0;i < defaults->nbAttrs;i++) { 9418 attname = defaults->values[5 * i]; 9419 aprefix = defaults->values[5 * i + 1]; 9420 9421 /* 9422 * special work for namespaces defaulted defs 9423 */ 9424 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9425 /* 9426 * check that it's not a defined namespace 9427 */ 9428 for (j = 1;j <= nbNs;j++) 9429 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9430 break; 9431 if (j <= nbNs) continue; 9432 9433 nsname = xmlGetNamespace(ctxt, NULL); 9434 if (nsname != defaults->values[5 * i + 2]) { 9435 if (nsPush(ctxt, NULL, 9436 defaults->values[5 * i + 2]) > 0) 9437 nbNs++; 9438 } 9439 } else if (aprefix == ctxt->str_xmlns) { 9440 /* 9441 * check that it's not a defined namespace 9442 */ 9443 for (j = 1;j <= nbNs;j++) 9444 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9445 break; 9446 if (j <= nbNs) continue; 9447 9448 nsname = xmlGetNamespace(ctxt, attname); 9449 if (nsname != defaults->values[2]) { 9450 if (nsPush(ctxt, attname, 9451 defaults->values[5 * i + 2]) > 0) 9452 nbNs++; 9453 } 9454 } else { 9455 /* 9456 * check that it's not a defined attribute 9457 */ 9458 for (j = 0;j < nbatts;j+=5) { 9459 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9460 break; 9461 } 9462 if (j < nbatts) continue; 9463 9464 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9465 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9466 return(NULL); 9467 } 9468 maxatts = ctxt->maxatts; 9469 atts = ctxt->atts; 9470 } 9471 atts[nbatts++] = attname; 9472 atts[nbatts++] = aprefix; 9473 if (aprefix == NULL) 9474 atts[nbatts++] = NULL; 9475 else 9476 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9477 atts[nbatts++] = defaults->values[5 * i + 2]; 
9478 atts[nbatts++] = defaults->values[5 * i + 3]; 9479 if ((ctxt->standalone == 1) && 9480 (defaults->values[5 * i + 4] != NULL)) { 9481 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9482 "standalone: attribute %s on %s defaulted from external subset\n", 9483 attname, localname); 9484 } 9485 nbdef++; 9486 } 9487 } 9488 } 9489 } 9490 9491 /* 9492 * The attributes checkings 9493 */ 9494 for (i = 0; i < nbatts;i += 5) { 9495 /* 9496 * The default namespace does not apply to attribute names. 9497 */ 9498 if (atts[i + 1] != NULL) { 9499 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9500 if (nsname == NULL) { 9501 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9502 "Namespace prefix %s for %s on %s is not defined\n", 9503 atts[i + 1], atts[i], localname); 9504 } 9505 atts[i + 2] = nsname; 9506 } else 9507 nsname = NULL; 9508 /* 9509 * [ WFC: Unique Att Spec ] 9510 * No attribute name may appear more than once in the same 9511 * start-tag or empty-element tag. 9512 * As extended by the Namespace in XML REC. 9513 */ 9514 for (j = 0; j < i;j += 5) { 9515 if (atts[i] == atts[j]) { 9516 if (atts[i+1] == atts[j+1]) { 9517 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9518 break; 9519 } 9520 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9521 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9522 "Namespaced Attribute %s in '%s' redefined\n", 9523 atts[i], nsname, NULL); 9524 break; 9525 } 9526 } 9527 } 9528 } 9529 9530 nsname = xmlGetNamespace(ctxt, prefix); 9531 if ((prefix != NULL) && (nsname == NULL)) { 9532 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9533 "Namespace prefix %s on %s is not defined\n", 9534 prefix, localname, NULL); 9535 } 9536 *pref = prefix; 9537 *URI = nsname; 9538 9539 /* 9540 * SAX: Start of Element ! 
9541 */ 9542 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9543 (!ctxt->disableSAX)) { 9544 if (nbNs > 0) 9545 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9546 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9547 nbatts / 5, nbdef, atts); 9548 else 9549 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9550 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9551 } 9552 9553 /* 9554 * Free up attribute allocated strings if needed 9555 */ 9556 if (attval != 0) { 9557 for (i = 3,j = 0; j < nratts;i += 5,j++) 9558 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9559 xmlFree((xmlChar *) atts[i]); 9560 } 9561 9562 return(localname); 9563 9564base_changed: 9565 /* 9566 * the attribute strings are valid iif the base didn't changed 9567 */ 9568 if (attval != 0) { 9569 for (i = 3,j = 0; j < nratts;i += 5,j++) 9570 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9571 xmlFree((xmlChar *) atts[i]); 9572 } 9573 ctxt->input->cur = ctxt->input->base + cur; 9574 ctxt->input->line = oldline; 9575 ctxt->input->col = oldcol; 9576 if (ctxt->wellFormed == 1) { 9577 goto reparse; 9578 } 9579 return(NULL); 9580} 9581 9582/** 9583 * xmlParseEndTag2: 9584 * @ctxt: an XML parser context 9585 * @line: line of the start tag 9586 * @nsNr: number of namespaces on the start tag 9587 * 9588 * parse an end of tag 9589 * 9590 * [42] ETag ::= '</' Name S? '>' 9591 * 9592 * With namespace 9593 * 9594 * [NS 9] ETag ::= '</' QName S? 
'>' 9595 */ 9596 9597static void 9598xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9599 const xmlChar *URI, int line, int nsNr, int tlen) { 9600 const xmlChar *name; 9601 9602 GROW; 9603 if ((RAW != '<') || (NXT(1) != '/')) { 9604 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9605 return; 9606 } 9607 SKIP(2); 9608 9609 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9610 if (ctxt->input->cur[tlen] == '>') { 9611 ctxt->input->cur += tlen + 1; 9612 goto done; 9613 } 9614 ctxt->input->cur += tlen; 9615 name = (xmlChar*)1; 9616 } else { 9617 if (prefix == NULL) 9618 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9619 else 9620 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9621 } 9622 9623 /* 9624 * We should definitely be at the ending "S? '>'" part 9625 */ 9626 GROW; 9627 if (ctxt->instate == XML_PARSER_EOF) 9628 return; 9629 SKIP_BLANKS; 9630 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9631 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9632 } else 9633 NEXT1; 9634 9635 /* 9636 * [ WFC: Element Type Match ] 9637 * The Name in an element's end-tag must match the element type in the 9638 * start-tag. 9639 * 9640 */ 9641 if (name != (xmlChar*)1) { 9642 if (name == NULL) name = BAD_CAST "unparseable"; 9643 if ((line == 0) && (ctxt->node != NULL)) 9644 line = ctxt->node->line; 9645 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9646 "Opening and ending tag mismatch: %s line %d and %s\n", 9647 ctxt->name, line, name); 9648 } 9649 9650 /* 9651 * SAX: End of Tag 9652 */ 9653done: 9654 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9655 (!ctxt->disableSAX)) 9656 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9657 9658 spacePop(ctxt); 9659 if (nsNr != 0) 9660 nsPop(ctxt, nsNr); 9661 return; 9662} 9663 9664/** 9665 * xmlParseCDSect: 9666 * @ctxt: an XML parser context 9667 * 9668 * Parse escaped pure raw content. 
9669 * 9670 * [18] CDSect ::= CDStart CData CDEnd 9671 * 9672 * [19] CDStart ::= '<![CDATA[' 9673 * 9674 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9675 * 9676 * [21] CDEnd ::= ']]>' 9677 */ 9678void 9679xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9680 xmlChar *buf = NULL; 9681 int len = 0; 9682 int size = XML_PARSER_BUFFER_SIZE; 9683 int r, rl; 9684 int s, sl; 9685 int cur, l; 9686 int count = 0; 9687 9688 /* Check 2.6.0 was NXT(0) not RAW */ 9689 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9690 SKIP(9); 9691 } else 9692 return; 9693 9694 ctxt->instate = XML_PARSER_CDATA_SECTION; 9695 r = CUR_CHAR(rl); 9696 if (!IS_CHAR(r)) { 9697 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9698 ctxt->instate = XML_PARSER_CONTENT; 9699 return; 9700 } 9701 NEXTL(rl); 9702 s = CUR_CHAR(sl); 9703 if (!IS_CHAR(s)) { 9704 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9705 ctxt->instate = XML_PARSER_CONTENT; 9706 return; 9707 } 9708 NEXTL(sl); 9709 cur = CUR_CHAR(l); 9710 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9711 if (buf == NULL) { 9712 xmlErrMemory(ctxt, NULL); 9713 return; 9714 } 9715 while (IS_CHAR(cur) && 9716 ((r != ']') || (s != ']') || (cur != '>'))) { 9717 if (len + 5 >= size) { 9718 xmlChar *tmp; 9719 9720 if ((size > XML_MAX_TEXT_LENGTH) && 9721 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9722 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9723 "CData section too big found", NULL); 9724 xmlFree (buf); 9725 return; 9726 } 9727 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9728 if (tmp == NULL) { 9729 xmlFree(buf); 9730 xmlErrMemory(ctxt, NULL); 9731 return; 9732 } 9733 buf = tmp; 9734 size *= 2; 9735 } 9736 COPY_BUF(rl,buf,len,r); 9737 r = s; 9738 rl = sl; 9739 s = cur; 9740 sl = l; 9741 count++; 9742 if (count > 50) { 9743 GROW; 9744 if (ctxt->instate == XML_PARSER_EOF) { 9745 xmlFree(buf); 9746 return; 9747 } 9748 count = 0; 9749 } 9750 NEXTL(l); 9751 cur = CUR_CHAR(l); 9752 } 9753 buf[len] = 0; 
9754 ctxt->instate = XML_PARSER_CONTENT; 9755 if (cur != '>') { 9756 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9757 "CData section not finished\n%.50s\n", buf); 9758 xmlFree(buf); 9759 return; 9760 } 9761 NEXTL(l); 9762 9763 /* 9764 * OK the buffer is to be consumed as cdata. 9765 */ 9766 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9767 if (ctxt->sax->cdataBlock != NULL) 9768 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9769 else if (ctxt->sax->characters != NULL) 9770 ctxt->sax->characters(ctxt->userData, buf, len); 9771 } 9772 xmlFree(buf); 9773} 9774 9775/** 9776 * xmlParseContent: 9777 * @ctxt: an XML parser context 9778 * 9779 * Parse a content: 9780 * 9781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9782 */ 9783 9784void 9785xmlParseContent(xmlParserCtxtPtr ctxt) { 9786 GROW; 9787 while ((RAW != 0) && 9788 ((RAW != '<') || (NXT(1) != '/')) && 9789 (ctxt->instate != XML_PARSER_EOF)) { 9790 const xmlChar *test = CUR_PTR; 9791 unsigned int cons = ctxt->input->consumed; 9792 const xmlChar *cur = ctxt->input->cur; 9793 9794 /* 9795 * First case : a Processing Instruction. 9796 */ 9797 if ((*cur == '<') && (cur[1] == '?')) { 9798 xmlParsePI(ctxt); 9799 } 9800 9801 /* 9802 * Second case : a CDSection 9803 */ 9804 /* 2.6.0 test was *cur not RAW */ 9805 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9806 xmlParseCDSect(ctxt); 9807 } 9808 9809 /* 9810 * Third case : a comment 9811 */ 9812 else if ((*cur == '<') && (NXT(1) == '!') && 9813 (NXT(2) == '-') && (NXT(3) == '-')) { 9814 xmlParseComment(ctxt); 9815 ctxt->instate = XML_PARSER_CONTENT; 9816 } 9817 9818 /* 9819 * Fourth case : a sub-element. 9820 */ 9821 else if (*cur == '<') { 9822 xmlParseElement(ctxt); 9823 } 9824 9825 /* 9826 * Fifth case : a reference. 
If if has not been resolved, 9827 * parsing returns it's Name, create the node 9828 */ 9829 9830 else if (*cur == '&') { 9831 xmlParseReference(ctxt); 9832 } 9833 9834 /* 9835 * Last case, text. Note that References are handled directly. 9836 */ 9837 else { 9838 xmlParseCharData(ctxt, 0); 9839 } 9840 9841 GROW; 9842 /* 9843 * Pop-up of finished entities. 9844 */ 9845 while ((RAW == 0) && (ctxt->inputNr > 1)) 9846 xmlPopInput(ctxt); 9847 SHRINK; 9848 9849 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9850 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9851 "detected an error in element content\n"); 9852 ctxt->instate = XML_PARSER_EOF; 9853 break; 9854 } 9855 } 9856} 9857 9858/** 9859 * xmlParseElement: 9860 * @ctxt: an XML parser context 9861 * 9862 * parse an XML element, this is highly recursive 9863 * 9864 * [39] element ::= EmptyElemTag | STag content ETag 9865 * 9866 * [ WFC: Element Type Match ] 9867 * The Name in an element's end-tag must match the element type in the 9868 * start-tag. 
9869 * 9870 */ 9871 9872void 9873xmlParseElement(xmlParserCtxtPtr ctxt) { 9874 const xmlChar *name; 9875 const xmlChar *prefix = NULL; 9876 const xmlChar *URI = NULL; 9877 xmlParserNodeInfo node_info; 9878 int line, tlen = 0; 9879 xmlNodePtr ret; 9880 int nsNr = ctxt->nsNr; 9881 9882 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9883 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9884 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9885 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9886 xmlParserMaxDepth); 9887 ctxt->instate = XML_PARSER_EOF; 9888 return; 9889 } 9890 9891 /* Capture start position */ 9892 if (ctxt->record_info) { 9893 node_info.begin_pos = ctxt->input->consumed + 9894 (CUR_PTR - ctxt->input->base); 9895 node_info.begin_line = ctxt->input->line; 9896 } 9897 9898 if (ctxt->spaceNr == 0) 9899 spacePush(ctxt, -1); 9900 else if (*ctxt->space == -2) 9901 spacePush(ctxt, -1); 9902 else 9903 spacePush(ctxt, *ctxt->space); 9904 9905 line = ctxt->input->line; 9906#ifdef LIBXML_SAX1_ENABLED 9907 if (ctxt->sax2) 9908#endif /* LIBXML_SAX1_ENABLED */ 9909 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9910#ifdef LIBXML_SAX1_ENABLED 9911 else 9912 name = xmlParseStartTag(ctxt); 9913#endif /* LIBXML_SAX1_ENABLED */ 9914 if (ctxt->instate == XML_PARSER_EOF) 9915 return; 9916 if (name == NULL) { 9917 spacePop(ctxt); 9918 return; 9919 } 9920 namePush(ctxt, name); 9921 ret = ctxt->node; 9922 9923#ifdef LIBXML_VALID_ENABLED 9924 /* 9925 * [ VC: Root Element Type ] 9926 * The Name in the document type declaration must match the element 9927 * type of the root element. 9928 */ 9929 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9930 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9931 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9932#endif /* LIBXML_VALID_ENABLED */ 9933 9934 /* 9935 * Check for an Empty Element. 
9936 */ 9937 if ((RAW == '/') && (NXT(1) == '>')) { 9938 SKIP(2); 9939 if (ctxt->sax2) { 9940 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9941 (!ctxt->disableSAX)) 9942 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9943#ifdef LIBXML_SAX1_ENABLED 9944 } else { 9945 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9946 (!ctxt->disableSAX)) 9947 ctxt->sax->endElement(ctxt->userData, name); 9948#endif /* LIBXML_SAX1_ENABLED */ 9949 } 9950 namePop(ctxt); 9951 spacePop(ctxt); 9952 if (nsNr != ctxt->nsNr) 9953 nsPop(ctxt, ctxt->nsNr - nsNr); 9954 if ( ret != NULL && ctxt->record_info ) { 9955 node_info.end_pos = ctxt->input->consumed + 9956 (CUR_PTR - ctxt->input->base); 9957 node_info.end_line = ctxt->input->line; 9958 node_info.node = ret; 9959 xmlParserAddNodeInfo(ctxt, &node_info); 9960 } 9961 return; 9962 } 9963 if (RAW == '>') { 9964 NEXT1; 9965 } else { 9966 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9967 "Couldn't find end of Start Tag %s line %d\n", 9968 name, line, NULL); 9969 9970 /* 9971 * end of parsing of this node. 9972 */ 9973 nodePop(ctxt); 9974 namePop(ctxt); 9975 spacePop(ctxt); 9976 if (nsNr != ctxt->nsNr) 9977 nsPop(ctxt, ctxt->nsNr - nsNr); 9978 9979 /* 9980 * Capture end position and add node 9981 */ 9982 if ( ret != NULL && ctxt->record_info ) { 9983 node_info.end_pos = ctxt->input->consumed + 9984 (CUR_PTR - ctxt->input->base); 9985 node_info.end_line = ctxt->input->line; 9986 node_info.node = ret; 9987 xmlParserAddNodeInfo(ctxt, &node_info); 9988 } 9989 return; 9990 } 9991 9992 /* 9993 * Parse the content of the element: 9994 */ 9995 xmlParseContent(ctxt); 9996 if (!IS_BYTE_CHAR(RAW)) { 9997 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9998 "Premature end of data in tag %s line %d\n", 9999 name, line, NULL); 10000 10001 /* 10002 * end of parsing of this node. 
10003 */ 10004 nodePop(ctxt); 10005 namePop(ctxt); 10006 spacePop(ctxt); 10007 if (nsNr != ctxt->nsNr) 10008 nsPop(ctxt, ctxt->nsNr - nsNr); 10009 return; 10010 } 10011 10012 /* 10013 * parse the end of tag: '</' should be here. 10014 */ 10015 if (ctxt->sax2) { 10016 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10017 namePop(ctxt); 10018 } 10019#ifdef LIBXML_SAX1_ENABLED 10020 else 10021 xmlParseEndTag1(ctxt, line); 10022#endif /* LIBXML_SAX1_ENABLED */ 10023 10024 /* 10025 * Capture end position and add node 10026 */ 10027 if ( ret != NULL && ctxt->record_info ) { 10028 node_info.end_pos = ctxt->input->consumed + 10029 (CUR_PTR - ctxt->input->base); 10030 node_info.end_line = ctxt->input->line; 10031 node_info.node = ret; 10032 xmlParserAddNodeInfo(ctxt, &node_info); 10033 } 10034} 10035 10036/** 10037 * xmlParseVersionNum: 10038 * @ctxt: an XML parser context 10039 * 10040 * parse the XML version value. 10041 * 10042 * [26] VersionNum ::= '1.' [0-9]+ 10043 * 10044 * In practice allow [0-9].[0-9]+ at that level 10045 * 10046 * Returns the string giving the XML version number, or NULL 10047 */ 10048xmlChar * 10049xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10050 xmlChar *buf = NULL; 10051 int len = 0; 10052 int size = 10; 10053 xmlChar cur; 10054 10055 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10056 if (buf == NULL) { 10057 xmlErrMemory(ctxt, NULL); 10058 return(NULL); 10059 } 10060 cur = CUR; 10061 if (!((cur >= '0') && (cur <= '9'))) { 10062 xmlFree(buf); 10063 return(NULL); 10064 } 10065 buf[len++] = cur; 10066 NEXT; 10067 cur=CUR; 10068 if (cur != '.') { 10069 xmlFree(buf); 10070 return(NULL); 10071 } 10072 buf[len++] = cur; 10073 NEXT; 10074 cur=CUR; 10075 while ((cur >= '0') && (cur <= '9')) { 10076 if (len + 1 >= size) { 10077 xmlChar *tmp; 10078 10079 size *= 2; 10080 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10081 if (tmp == NULL) { 10082 xmlFree(buf); 10083 xmlErrMemory(ctxt, NULL); 10084 
return(NULL); 10085 } 10086 buf = tmp; 10087 } 10088 buf[len++] = cur; 10089 NEXT; 10090 cur=CUR; 10091 } 10092 buf[len] = 0; 10093 return(buf); 10094} 10095 10096/** 10097 * xmlParseVersionInfo: 10098 * @ctxt: an XML parser context 10099 * 10100 * parse the XML version. 10101 * 10102 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10103 * 10104 * [25] Eq ::= S? '=' S? 10105 * 10106 * Returns the version string, e.g. "1.0" 10107 */ 10108 10109xmlChar * 10110xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10111 xmlChar *version = NULL; 10112 10113 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10114 SKIP(7); 10115 SKIP_BLANKS; 10116 if (RAW != '=') { 10117 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10118 return(NULL); 10119 } 10120 NEXT; 10121 SKIP_BLANKS; 10122 if (RAW == '"') { 10123 NEXT; 10124 version = xmlParseVersionNum(ctxt); 10125 if (RAW != '"') { 10126 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10127 } else 10128 NEXT; 10129 } else if (RAW == '\''){ 10130 NEXT; 10131 version = xmlParseVersionNum(ctxt); 10132 if (RAW != '\'') { 10133 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10134 } else 10135 NEXT; 10136 } else { 10137 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10138 } 10139 } 10140 return(version); 10141} 10142 10143/** 10144 * xmlParseEncName: 10145 * @ctxt: an XML parser context 10146 * 10147 * parse the XML encoding name 10148 * 10149 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10150 * 10151 * Returns the encoding name value or NULL 10152 */ 10153xmlChar * 10154xmlParseEncName(xmlParserCtxtPtr ctxt) { 10155 xmlChar *buf = NULL; 10156 int len = 0; 10157 int size = 10; 10158 xmlChar cur; 10159 10160 cur = CUR; 10161 if (((cur >= 'a') && (cur <= 'z')) || 10162 ((cur >= 'A') && (cur <= 'Z'))) { 10163 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10164 if (buf == NULL) { 10165 xmlErrMemory(ctxt, NULL); 10166 return(NULL); 10167 } 10168 10169 buf[len++] = cur; 10170 NEXT; 
10171 cur = CUR; 10172 while (((cur >= 'a') && (cur <= 'z')) || 10173 ((cur >= 'A') && (cur <= 'Z')) || 10174 ((cur >= '0') && (cur <= '9')) || 10175 (cur == '.') || (cur == '_') || 10176 (cur == '-')) { 10177 if (len + 1 >= size) { 10178 xmlChar *tmp; 10179 10180 size *= 2; 10181 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10182 if (tmp == NULL) { 10183 xmlErrMemory(ctxt, NULL); 10184 xmlFree(buf); 10185 return(NULL); 10186 } 10187 buf = tmp; 10188 } 10189 buf[len++] = cur; 10190 NEXT; 10191 cur = CUR; 10192 if (cur == 0) { 10193 SHRINK; 10194 GROW; 10195 cur = CUR; 10196 } 10197 } 10198 buf[len] = 0; 10199 } else { 10200 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10201 } 10202 return(buf); 10203} 10204 10205/** 10206 * xmlParseEncodingDecl: 10207 * @ctxt: an XML parser context 10208 * 10209 * parse the XML encoding declaration 10210 * 10211 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10212 * 10213 * this setups the conversion filters. 
10214 * 10215 * Returns the encoding value or NULL 10216 */ 10217 10218const xmlChar * 10219xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10220 xmlChar *encoding = NULL; 10221 10222 SKIP_BLANKS; 10223 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10224 SKIP(8); 10225 SKIP_BLANKS; 10226 if (RAW != '=') { 10227 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10228 return(NULL); 10229 } 10230 NEXT; 10231 SKIP_BLANKS; 10232 if (RAW == '"') { 10233 NEXT; 10234 encoding = xmlParseEncName(ctxt); 10235 if (RAW != '"') { 10236 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10237 } else 10238 NEXT; 10239 } else if (RAW == '\''){ 10240 NEXT; 10241 encoding = xmlParseEncName(ctxt); 10242 if (RAW != '\'') { 10243 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10244 } else 10245 NEXT; 10246 } else { 10247 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10248 } 10249 10250 /* 10251 * Non standard parsing, allowing the user to ignore encoding 10252 */ 10253 if (ctxt->options & XML_PARSE_IGNORE_ENC) 10254 return(encoding); 10255 10256 /* 10257 * UTF-16 encoding stwich has already taken place at this stage, 10258 * more over the little-endian/big-endian selection is already done 10259 */ 10260 if ((encoding != NULL) && 10261 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10262 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10263 /* 10264 * If no encoding was passed to the parser, that we are 10265 * using UTF-16 and no decoder is present i.e. 
the 10266 * document is apparently UTF-8 compatible, then raise an 10267 * encoding mismatch fatal error 10268 */ 10269 if ((ctxt->encoding == NULL) && 10270 (ctxt->input->buf != NULL) && 10271 (ctxt->input->buf->encoder == NULL)) { 10272 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10273 "Document labelled UTF-16 but has UTF-8 content\n"); 10274 } 10275 if (ctxt->encoding != NULL) 10276 xmlFree((xmlChar *) ctxt->encoding); 10277 ctxt->encoding = encoding; 10278 } 10279 /* 10280 * UTF-8 encoding is handled natively 10281 */ 10282 else if ((encoding != NULL) && 10283 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10284 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10285 if (ctxt->encoding != NULL) 10286 xmlFree((xmlChar *) ctxt->encoding); 10287 ctxt->encoding = encoding; 10288 } 10289 else if (encoding != NULL) { 10290 xmlCharEncodingHandlerPtr handler; 10291 10292 if (ctxt->input->encoding != NULL) 10293 xmlFree((xmlChar *) ctxt->input->encoding); 10294 ctxt->input->encoding = encoding; 10295 10296 handler = xmlFindCharEncodingHandler((const char *) encoding); 10297 if (handler != NULL) { 10298 xmlSwitchToEncoding(ctxt, handler); 10299 } else { 10300 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10301 "Unsupported encoding %s\n", encoding); 10302 return(NULL); 10303 } 10304 } 10305 } 10306 return(encoding); 10307} 10308 10309/** 10310 * xmlParseSDDecl: 10311 * @ctxt: an XML parser context 10312 * 10313 * parse the XML standalone declaration 10314 * 10315 * [32] SDDecl ::= S 'standalone' Eq 10316 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10317 * 10318 * [ VC: Standalone Document Declaration ] 10319 * TODO The standalone document declaration must have the value "no" 10320 * if any external markup declarations contain declarations of: 10321 * - attributes with default values, if elements to which these 10322 * attributes apply appear in the document without specifications 10323 * of values for these attributes, or 10324 * - entities (other 
than amp, lt, gt, apos, quot), if references 10325 * to those entities appear in the document, or 10326 * - attributes with values subject to normalization, where the 10327 * attribute appears in the document with a value which will change 10328 * as a result of normalization, or 10329 * - element types with element content, if white space occurs directly 10330 * within any instance of those types. 10331 * 10332 * Returns: 10333 * 1 if standalone="yes" 10334 * 0 if standalone="no" 10335 * -2 if standalone attribute is missing or invalid 10336 * (A standalone value of -2 means that the XML declaration was found, 10337 * but no value was specified for the standalone attribute). 10338 */ 10339 10340int 10341xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10342 int standalone = -2; 10343 10344 SKIP_BLANKS; 10345 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10346 SKIP(10); 10347 SKIP_BLANKS; 10348 if (RAW != '=') { 10349 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10350 return(standalone); 10351 } 10352 NEXT; 10353 SKIP_BLANKS; 10354 if (RAW == '\''){ 10355 NEXT; 10356 if ((RAW == 'n') && (NXT(1) == 'o')) { 10357 standalone = 0; 10358 SKIP(2); 10359 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10360 (NXT(2) == 's')) { 10361 standalone = 1; 10362 SKIP(3); 10363 } else { 10364 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10365 } 10366 if (RAW != '\'') { 10367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10368 } else 10369 NEXT; 10370 } else if (RAW == '"'){ 10371 NEXT; 10372 if ((RAW == 'n') && (NXT(1) == 'o')) { 10373 standalone = 0; 10374 SKIP(2); 10375 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10376 (NXT(2) == 's')) { 10377 standalone = 1; 10378 SKIP(3); 10379 } else { 10380 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10381 } 10382 if (RAW != '"') { 10383 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10384 } else 10385 NEXT; 10386 } else { 10387 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10388 } 10389 } 
10390 return(standalone); 10391} 10392 10393/** 10394 * xmlParseXMLDecl: 10395 * @ctxt: an XML parser context 10396 * 10397 * parse an XML declaration header 10398 * 10399 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10400 */ 10401 10402void 10403xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10404 xmlChar *version; 10405 10406 /* 10407 * This value for standalone indicates that the document has an 10408 * XML declaration but it does not have a standalone attribute. 10409 * It will be overwritten later if a standalone attribute is found. 10410 */ 10411 ctxt->input->standalone = -2; 10412 10413 /* 10414 * We know that '<?xml' is here. 10415 */ 10416 SKIP(5); 10417 10418 if (!IS_BLANK_CH(RAW)) { 10419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10420 "Blank needed after '<?xml'\n"); 10421 } 10422 SKIP_BLANKS; 10423 10424 /* 10425 * We must have the VersionInfo here. 10426 */ 10427 version = xmlParseVersionInfo(ctxt); 10428 if (version == NULL) { 10429 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10430 } else { 10431 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10432 /* 10433 * Changed here for XML-1.0 5th edition 10434 */ 10435 if (ctxt->options & XML_PARSE_OLD10) { 10436 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10437 "Unsupported version '%s'\n", 10438 version); 10439 } else { 10440 if ((version[0] == '1') && ((version[1] == '.'))) { 10441 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10442 "Unsupported version '%s'\n", 10443 version, NULL); 10444 } else { 10445 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10446 "Unsupported version '%s'\n", 10447 version); 10448 } 10449 } 10450 } 10451 if (ctxt->version != NULL) 10452 xmlFree((void *) ctxt->version); 10453 ctxt->version = version; 10454 } 10455 10456 /* 10457 * We may have the encoding declaration 10458 */ 10459 if (!IS_BLANK_CH(RAW)) { 10460 if ((RAW == '?') && (NXT(1) == '>')) { 10461 SKIP(2); 10462 return; 10463 } 10464 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10465 } 10466 xmlParseEncodingDecl(ctxt); 10467 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10468 /* 10469 * The XML REC instructs us to stop parsing right here 10470 */ 10471 return; 10472 } 10473 10474 /* 10475 * We may have the standalone status. 10476 */ 10477 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10478 if ((RAW == '?') && (NXT(1) == '>')) { 10479 SKIP(2); 10480 return; 10481 } 10482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10483 } 10484 10485 /* 10486 * We can grow the input buffer freely at that point 10487 */ 10488 GROW; 10489 10490 SKIP_BLANKS; 10491 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10492 10493 SKIP_BLANKS; 10494 if ((RAW == '?') && (NXT(1) == '>')) { 10495 SKIP(2); 10496 } else if (RAW == '>') { 10497 /* Deprecated old WD ... */ 10498 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10499 NEXT; 10500 } else { 10501 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10502 MOVETO_ENDTAG(CUR_PTR); 10503 NEXT; 10504 } 10505} 10506 10507/** 10508 * xmlParseMisc: 10509 * @ctxt: an XML parser context 10510 * 10511 * parse an XML Misc* optional field. 10512 * 10513 * [27] Misc ::= Comment | PI | S 10514 */ 10515 10516void 10517xmlParseMisc(xmlParserCtxtPtr ctxt) { 10518 while ((ctxt->instate != XML_PARSER_EOF) && 10519 (((RAW == '<') && (NXT(1) == '?')) || 10520 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10521 IS_BLANK_CH(CUR))) { 10522 if ((RAW == '<') && (NXT(1) == '?')) { 10523 xmlParsePI(ctxt); 10524 } else if (IS_BLANK_CH(CUR)) { 10525 NEXT; 10526 } else 10527 xmlParseComment(ctxt); 10528 } 10529} 10530 10531/** 10532 * xmlParseDocument: 10533 * @ctxt: an XML parser context 10534 * 10535 * parse an XML document (and build a tree if using the standard SAX 10536 * interface). 10537 * 10538 * [1] document ::= prolog element Misc* 10539 * 10540 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 
 *
 * Returns 0 if the document was found well-formed, -1 otherwise. -1 is
 *         also returned right away if @ctxt or its input is NULL, or if
 *         the XML declaration names an unsupported encoding. The parser
 *         context is augmented as a result of the parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Auto-detection is only attempted when no encoding is recorded in
     * the context and at least 4 bytes are available.
     */
    if ((ctxt->encoding == NULL) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* The error is raised but parsing is not interrupted here. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * do not GROW here to avoid the detected encoder to decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
        GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /*
         * No XML declaration: fall back to the default version.
         * NOTE(review): the result of xmlCharStrdup() is not checked
         * for NULL here.
         */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset is 1 while the internal subset is being handled */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* anything left after the epilog is an error */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* Record the outcome of the parse in the document properties. */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}

/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off in the
 * context before the content is parsed.
 *
 * Returns 0 if the entity was found well-formed, -1 otherwise. -1 is
 *         also returned right away if @ctxt or its input is NULL, or if
 *         the declaration names an unsupported encoding. The parser
 *         context is augmented as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* The error is raised but parsing is not interrupted here. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /*
         * No declaration: fall back to the default version.
         * NOTE(review): the result of xmlCharStrdup() is not checked
         * for NULL here.
         */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /* a pending end tag or any other leftover input is an error */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}

#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *									*
 *		Progressive parsing interfaces				*
 *									*
 ************************************************************************/

/**
 * xmlParseLookupSequence:
 * @ctxt:  an XML parser context
 * @first:  the first char to lookup
 * @next:  the next char to lookup or zero
 * @third:  the next char to lookup or zero
 *
 * Try to find if a sequence (first, next, third) or just (first next) or
 * (first) is available in the input stream.
 * This function has a side effect on ctxt->checkIndex, used to avoid
 * rescanning sequences of bytes: when the lookup fails it is set to the
 * index where the scan stopped, and the next call resumes from there;
 * when the lookup succeeds it is reset to 0. It DOES change the state
 * of the parser, do not use liberally.
 *
 * Returns the offset of the sequence from the current parsing point if
 *         the full sequence is available, -1 otherwise.
 */
static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
                       xmlChar next, xmlChar third) {
    int base, len;
    xmlParserInputPtr in;
    const xmlChar *buf;

    in = ctxt->input;
    if (in == NULL) return(-1);
    base = in->cur - in->base;
    if (base < 0) return(-1);
    /* resume where the previous failed lookup stopped */
    if (ctxt->checkIndex > base)
        base = ctxt->checkIndex;
    if (in->buf == NULL) {
        buf = in->base;
        len = in->length;
    } else {
        buf = xmlBufContent(in->buf->buffer);
        len = xmlBufUse(in->buf->buffer);
    }
    /* take into account the sequence length */
    if (third) len -= 2;
    else if (next) len --;
    for (;base < len;base++) {
        if (buf[base] == first) {
            if (third != 0) {
                if ((buf[base + 1] != next) ||
                    (buf[base + 2] != third)) continue;
            } else if (next != 0) {
                if (buf[base + 1] != next) continue;
            }
            ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
            if (next == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c' found at %d\n",
                        first, base);
            else if (third == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c' found at %d\n",
                        first, next, base);
            else
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c%c' found at %d\n",
                        first, next, third, base);
#endif
            /* convert the index into an offset from in->cur */
            return(base - (in->cur - in->base));
        }
    }
    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
    if (next == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c' failed\n", first);
    else if (third == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c' failed\n", first, next);
    else
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
    return(-1);
}

/**
 * xmlParseGetLasts:
 * @ctxt:  an XML parser context
 * @lastlt:  pointer to store the last '<' from the input
 * @lastgt:  pointer to store the last '>' from the input
 *
 * Lookup the last < and > in the current chunk.
 *
 * The lookup is only done when ctxt->progressive is set and there is a
 * single input on the stack; otherwise, or when the input holds no '<',
 * both results are set to NULL. @lastgt receives the first '>' found
 * after the last '<' while skipping over quoted strings, or failing
 * that the last '>' located before that '<', or NULL if there is none.
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        xmlGenericError(xmlGenericErrorContext,
                    "Internal error: xmlParseGetLasts\n");
        return;
    }
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
        /* scan backward from the end of the input for the last '<' */
        tmp = ctxt->input->end;
        tmp--;
        while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
        if (tmp < ctxt->input->base) {
            *lastlt = NULL;
            *lastgt = NULL;
        } else {
            *lastlt = tmp;
            tmp++;
            /* scan forward for a '>' which is not inside quotes */
            while ((tmp < ctxt->input->end) && (*tmp != '>')) {
                if (*tmp == '\'') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else if (*tmp == '"') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else
                    tmp++;
            }
            if (tmp < ctxt->input->end)
                *lastgt = tmp;
            else {
                /* none after the '<': look for one before it */
                tmp = *lastlt;
                tmp--;
                while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
                if (tmp >= ctxt->input->base)
                    *lastgt = tmp;
                else
                    *lastgt = NULL;
            }
        }
    } else {
        *lastlt = NULL;
        *lastgt = NULL;
    }
}
/**
 * xmlCheckCdataPush:
 * @utf: pointer to the block of characters
 * @len: length of the block in bytes
 *
 * Check that the block of characters is okay as SCdata content [20]
 *
 * The block is decoded as UTF-8 and each character is checked with
 * xmlIsCharQ(). A multi-byte sequence cut by the end of the block is
 * not an error: the scan stops just before it.
 *
 * Returns the number of bytes to pass if okay, a negative index where an
 *         UTF-8 error occurred otherwise. 0 is returned if @utf is NULL
 *         or @len is not positive. An error on the very first byte also
 *         yields 0, since the index 0 cannot be negated.
 */
static int
xmlCheckCdataPush(const xmlChar *utf, int len) {
    int ix;
    unsigned char c;
    int codepoint;

    if ((utf == NULL) || (len <= 0))
        return(0);

    for (ix = 0; ix < len;) {      /* bounded by len, not by a terminator */
        c = utf[ix];
        if ((c & 0x80) == 0x00) {	/* 1-byte code, high bit clear */
            /* only tab, LF and CR are accepted below 0x20 */
            if (c >= 0x20)
                ix++;
            else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
                ix++;
            else
                return(-ix);
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
            if (ix + 2 > len) return(ix);	/* sequence cut by end of block */
            if ((utf[ix+1] & 0xc0 ) != 0x80)
                return(-ix);
            codepoint = (utf[ix] & 0x1f) << 6;
            codepoint |= utf[ix+1] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
            if (ix + 3 > len) return(ix);	/* sequence cut by end of block */
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0xf) << 12;
            codepoint |= (utf[ix+1] & 0x3f) << 6;
            codepoint |= utf[ix+2] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
            if (ix + 4 > len) return(ix);	/* sequence cut by end of block */
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80) ||
                ((utf[ix+3] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0x7) << 18;
            codepoint |= (utf[ix+1] & 0x3f) << 12;
            codepoint |= (utf[ix+2] & 0x3f) << 6;
            codepoint |= utf[ix+3] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 4;
        } else				/* unknown encoding */
            return(-ix);
    }
    return(ix);
}

/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing
 *
 * Returns zero if no parsing was possible
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail, tlen;
    xmlChar cur, next;
    const xmlChar *lastlt, *lastgt;

    if (ctxt->input == NULL)
        return(0);

#ifdef DEBUG_PUSH
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
xmlGenericError(xmlGenericErrorContext, 11070 "PP: try CDATA_SECTION\n");break; 11071 case XML_PARSER_END_TAG: 11072 xmlGenericError(xmlGenericErrorContext, 11073 "PP: try END_TAG\n");break; 11074 case XML_PARSER_ENTITY_DECL: 11075 xmlGenericError(xmlGenericErrorContext, 11076 "PP: try ENTITY_DECL\n");break; 11077 case XML_PARSER_ENTITY_VALUE: 11078 xmlGenericError(xmlGenericErrorContext, 11079 "PP: try ENTITY_VALUE\n");break; 11080 case XML_PARSER_ATTRIBUTE_VALUE: 11081 xmlGenericError(xmlGenericErrorContext, 11082 "PP: try ATTRIBUTE_VALUE\n");break; 11083 case XML_PARSER_DTD: 11084 xmlGenericError(xmlGenericErrorContext, 11085 "PP: try DTD\n");break; 11086 case XML_PARSER_EPILOG: 11087 xmlGenericError(xmlGenericErrorContext, 11088 "PP: try EPILOG\n");break; 11089 case XML_PARSER_PI: 11090 xmlGenericError(xmlGenericErrorContext, 11091 "PP: try PI\n");break; 11092 case XML_PARSER_IGNORE: 11093 xmlGenericError(xmlGenericErrorContext, 11094 "PP: try IGNORE\n");break; 11095 } 11096#endif 11097 11098 if ((ctxt->input != NULL) && 11099 (ctxt->input->cur - ctxt->input->base > 4096)) { 11100 xmlSHRINK(ctxt); 11101 ctxt->checkIndex = 0; 11102 } 11103 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11104 11105 while (1) { 11106 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11107 return(0); 11108 11109 11110 /* 11111 * Pop-up of finished entities. 11112 */ 11113 while ((RAW == 0) && (ctxt->inputNr > 1)) 11114 xmlPopInput(ctxt); 11115 11116 if (ctxt->input == NULL) break; 11117 if (ctxt->input->buf == NULL) 11118 avail = ctxt->input->length - 11119 (ctxt->input->cur - ctxt->input->base); 11120 else { 11121 /* 11122 * If we are operating on converted input, try to flush 11123 * remainng chars to avoid them stalling in the non-converted 11124 * buffer. 
11125 */ 11126 if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) { 11127 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11128 ctxt->input); 11129 size_t current = ctxt->input->cur - ctxt->input->base; 11130 11131 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11132 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11133 base, current); 11134 } 11135 avail = xmlBufUse(ctxt->input->buf->buffer) - 11136 (ctxt->input->cur - ctxt->input->base); 11137 } 11138 if (avail < 1) 11139 goto done; 11140 switch (ctxt->instate) { 11141 case XML_PARSER_EOF: 11142 /* 11143 * Document parsing is done ! 11144 */ 11145 goto done; 11146 case XML_PARSER_START: 11147 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11148 xmlChar start[4]; 11149 xmlCharEncoding enc; 11150 11151 /* 11152 * Very first chars read from the document flow. 11153 */ 11154 if (avail < 4) 11155 goto done; 11156 11157 /* 11158 * Get the 4 first bytes and decode the charset 11159 * if enc != XML_CHAR_ENCODING_NONE 11160 * plug some encoding conversion routines, 11161 * else xmlSwitchEncoding will set to (default) 11162 * UTF8. 
11163 */ 11164 start[0] = RAW; 11165 start[1] = NXT(1); 11166 start[2] = NXT(2); 11167 start[3] = NXT(3); 11168 enc = xmlDetectCharEncoding(start, 4); 11169 xmlSwitchEncoding(ctxt, enc); 11170 break; 11171 } 11172 11173 if (avail < 2) 11174 goto done; 11175 cur = ctxt->input->cur[0]; 11176 next = ctxt->input->cur[1]; 11177 if (cur == 0) { 11178 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11179 ctxt->sax->setDocumentLocator(ctxt->userData, 11180 &xmlDefaultSAXLocator); 11181 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11182 ctxt->instate = XML_PARSER_EOF; 11183#ifdef DEBUG_PUSH 11184 xmlGenericError(xmlGenericErrorContext, 11185 "PP: entering EOF\n"); 11186#endif 11187 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11188 ctxt->sax->endDocument(ctxt->userData); 11189 goto done; 11190 } 11191 if ((cur == '<') && (next == '?')) { 11192 /* PI or XML decl */ 11193 if (avail < 5) return(ret); 11194 if ((!terminate) && 11195 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11196 return(ret); 11197 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11198 ctxt->sax->setDocumentLocator(ctxt->userData, 11199 &xmlDefaultSAXLocator); 11200 if ((ctxt->input->cur[2] == 'x') && 11201 (ctxt->input->cur[3] == 'm') && 11202 (ctxt->input->cur[4] == 'l') && 11203 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11204 ret += 5; 11205#ifdef DEBUG_PUSH 11206 xmlGenericError(xmlGenericErrorContext, 11207 "PP: Parsing XML Decl\n"); 11208#endif 11209 xmlParseXMLDecl(ctxt); 11210 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11211 /* 11212 * The XML REC instructs us to stop parsing right 11213 * here 11214 */ 11215 ctxt->instate = XML_PARSER_EOF; 11216 return(0); 11217 } 11218 ctxt->standalone = ctxt->input->standalone; 11219 if ((ctxt->encoding == NULL) && 11220 (ctxt->input->encoding != NULL)) 11221 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11222 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11223 (!ctxt->disableSAX)) 11224 ctxt->sax->startDocument(ctxt->userData); 
11225 ctxt->instate = XML_PARSER_MISC; 11226#ifdef DEBUG_PUSH 11227 xmlGenericError(xmlGenericErrorContext, 11228 "PP: entering MISC\n"); 11229#endif 11230 } else { 11231 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11232 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11233 (!ctxt->disableSAX)) 11234 ctxt->sax->startDocument(ctxt->userData); 11235 ctxt->instate = XML_PARSER_MISC; 11236#ifdef DEBUG_PUSH 11237 xmlGenericError(xmlGenericErrorContext, 11238 "PP: entering MISC\n"); 11239#endif 11240 } 11241 } else { 11242 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11243 ctxt->sax->setDocumentLocator(ctxt->userData, 11244 &xmlDefaultSAXLocator); 11245 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11246 if (ctxt->version == NULL) { 11247 xmlErrMemory(ctxt, NULL); 11248 break; 11249 } 11250 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11251 (!ctxt->disableSAX)) 11252 ctxt->sax->startDocument(ctxt->userData); 11253 ctxt->instate = XML_PARSER_MISC; 11254#ifdef DEBUG_PUSH 11255 xmlGenericError(xmlGenericErrorContext, 11256 "PP: entering MISC\n"); 11257#endif 11258 } 11259 break; 11260 case XML_PARSER_START_TAG: { 11261 const xmlChar *name; 11262 const xmlChar *prefix = NULL; 11263 const xmlChar *URI = NULL; 11264 int nsNr = ctxt->nsNr; 11265 11266 if ((avail < 2) && (ctxt->inputNr == 1)) 11267 goto done; 11268 cur = ctxt->input->cur[0]; 11269 if (cur != '<') { 11270 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11271 ctxt->instate = XML_PARSER_EOF; 11272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11273 ctxt->sax->endDocument(ctxt->userData); 11274 goto done; 11275 } 11276 if (!terminate) { 11277 if (ctxt->progressive) { 11278 /* > can be found unescaped in attribute values */ 11279 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11280 goto done; 11281 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11282 goto done; 11283 } 11284 } 11285 if (ctxt->spaceNr == 0) 11286 spacePush(ctxt, -1); 11287 else if (*ctxt->space == 
-2) 11288 spacePush(ctxt, -1); 11289 else 11290 spacePush(ctxt, *ctxt->space); 11291#ifdef LIBXML_SAX1_ENABLED 11292 if (ctxt->sax2) 11293#endif /* LIBXML_SAX1_ENABLED */ 11294 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11295#ifdef LIBXML_SAX1_ENABLED 11296 else 11297 name = xmlParseStartTag(ctxt); 11298#endif /* LIBXML_SAX1_ENABLED */ 11299 if (ctxt->instate == XML_PARSER_EOF) 11300 goto done; 11301 if (name == NULL) { 11302 spacePop(ctxt); 11303 ctxt->instate = XML_PARSER_EOF; 11304 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11305 ctxt->sax->endDocument(ctxt->userData); 11306 goto done; 11307 } 11308#ifdef LIBXML_VALID_ENABLED 11309 /* 11310 * [ VC: Root Element Type ] 11311 * The Name in the document type declaration must match 11312 * the element type of the root element. 11313 */ 11314 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11315 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11316 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11317#endif /* LIBXML_VALID_ENABLED */ 11318 11319 /* 11320 * Check for an Empty Element. 
11321 */ 11322 if ((RAW == '/') && (NXT(1) == '>')) { 11323 SKIP(2); 11324 11325 if (ctxt->sax2) { 11326 if ((ctxt->sax != NULL) && 11327 (ctxt->sax->endElementNs != NULL) && 11328 (!ctxt->disableSAX)) 11329 ctxt->sax->endElementNs(ctxt->userData, name, 11330 prefix, URI); 11331 if (ctxt->nsNr - nsNr > 0) 11332 nsPop(ctxt, ctxt->nsNr - nsNr); 11333#ifdef LIBXML_SAX1_ENABLED 11334 } else { 11335 if ((ctxt->sax != NULL) && 11336 (ctxt->sax->endElement != NULL) && 11337 (!ctxt->disableSAX)) 11338 ctxt->sax->endElement(ctxt->userData, name); 11339#endif /* LIBXML_SAX1_ENABLED */ 11340 } 11341 spacePop(ctxt); 11342 if (ctxt->nameNr == 0) { 11343 ctxt->instate = XML_PARSER_EPILOG; 11344 } else { 11345 ctxt->instate = XML_PARSER_CONTENT; 11346 } 11347 ctxt->progressive = 1; 11348 break; 11349 } 11350 if (RAW == '>') { 11351 NEXT; 11352 } else { 11353 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11354 "Couldn't find end of Start Tag %s\n", 11355 name); 11356 nodePop(ctxt); 11357 spacePop(ctxt); 11358 } 11359 if (ctxt->sax2) 11360 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11361#ifdef LIBXML_SAX1_ENABLED 11362 else 11363 namePush(ctxt, name); 11364#endif /* LIBXML_SAX1_ENABLED */ 11365 11366 ctxt->instate = XML_PARSER_CONTENT; 11367 ctxt->progressive = 1; 11368 break; 11369 } 11370 case XML_PARSER_CONTENT: { 11371 const xmlChar *test; 11372 unsigned int cons; 11373 if ((avail < 2) && (ctxt->inputNr == 1)) 11374 goto done; 11375 cur = ctxt->input->cur[0]; 11376 next = ctxt->input->cur[1]; 11377 11378 test = CUR_PTR; 11379 cons = ctxt->input->consumed; 11380 if ((cur == '<') && (next == '/')) { 11381 ctxt->instate = XML_PARSER_END_TAG; 11382 break; 11383 } else if ((cur == '<') && (next == '?')) { 11384 if ((!terminate) && 11385 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11386 ctxt->progressive = XML_PARSER_PI; 11387 goto done; 11388 } 11389 xmlParsePI(ctxt); 11390 ctxt->instate = XML_PARSER_CONTENT; 11391 ctxt->progressive = 1; 11392 } else if ((cur == 
'<') && (next != '!')) { 11393 ctxt->instate = XML_PARSER_START_TAG; 11394 break; 11395 } else if ((cur == '<') && (next == '!') && 11396 (ctxt->input->cur[2] == '-') && 11397 (ctxt->input->cur[3] == '-')) { 11398 int term; 11399 11400 if (avail < 4) 11401 goto done; 11402 ctxt->input->cur += 4; 11403 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11404 ctxt->input->cur -= 4; 11405 if ((!terminate) && (term < 0)) { 11406 ctxt->progressive = XML_PARSER_COMMENT; 11407 goto done; 11408 } 11409 xmlParseComment(ctxt); 11410 ctxt->instate = XML_PARSER_CONTENT; 11411 ctxt->progressive = 1; 11412 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11413 (ctxt->input->cur[2] == '[') && 11414 (ctxt->input->cur[3] == 'C') && 11415 (ctxt->input->cur[4] == 'D') && 11416 (ctxt->input->cur[5] == 'A') && 11417 (ctxt->input->cur[6] == 'T') && 11418 (ctxt->input->cur[7] == 'A') && 11419 (ctxt->input->cur[8] == '[')) { 11420 SKIP(9); 11421 ctxt->instate = XML_PARSER_CDATA_SECTION; 11422 break; 11423 } else if ((cur == '<') && (next == '!') && 11424 (avail < 9)) { 11425 goto done; 11426 } else if (cur == '&') { 11427 if ((!terminate) && 11428 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11429 goto done; 11430 xmlParseReference(ctxt); 11431 } else { 11432 /* TODO Avoid the extra copy, handle directly !!! */ 11433 /* 11434 * Goal of the following test is: 11435 * - minimize calls to the SAX 'character' callback 11436 * when they are mergeable 11437 * - handle an problem for isBlank when we only parse 11438 * a sequence of blank chars and the next one is 11439 * not available to check against '<' presence. 11440 * - tries to homogenize the differences in SAX 11441 * callbacks between the push and pull versions 11442 * of the parser. 
11443 */ 11444 if ((ctxt->inputNr == 1) && 11445 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11446 if (!terminate) { 11447 if (ctxt->progressive) { 11448 if ((lastlt == NULL) || 11449 (ctxt->input->cur > lastlt)) 11450 goto done; 11451 } else if (xmlParseLookupSequence(ctxt, 11452 '<', 0, 0) < 0) { 11453 goto done; 11454 } 11455 } 11456 } 11457 ctxt->checkIndex = 0; 11458 xmlParseCharData(ctxt, 0); 11459 } 11460 /* 11461 * Pop-up of finished entities. 11462 */ 11463 while ((RAW == 0) && (ctxt->inputNr > 1)) 11464 xmlPopInput(ctxt); 11465 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11466 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11467 "detected an error in element content\n"); 11468 ctxt->instate = XML_PARSER_EOF; 11469 break; 11470 } 11471 break; 11472 } 11473 case XML_PARSER_END_TAG: 11474 if (avail < 2) 11475 goto done; 11476 if (!terminate) { 11477 if (ctxt->progressive) { 11478 /* > can be found unescaped in attribute values */ 11479 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11480 goto done; 11481 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11482 goto done; 11483 } 11484 } 11485 if (ctxt->sax2) { 11486 xmlParseEndTag2(ctxt, 11487 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11488 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11489 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11490 nameNsPop(ctxt); 11491 } 11492#ifdef LIBXML_SAX1_ENABLED 11493 else 11494 xmlParseEndTag1(ctxt, 0); 11495#endif /* LIBXML_SAX1_ENABLED */ 11496 if (ctxt->instate == XML_PARSER_EOF) { 11497 /* Nothing */ 11498 } else if (ctxt->nameNr == 0) { 11499 ctxt->instate = XML_PARSER_EPILOG; 11500 } else { 11501 ctxt->instate = XML_PARSER_CONTENT; 11502 } 11503 break; 11504 case XML_PARSER_CDATA_SECTION: { 11505 /* 11506 * The Push mode need to have the SAX callback for 11507 * cdataBlock merge back contiguous callbacks. 
11508 */ 11509 int base; 11510 11511 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11512 if (base < 0) { 11513 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11514 int tmp; 11515 11516 tmp = xmlCheckCdataPush(ctxt->input->cur, 11517 XML_PARSER_BIG_BUFFER_SIZE); 11518 if (tmp < 0) { 11519 tmp = -tmp; 11520 ctxt->input->cur += tmp; 11521 goto encoding_error; 11522 } 11523 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11524 if (ctxt->sax->cdataBlock != NULL) 11525 ctxt->sax->cdataBlock(ctxt->userData, 11526 ctxt->input->cur, tmp); 11527 else if (ctxt->sax->characters != NULL) 11528 ctxt->sax->characters(ctxt->userData, 11529 ctxt->input->cur, tmp); 11530 } 11531 SKIPL(tmp); 11532 ctxt->checkIndex = 0; 11533 } 11534 goto done; 11535 } else { 11536 int tmp; 11537 11538 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11539 if ((tmp < 0) || (tmp != base)) { 11540 tmp = -tmp; 11541 ctxt->input->cur += tmp; 11542 goto encoding_error; 11543 } 11544 if ((ctxt->sax != NULL) && (base == 0) && 11545 (ctxt->sax->cdataBlock != NULL) && 11546 (!ctxt->disableSAX)) { 11547 /* 11548 * Special case to provide identical behaviour 11549 * between pull and push parsers on enpty CDATA 11550 * sections 11551 */ 11552 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11553 (!strncmp((const char *)&ctxt->input->cur[-9], 11554 "<![CDATA[", 9))) 11555 ctxt->sax->cdataBlock(ctxt->userData, 11556 BAD_CAST "", 0); 11557 } else if ((ctxt->sax != NULL) && (base > 0) && 11558 (!ctxt->disableSAX)) { 11559 if (ctxt->sax->cdataBlock != NULL) 11560 ctxt->sax->cdataBlock(ctxt->userData, 11561 ctxt->input->cur, base); 11562 else if (ctxt->sax->characters != NULL) 11563 ctxt->sax->characters(ctxt->userData, 11564 ctxt->input->cur, base); 11565 } 11566 SKIPL(base + 3); 11567 ctxt->checkIndex = 0; 11568 ctxt->instate = XML_PARSER_CONTENT; 11569#ifdef DEBUG_PUSH 11570 xmlGenericError(xmlGenericErrorContext, 11571 "PP: entering CONTENT\n"); 11572#endif 11573 } 11574 break; 11575 } 11576 case 
XML_PARSER_MISC: 11577 SKIP_BLANKS; 11578 if (ctxt->input->buf == NULL) 11579 avail = ctxt->input->length - 11580 (ctxt->input->cur - ctxt->input->base); 11581 else 11582 avail = xmlBufUse(ctxt->input->buf->buffer) - 11583 (ctxt->input->cur - ctxt->input->base); 11584 if (avail < 2) 11585 goto done; 11586 cur = ctxt->input->cur[0]; 11587 next = ctxt->input->cur[1]; 11588 if ((cur == '<') && (next == '?')) { 11589 if ((!terminate) && 11590 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11591 ctxt->progressive = XML_PARSER_PI; 11592 goto done; 11593 } 11594#ifdef DEBUG_PUSH 11595 xmlGenericError(xmlGenericErrorContext, 11596 "PP: Parsing PI\n"); 11597#endif 11598 xmlParsePI(ctxt); 11599 ctxt->instate = XML_PARSER_MISC; 11600 ctxt->progressive = 1; 11601 ctxt->checkIndex = 0; 11602 } else if ((cur == '<') && (next == '!') && 11603 (ctxt->input->cur[2] == '-') && 11604 (ctxt->input->cur[3] == '-')) { 11605 if ((!terminate) && 11606 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11607 ctxt->progressive = XML_PARSER_COMMENT; 11608 goto done; 11609 } 11610#ifdef DEBUG_PUSH 11611 xmlGenericError(xmlGenericErrorContext, 11612 "PP: Parsing Comment\n"); 11613#endif 11614 xmlParseComment(ctxt); 11615 ctxt->instate = XML_PARSER_MISC; 11616 ctxt->progressive = 1; 11617 ctxt->checkIndex = 0; 11618 } else if ((cur == '<') && (next == '!') && 11619 (ctxt->input->cur[2] == 'D') && 11620 (ctxt->input->cur[3] == 'O') && 11621 (ctxt->input->cur[4] == 'C') && 11622 (ctxt->input->cur[5] == 'T') && 11623 (ctxt->input->cur[6] == 'Y') && 11624 (ctxt->input->cur[7] == 'P') && 11625 (ctxt->input->cur[8] == 'E')) { 11626 if ((!terminate) && 11627 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11628 ctxt->progressive = XML_PARSER_DTD; 11629 goto done; 11630 } 11631#ifdef DEBUG_PUSH 11632 xmlGenericError(xmlGenericErrorContext, 11633 "PP: Parsing internal subset\n"); 11634#endif 11635 ctxt->inSubset = 1; 11636 ctxt->progressive = 1; 11637 ctxt->checkIndex = 0; 11638 
xmlParseDocTypeDecl(ctxt); 11639 if (RAW == '[') { 11640 ctxt->instate = XML_PARSER_DTD; 11641#ifdef DEBUG_PUSH 11642 xmlGenericError(xmlGenericErrorContext, 11643 "PP: entering DTD\n"); 11644#endif 11645 } else { 11646 /* 11647 * Create and update the external subset. 11648 */ 11649 ctxt->inSubset = 2; 11650 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11651 (ctxt->sax->externalSubset != NULL)) 11652 ctxt->sax->externalSubset(ctxt->userData, 11653 ctxt->intSubName, ctxt->extSubSystem, 11654 ctxt->extSubURI); 11655 ctxt->inSubset = 0; 11656 xmlCleanSpecialAttr(ctxt); 11657 ctxt->instate = XML_PARSER_PROLOG; 11658#ifdef DEBUG_PUSH 11659 xmlGenericError(xmlGenericErrorContext, 11660 "PP: entering PROLOG\n"); 11661#endif 11662 } 11663 } else if ((cur == '<') && (next == '!') && 11664 (avail < 9)) { 11665 goto done; 11666 } else { 11667 ctxt->instate = XML_PARSER_START_TAG; 11668 ctxt->progressive = XML_PARSER_START_TAG; 11669 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11670#ifdef DEBUG_PUSH 11671 xmlGenericError(xmlGenericErrorContext, 11672 "PP: entering START_TAG\n"); 11673#endif 11674 } 11675 break; 11676 case XML_PARSER_PROLOG: 11677 SKIP_BLANKS; 11678 if (ctxt->input->buf == NULL) 11679 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11680 else 11681 avail = xmlBufUse(ctxt->input->buf->buffer) - 11682 (ctxt->input->cur - ctxt->input->base); 11683 if (avail < 2) 11684 goto done; 11685 cur = ctxt->input->cur[0]; 11686 next = ctxt->input->cur[1]; 11687 if ((cur == '<') && (next == '?')) { 11688 if ((!terminate) && 11689 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11690 ctxt->progressive = XML_PARSER_PI; 11691 goto done; 11692 } 11693#ifdef DEBUG_PUSH 11694 xmlGenericError(xmlGenericErrorContext, 11695 "PP: Parsing PI\n"); 11696#endif 11697 xmlParsePI(ctxt); 11698 ctxt->instate = XML_PARSER_PROLOG; 11699 ctxt->progressive = 1; 11700 } else if ((cur == '<') && (next == '!') && 11701 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] 
== '-')) { 11702 if ((!terminate) && 11703 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11704 ctxt->progressive = XML_PARSER_COMMENT; 11705 goto done; 11706 } 11707#ifdef DEBUG_PUSH 11708 xmlGenericError(xmlGenericErrorContext, 11709 "PP: Parsing Comment\n"); 11710#endif 11711 xmlParseComment(ctxt); 11712 ctxt->instate = XML_PARSER_PROLOG; 11713 ctxt->progressive = 1; 11714 } else if ((cur == '<') && (next == '!') && 11715 (avail < 4)) { 11716 goto done; 11717 } else { 11718 ctxt->instate = XML_PARSER_START_TAG; 11719 if (ctxt->progressive == 0) 11720 ctxt->progressive = XML_PARSER_START_TAG; 11721 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11722#ifdef DEBUG_PUSH 11723 xmlGenericError(xmlGenericErrorContext, 11724 "PP: entering START_TAG\n"); 11725#endif 11726 } 11727 break; 11728 case XML_PARSER_EPILOG: 11729 SKIP_BLANKS; 11730 if (ctxt->input->buf == NULL) 11731 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11732 else 11733 avail = xmlBufUse(ctxt->input->buf->buffer) - 11734 (ctxt->input->cur - ctxt->input->base); 11735 if (avail < 2) 11736 goto done; 11737 cur = ctxt->input->cur[0]; 11738 next = ctxt->input->cur[1]; 11739 if ((cur == '<') && (next == '?')) { 11740 if ((!terminate) && 11741 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11742 ctxt->progressive = XML_PARSER_PI; 11743 goto done; 11744 } 11745#ifdef DEBUG_PUSH 11746 xmlGenericError(xmlGenericErrorContext, 11747 "PP: Parsing PI\n"); 11748#endif 11749 xmlParsePI(ctxt); 11750 ctxt->instate = XML_PARSER_EPILOG; 11751 ctxt->progressive = 1; 11752 } else if ((cur == '<') && (next == '!') && 11753 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11754 if ((!terminate) && 11755 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11756 ctxt->progressive = XML_PARSER_COMMENT; 11757 goto done; 11758 } 11759#ifdef DEBUG_PUSH 11760 xmlGenericError(xmlGenericErrorContext, 11761 "PP: Parsing Comment\n"); 11762#endif 11763 xmlParseComment(ctxt); 11764 ctxt->instate 
= XML_PARSER_EPILOG; 11765 ctxt->progressive = 1; 11766 } else if ((cur == '<') && (next == '!') && 11767 (avail < 4)) { 11768 goto done; 11769 } else { 11770 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11771 ctxt->instate = XML_PARSER_EOF; 11772#ifdef DEBUG_PUSH 11773 xmlGenericError(xmlGenericErrorContext, 11774 "PP: entering EOF\n"); 11775#endif 11776 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11777 ctxt->sax->endDocument(ctxt->userData); 11778 goto done; 11779 } 11780 break; 11781 case XML_PARSER_DTD: { 11782 /* 11783 * Sorry but progressive parsing of the internal subset 11784 * is not expected to be supported. We first check that 11785 * the full content of the internal subset is available and 11786 * the parsing is launched only at that point. 11787 * Internal subset ends up with "']' S? '>'" in an unescaped 11788 * section and not in a ']]>' sequence which are conditional 11789 * sections (whoever argued to keep that crap in XML deserve 11790 * a place in hell !). 11791 */ 11792 int base, i; 11793 xmlChar *buf; 11794 xmlChar quote = 0; 11795 size_t use; 11796 11797 base = ctxt->input->cur - ctxt->input->base; 11798 if (base < 0) return(0); 11799 if (ctxt->checkIndex > base) 11800 base = ctxt->checkIndex; 11801 buf = xmlBufContent(ctxt->input->buf->buffer); 11802 use = xmlBufUse(ctxt->input->buf->buffer); 11803 for (;(unsigned int) base < use; base++) { 11804 if (quote != 0) { 11805 if (buf[base] == quote) 11806 quote = 0; 11807 continue; 11808 } 11809 if ((quote == 0) && (buf[base] == '<')) { 11810 int found = 0; 11811 /* special handling of comments */ 11812 if (((unsigned int) base + 4 < use) && 11813 (buf[base + 1] == '!') && 11814 (buf[base + 2] == '-') && 11815 (buf[base + 3] == '-')) { 11816 for (;(unsigned int) base + 3 < use; base++) { 11817 if ((buf[base] == '-') && 11818 (buf[base + 1] == '-') && 11819 (buf[base + 2] == '>')) { 11820 found = 1; 11821 base += 2; 11822 break; 11823 } 11824 } 11825 if (!found) { 11826#if 0 11827 
fprintf(stderr, "unfinished comment\n"); 11828#endif 11829 break; /* for */ 11830 } 11831 continue; 11832 } 11833 } 11834 if (buf[base] == '"') { 11835 quote = '"'; 11836 continue; 11837 } 11838 if (buf[base] == '\'') { 11839 quote = '\''; 11840 continue; 11841 } 11842 if (buf[base] == ']') { 11843#if 0 11844 fprintf(stderr, "%c%c%c%c: ", buf[base], 11845 buf[base + 1], buf[base + 2], buf[base + 3]); 11846#endif 11847 if ((unsigned int) base +1 >= use) 11848 break; 11849 if (buf[base + 1] == ']') { 11850 /* conditional crap, skip both ']' ! */ 11851 base++; 11852 continue; 11853 } 11854 for (i = 1; (unsigned int) base + i < use; i++) { 11855 if (buf[base + i] == '>') { 11856#if 0 11857 fprintf(stderr, "found\n"); 11858#endif 11859 goto found_end_int_subset; 11860 } 11861 if (!IS_BLANK_CH(buf[base + i])) { 11862#if 0 11863 fprintf(stderr, "not found\n"); 11864#endif 11865 goto not_end_of_int_subset; 11866 } 11867 } 11868#if 0 11869 fprintf(stderr, "end of stream\n"); 11870#endif 11871 break; 11872 11873 } 11874not_end_of_int_subset: 11875 continue; /* for */ 11876 } 11877 /* 11878 * We didn't found the end of the Internal subset 11879 */ 11880 if (quote == 0) 11881 ctxt->checkIndex = base; 11882 else 11883 ctxt->checkIndex = 0; 11884#ifdef DEBUG_PUSH 11885 if (next == 0) 11886 xmlGenericError(xmlGenericErrorContext, 11887 "PP: lookup of int subset end filed\n"); 11888#endif 11889 goto done; 11890 11891found_end_int_subset: 11892 ctxt->checkIndex = 0; 11893 xmlParseInternalSubset(ctxt); 11894 ctxt->inSubset = 2; 11895 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11896 (ctxt->sax->externalSubset != NULL)) 11897 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11898 ctxt->extSubSystem, ctxt->extSubURI); 11899 ctxt->inSubset = 0; 11900 xmlCleanSpecialAttr(ctxt); 11901 ctxt->instate = XML_PARSER_PROLOG; 11902 ctxt->checkIndex = 0; 11903#ifdef DEBUG_PUSH 11904 xmlGenericError(xmlGenericErrorContext, 11905 "PP: entering PROLOG\n"); 11906#endif 11907 break; 
11908 } 11909 case XML_PARSER_COMMENT: 11910 xmlGenericError(xmlGenericErrorContext, 11911 "PP: internal error, state == COMMENT\n"); 11912 ctxt->instate = XML_PARSER_CONTENT; 11913#ifdef DEBUG_PUSH 11914 xmlGenericError(xmlGenericErrorContext, 11915 "PP: entering CONTENT\n"); 11916#endif 11917 break; 11918 case XML_PARSER_IGNORE: 11919 xmlGenericError(xmlGenericErrorContext, 11920 "PP: internal error, state == IGNORE"); 11921 ctxt->instate = XML_PARSER_DTD; 11922#ifdef DEBUG_PUSH 11923 xmlGenericError(xmlGenericErrorContext, 11924 "PP: entering DTD\n"); 11925#endif 11926 break; 11927 case XML_PARSER_PI: 11928 xmlGenericError(xmlGenericErrorContext, 11929 "PP: internal error, state == PI\n"); 11930 ctxt->instate = XML_PARSER_CONTENT; 11931#ifdef DEBUG_PUSH 11932 xmlGenericError(xmlGenericErrorContext, 11933 "PP: entering CONTENT\n"); 11934#endif 11935 break; 11936 case XML_PARSER_ENTITY_DECL: 11937 xmlGenericError(xmlGenericErrorContext, 11938 "PP: internal error, state == ENTITY_DECL\n"); 11939 ctxt->instate = XML_PARSER_DTD; 11940#ifdef DEBUG_PUSH 11941 xmlGenericError(xmlGenericErrorContext, 11942 "PP: entering DTD\n"); 11943#endif 11944 break; 11945 case XML_PARSER_ENTITY_VALUE: 11946 xmlGenericError(xmlGenericErrorContext, 11947 "PP: internal error, state == ENTITY_VALUE\n"); 11948 ctxt->instate = XML_PARSER_CONTENT; 11949#ifdef DEBUG_PUSH 11950 xmlGenericError(xmlGenericErrorContext, 11951 "PP: entering DTD\n"); 11952#endif 11953 break; 11954 case XML_PARSER_ATTRIBUTE_VALUE: 11955 xmlGenericError(xmlGenericErrorContext, 11956 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11957 ctxt->instate = XML_PARSER_START_TAG; 11958#ifdef DEBUG_PUSH 11959 xmlGenericError(xmlGenericErrorContext, 11960 "PP: entering START_TAG\n"); 11961#endif 11962 break; 11963 case XML_PARSER_SYSTEM_LITERAL: 11964 xmlGenericError(xmlGenericErrorContext, 11965 "PP: internal error, state == SYSTEM_LITERAL\n"); 11966 ctxt->instate = XML_PARSER_START_TAG; 11967#ifdef DEBUG_PUSH 11968 
xmlGenericError(xmlGenericErrorContext, 11969 "PP: entering START_TAG\n"); 11970#endif 11971 break; 11972 case XML_PARSER_PUBLIC_LITERAL: 11973 xmlGenericError(xmlGenericErrorContext, 11974 "PP: internal error, state == PUBLIC_LITERAL\n"); 11975 ctxt->instate = XML_PARSER_START_TAG; 11976#ifdef DEBUG_PUSH 11977 xmlGenericError(xmlGenericErrorContext, 11978 "PP: entering START_TAG\n"); 11979#endif 11980 break; 11981 } 11982 } 11983done: 11984#ifdef DEBUG_PUSH 11985 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11986#endif 11987 return(ret); 11988encoding_error: 11989 { 11990 char buffer[150]; 11991 11992 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11993 ctxt->input->cur[0], ctxt->input->cur[1], 11994 ctxt->input->cur[2], ctxt->input->cur[3]); 11995 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11996 "Input is not proper UTF-8, indicate encoding !\n%s", 11997 BAD_CAST buffer, NULL); 11998 } 11999 return(0); 12000} 12001 12002/** 12003 * xmlParseCheckTransition: 12004 * @ctxt: an XML parser context 12005 * @chunk: a char array 12006 * @size: the size in byte of the chunk 12007 * 12008 * Check depending on the current parser state if the chunk given must be 12009 * processed immediately or one need more data to advance on parsing. 
12010 * 12011 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12012 */ 12013static int 12014xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12015 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12016 return(-1); 12017 if (ctxt->instate == XML_PARSER_START_TAG) { 12018 if (memchr(chunk, '>', size) != NULL) 12019 return(1); 12020 return(0); 12021 } 12022 if (ctxt->progressive == XML_PARSER_COMMENT) { 12023 if (memchr(chunk, '>', size) != NULL) 12024 return(1); 12025 return(0); 12026 } 12027 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12028 if (memchr(chunk, '>', size) != NULL) 12029 return(1); 12030 return(0); 12031 } 12032 if (ctxt->progressive == XML_PARSER_PI) { 12033 if (memchr(chunk, '>', size) != NULL) 12034 return(1); 12035 return(0); 12036 } 12037 if (ctxt->instate == XML_PARSER_END_TAG) { 12038 if (memchr(chunk, '>', size) != NULL) 12039 return(1); 12040 return(0); 12041 } 12042 if ((ctxt->progressive == XML_PARSER_DTD) || 12043 (ctxt->instate == XML_PARSER_DTD)) { 12044 if (memchr(chunk, ']', size) != NULL) 12045 return(1); 12046 return(0); 12047 } 12048 return(1); 12049} 12050 12051/** 12052 * xmlParseChunk: 12053 * @ctxt: an XML parser context 12054 * @chunk: an char array 12055 * @size: the size in byte of the chunk 12056 * @terminate: last chunk indicator 12057 * 12058 * Parse a Chunk of memory 12059 * 12060 * Returns zero if no error, the xmlParserErrors otherwise. 
12061 */ 12062int 12063xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12064 int terminate) { 12065 int end_in_lf = 0; 12066 int remain = 0; 12067 size_t old_avail = 0; 12068 size_t avail = 0; 12069 12070 if (ctxt == NULL) 12071 return(XML_ERR_INTERNAL_ERROR); 12072 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12073 return(ctxt->errNo); 12074 if (ctxt->instate == XML_PARSER_EOF) 12075 return(-1); 12076 if (ctxt->instate == XML_PARSER_START) 12077 xmlDetectSAX2(ctxt); 12078 if ((size > 0) && (chunk != NULL) && (!terminate) && 12079 (chunk[size - 1] == '\r')) { 12080 end_in_lf = 1; 12081 size--; 12082 } 12083 12084xmldecl_done: 12085 12086 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12087 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12088 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12089 size_t cur = ctxt->input->cur - ctxt->input->base; 12090 int res; 12091 12092 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12093 /* 12094 * Specific handling if we autodetected an encoding, we should not 12095 * push more than the first line ... 
which depend on the encoding 12096 * And only push the rest once the final encoding was detected 12097 */ 12098 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12099 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12100 unsigned int len = 45; 12101 12102 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12103 BAD_CAST "UTF-16")) || 12104 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12105 BAD_CAST "UTF16"))) 12106 len = 90; 12107 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12108 BAD_CAST "UCS-4")) || 12109 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12110 BAD_CAST "UCS4"))) 12111 len = 180; 12112 12113 if (ctxt->input->buf->rawconsumed < len) 12114 len -= ctxt->input->buf->rawconsumed; 12115 12116 /* 12117 * Change size for reading the initial declaration only 12118 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12119 * will blindly copy extra bytes from memory. 12120 */ 12121 if ((unsigned int) size > len) { 12122 remain = size - len; 12123 size = len; 12124 } else { 12125 remain = 0; 12126 } 12127 } 12128 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12129 if (res < 0) { 12130 ctxt->errNo = XML_PARSER_EOF; 12131 ctxt->disableSAX = 1; 12132 return (XML_PARSER_EOF); 12133 } 12134 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12135#ifdef DEBUG_PUSH 12136 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12137#endif 12138 12139 } else if (ctxt->instate != XML_PARSER_EOF) { 12140 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12141 xmlParserInputBufferPtr in = ctxt->input->buf; 12142 if ((in->encoder != NULL) && (in->buffer != NULL) && 12143 (in->raw != NULL)) { 12144 int nbchars; 12145 12146 nbchars = xmlCharEncInput(in); 12147 if (nbchars < 0) { 12148 /* TODO 2.6.0 */ 12149 xmlGenericError(xmlGenericErrorContext, 12150 "xmlParseChunk: encoder error\n"); 12151 
return(XML_ERR_INVALID_ENCODING); 12152 } 12153 } 12154 } 12155 } 12156 if (remain != 0) { 12157 xmlParseTryOrFinish(ctxt, 0); 12158 } else { 12159 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12160 avail = xmlBufUse(ctxt->input->buf->buffer); 12161 /* 12162 * Depending on the current state it may not be such 12163 * a good idea to try parsing if there is nothing in the chunk 12164 * which would be worth doing a parser state transition and we 12165 * need to wait for more data 12166 */ 12167 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12168 (old_avail == 0) || (avail == 0) || 12169 (xmlParseCheckTransition(ctxt, 12170 (const char *)&ctxt->input->base[old_avail], 12171 avail - old_avail))) 12172 xmlParseTryOrFinish(ctxt, terminate); 12173 } 12174 if ((ctxt->input != NULL) && 12175 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12176 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12177 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12178 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12179 ctxt->instate = XML_PARSER_EOF; 12180 } 12181 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12182 return(ctxt->errNo); 12183 12184 if (remain != 0) { 12185 chunk += size; 12186 size = remain; 12187 remain = 0; 12188 goto xmldecl_done; 12189 } 12190 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12191 (ctxt->input->buf != NULL)) { 12192 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12193 } 12194 if (terminate) { 12195 /* 12196 * Check for termination 12197 */ 12198 int cur_avail = 0; 12199 12200 if (ctxt->input != NULL) { 12201 if (ctxt->input->buf == NULL) 12202 cur_avail = ctxt->input->length - 12203 (ctxt->input->cur - ctxt->input->base); 12204 else 12205 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12206 (ctxt->input->cur - ctxt->input->base); 12207 } 12208 12209 if ((ctxt->instate != XML_PARSER_EOF) && 12210 (ctxt->instate != XML_PARSER_EPILOG)) { 12211 xmlFatalErr(ctxt, 
XML_ERR_DOCUMENT_END, NULL); 12212 } 12213 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12215 } 12216 if (ctxt->instate != XML_PARSER_EOF) { 12217 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12218 ctxt->sax->endDocument(ctxt->userData); 12219 } 12220 ctxt->instate = XML_PARSER_EOF; 12221 } 12222 return((xmlParserErrors) ctxt->errNo); 12223} 12224 12225/************************************************************************ 12226 * * 12227 * I/O front end functions to the parser * 12228 * * 12229 ************************************************************************/ 12230 12231/** 12232 * xmlCreatePushParserCtxt: 12233 * @sax: a SAX handler 12234 * @user_data: The user data returned on SAX callbacks 12235 * @chunk: a pointer to an array of chars 12236 * @size: number of chars in the array 12237 * @filename: an optional file name or URI 12238 * 12239 * Create a parser context for using the XML parser in push mode. 12240 * If @buffer and @size are non-NULL, the data is used to detect 12241 * the encoding. The remaining characters will be parsed so they 12242 * don't need to be fed in again through xmlParseChunk. 12243 * To allow content encoding detection, @size should be >= 4 12244 * The value of @filename is used for fetching external entities 12245 * and error/warning reports. 
12246 * 12247 * Returns the new parser context or NULL 12248 */ 12249 12250xmlParserCtxtPtr 12251xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12252 const char *chunk, int size, const char *filename) { 12253 xmlParserCtxtPtr ctxt; 12254 xmlParserInputPtr inputStream; 12255 xmlParserInputBufferPtr buf; 12256 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12257 12258 /* 12259 * plug some encoding conversion routines 12260 */ 12261 if ((chunk != NULL) && (size >= 4)) 12262 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12263 12264 buf = xmlAllocParserInputBuffer(enc); 12265 if (buf == NULL) return(NULL); 12266 12267 ctxt = xmlNewParserCtxt(); 12268 if (ctxt == NULL) { 12269 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12270 xmlFreeParserInputBuffer(buf); 12271 return(NULL); 12272 } 12273 ctxt->dictNames = 1; 12274 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12275 if (ctxt->pushTab == NULL) { 12276 xmlErrMemory(ctxt, NULL); 12277 xmlFreeParserInputBuffer(buf); 12278 xmlFreeParserCtxt(ctxt); 12279 return(NULL); 12280 } 12281 if (sax != NULL) { 12282#ifdef LIBXML_SAX1_ENABLED 12283 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12284#endif /* LIBXML_SAX1_ENABLED */ 12285 xmlFree(ctxt->sax); 12286 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12287 if (ctxt->sax == NULL) { 12288 xmlErrMemory(ctxt, NULL); 12289 xmlFreeParserInputBuffer(buf); 12290 xmlFreeParserCtxt(ctxt); 12291 return(NULL); 12292 } 12293 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12294 if (sax->initialized == XML_SAX2_MAGIC) 12295 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12296 else 12297 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12298 if (user_data != NULL) 12299 ctxt->userData = user_data; 12300 } 12301 if (filename == NULL) { 12302 ctxt->directory = NULL; 12303 } else { 12304 ctxt->directory = xmlParserGetDirectory(filename); 12305 } 12306 12307 inputStream = xmlNewInputStream(ctxt); 
12308 if (inputStream == NULL) { 12309 xmlFreeParserCtxt(ctxt); 12310 xmlFreeParserInputBuffer(buf); 12311 return(NULL); 12312 } 12313 12314 if (filename == NULL) 12315 inputStream->filename = NULL; 12316 else { 12317 inputStream->filename = (char *) 12318 xmlCanonicPath((const xmlChar *) filename); 12319 if (inputStream->filename == NULL) { 12320 xmlFreeParserCtxt(ctxt); 12321 xmlFreeParserInputBuffer(buf); 12322 return(NULL); 12323 } 12324 } 12325 inputStream->buf = buf; 12326 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12327 inputPush(ctxt, inputStream); 12328 12329 /* 12330 * If the caller didn't provide an initial 'chunk' for determining 12331 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12332 * that it can be automatically determined later 12333 */ 12334 if ((size == 0) || (chunk == NULL)) { 12335 ctxt->charset = XML_CHAR_ENCODING_NONE; 12336 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12337 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12338 size_t cur = ctxt->input->cur - ctxt->input->base; 12339 12340 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12341 12342 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12343#ifdef DEBUG_PUSH 12344 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12345#endif 12346 } 12347 12348 if (enc != XML_CHAR_ENCODING_NONE) { 12349 xmlSwitchEncoding(ctxt, enc); 12350 } 12351 12352 return(ctxt); 12353} 12354#endif /* LIBXML_PUSH_ENABLED */ 12355 12356/** 12357 * xmlStopParser: 12358 * @ctxt: an XML parser context 12359 * 12360 * Blocks further parser processing 12361 */ 12362void 12363xmlStopParser(xmlParserCtxtPtr ctxt) { 12364 if (ctxt == NULL) 12365 return; 12366 ctxt->instate = XML_PARSER_EOF; 12367 ctxt->disableSAX = 1; 12368 if (ctxt->input != NULL) { 12369 ctxt->input->cur = BAD_CAST""; 12370 ctxt->input->base = ctxt->input->cur; 12371 } 12372} 12373 12374/** 12375 * xmlCreateIOParserCtxt: 12376 
* @sax: a SAX handler 12377 * @user_data: The user data returned on SAX callbacks 12378 * @ioread: an I/O read function 12379 * @ioclose: an I/O close function 12380 * @ioctx: an I/O handler 12381 * @enc: the charset encoding if known 12382 * 12383 * Create a parser context for using the XML parser with an existing 12384 * I/O stream 12385 * 12386 * Returns the new parser context or NULL 12387 */ 12388xmlParserCtxtPtr 12389xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12390 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12391 void *ioctx, xmlCharEncoding enc) { 12392 xmlParserCtxtPtr ctxt; 12393 xmlParserInputPtr inputStream; 12394 xmlParserInputBufferPtr buf; 12395 12396 if (ioread == NULL) return(NULL); 12397 12398 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12399 if (buf == NULL) { 12400 if (ioclose != NULL) 12401 ioclose(ioctx); 12402 return (NULL); 12403 } 12404 12405 ctxt = xmlNewParserCtxt(); 12406 if (ctxt == NULL) { 12407 xmlFreeParserInputBuffer(buf); 12408 return(NULL); 12409 } 12410 if (sax != NULL) { 12411#ifdef LIBXML_SAX1_ENABLED 12412 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12413#endif /* LIBXML_SAX1_ENABLED */ 12414 xmlFree(ctxt->sax); 12415 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12416 if (ctxt->sax == NULL) { 12417 xmlErrMemory(ctxt, NULL); 12418 xmlFreeParserCtxt(ctxt); 12419 return(NULL); 12420 } 12421 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12422 if (sax->initialized == XML_SAX2_MAGIC) 12423 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12424 else 12425 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12426 if (user_data != NULL) 12427 ctxt->userData = user_data; 12428 } 12429 12430 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12431 if (inputStream == NULL) { 12432 xmlFreeParserCtxt(ctxt); 12433 return(NULL); 12434 } 12435 inputPush(ctxt, inputStream); 12436 12437 return(ctxt); 12438} 12439 12440#ifdef LIBXML_VALID_ENABLED 
12441/************************************************************************ 12442 * * 12443 * Front ends when parsing a DTD * 12444 * * 12445 ************************************************************************/ 12446 12447/** 12448 * xmlIOParseDTD: 12449 * @sax: the SAX handler block or NULL 12450 * @input: an Input Buffer 12451 * @enc: the charset encoding if known 12452 * 12453 * Load and parse a DTD 12454 * 12455 * Returns the resulting xmlDtdPtr or NULL in case of error. 12456 * @input will be freed by the function in any case. 12457 */ 12458 12459xmlDtdPtr 12460xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12461 xmlCharEncoding enc) { 12462 xmlDtdPtr ret = NULL; 12463 xmlParserCtxtPtr ctxt; 12464 xmlParserInputPtr pinput = NULL; 12465 xmlChar start[4]; 12466 12467 if (input == NULL) 12468 return(NULL); 12469 12470 ctxt = xmlNewParserCtxt(); 12471 if (ctxt == NULL) { 12472 xmlFreeParserInputBuffer(input); 12473 return(NULL); 12474 } 12475 12476 /* 12477 * Set-up the SAX context 12478 */ 12479 if (sax != NULL) { 12480 if (ctxt->sax != NULL) 12481 xmlFree(ctxt->sax); 12482 ctxt->sax = sax; 12483 ctxt->userData = ctxt; 12484 } 12485 xmlDetectSAX2(ctxt); 12486 12487 /* 12488 * generate a parser input from the I/O handler 12489 */ 12490 12491 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12492 if (pinput == NULL) { 12493 if (sax != NULL) ctxt->sax = NULL; 12494 xmlFreeParserInputBuffer(input); 12495 xmlFreeParserCtxt(ctxt); 12496 return(NULL); 12497 } 12498 12499 /* 12500 * plug some encoding conversion routines here. 
12501 */ 12502 if (xmlPushInput(ctxt, pinput) < 0) { 12503 if (sax != NULL) ctxt->sax = NULL; 12504 xmlFreeParserCtxt(ctxt); 12505 return(NULL); 12506 } 12507 if (enc != XML_CHAR_ENCODING_NONE) { 12508 xmlSwitchEncoding(ctxt, enc); 12509 } 12510 12511 pinput->filename = NULL; 12512 pinput->line = 1; 12513 pinput->col = 1; 12514 pinput->base = ctxt->input->cur; 12515 pinput->cur = ctxt->input->cur; 12516 pinput->free = NULL; 12517 12518 /* 12519 * let's parse that entity knowing it's an external subset. 12520 */ 12521 ctxt->inSubset = 2; 12522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12523 if (ctxt->myDoc == NULL) { 12524 xmlErrMemory(ctxt, "New Doc failed"); 12525 return(NULL); 12526 } 12527 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12528 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12529 BAD_CAST "none", BAD_CAST "none"); 12530 12531 if ((enc == XML_CHAR_ENCODING_NONE) && 12532 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12533 /* 12534 * Get the 4 first bytes and decode the charset 12535 * if enc != XML_CHAR_ENCODING_NONE 12536 * plug some encoding conversion routines. 
12537 */ 12538 start[0] = RAW; 12539 start[1] = NXT(1); 12540 start[2] = NXT(2); 12541 start[3] = NXT(3); 12542 enc = xmlDetectCharEncoding(start, 4); 12543 if (enc != XML_CHAR_ENCODING_NONE) { 12544 xmlSwitchEncoding(ctxt, enc); 12545 } 12546 } 12547 12548 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12549 12550 if (ctxt->myDoc != NULL) { 12551 if (ctxt->wellFormed) { 12552 ret = ctxt->myDoc->extSubset; 12553 ctxt->myDoc->extSubset = NULL; 12554 if (ret != NULL) { 12555 xmlNodePtr tmp; 12556 12557 ret->doc = NULL; 12558 tmp = ret->children; 12559 while (tmp != NULL) { 12560 tmp->doc = NULL; 12561 tmp = tmp->next; 12562 } 12563 } 12564 } else { 12565 ret = NULL; 12566 } 12567 xmlFreeDoc(ctxt->myDoc); 12568 ctxt->myDoc = NULL; 12569 } 12570 if (sax != NULL) ctxt->sax = NULL; 12571 xmlFreeParserCtxt(ctxt); 12572 12573 return(ret); 12574} 12575 12576/** 12577 * xmlSAXParseDTD: 12578 * @sax: the SAX handler block 12579 * @ExternalID: a NAME* containing the External ID of the DTD 12580 * @SystemID: a NAME* containing the URL to the DTD 12581 * 12582 * Load and parse an external subset. 12583 * 12584 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12585 */ 12586 12587xmlDtdPtr 12588xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12589 const xmlChar *SystemID) { 12590 xmlDtdPtr ret = NULL; 12591 xmlParserCtxtPtr ctxt; 12592 xmlParserInputPtr input = NULL; 12593 xmlCharEncoding enc; 12594 xmlChar* systemIdCanonic; 12595 12596 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12597 12598 ctxt = xmlNewParserCtxt(); 12599 if (ctxt == NULL) { 12600 return(NULL); 12601 } 12602 12603 /* 12604 * Set-up the SAX context 12605 */ 12606 if (sax != NULL) { 12607 if (ctxt->sax != NULL) 12608 xmlFree(ctxt->sax); 12609 ctxt->sax = sax; 12610 ctxt->userData = ctxt; 12611 } 12612 12613 /* 12614 * Canonicalise the system ID 12615 */ 12616 systemIdCanonic = xmlCanonicPath(SystemID); 12617 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12618 xmlFreeParserCtxt(ctxt); 12619 return(NULL); 12620 } 12621 12622 /* 12623 * Ask the Entity resolver to load the damn thing 12624 */ 12625 12626 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12627 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12628 systemIdCanonic); 12629 if (input == NULL) { 12630 if (sax != NULL) ctxt->sax = NULL; 12631 xmlFreeParserCtxt(ctxt); 12632 if (systemIdCanonic != NULL) 12633 xmlFree(systemIdCanonic); 12634 return(NULL); 12635 } 12636 12637 /* 12638 * plug some encoding conversion routines here. 
12639 */ 12640 if (xmlPushInput(ctxt, input) < 0) { 12641 if (sax != NULL) ctxt->sax = NULL; 12642 xmlFreeParserCtxt(ctxt); 12643 if (systemIdCanonic != NULL) 12644 xmlFree(systemIdCanonic); 12645 return(NULL); 12646 } 12647 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12648 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12649 xmlSwitchEncoding(ctxt, enc); 12650 } 12651 12652 if (input->filename == NULL) 12653 input->filename = (char *) systemIdCanonic; 12654 else 12655 xmlFree(systemIdCanonic); 12656 input->line = 1; 12657 input->col = 1; 12658 input->base = ctxt->input->cur; 12659 input->cur = ctxt->input->cur; 12660 input->free = NULL; 12661 12662 /* 12663 * let's parse that entity knowing it's an external subset. 12664 */ 12665 ctxt->inSubset = 2; 12666 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12667 if (ctxt->myDoc == NULL) { 12668 xmlErrMemory(ctxt, "New Doc failed"); 12669 if (sax != NULL) ctxt->sax = NULL; 12670 xmlFreeParserCtxt(ctxt); 12671 return(NULL); 12672 } 12673 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12674 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12675 ExternalID, SystemID); 12676 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12677 12678 if (ctxt->myDoc != NULL) { 12679 if (ctxt->wellFormed) { 12680 ret = ctxt->myDoc->extSubset; 12681 ctxt->myDoc->extSubset = NULL; 12682 if (ret != NULL) { 12683 xmlNodePtr tmp; 12684 12685 ret->doc = NULL; 12686 tmp = ret->children; 12687 while (tmp != NULL) { 12688 tmp->doc = NULL; 12689 tmp = tmp->next; 12690 } 12691 } 12692 } else { 12693 ret = NULL; 12694 } 12695 xmlFreeDoc(ctxt->myDoc); 12696 ctxt->myDoc = NULL; 12697 } 12698 if (sax != NULL) ctxt->sax = NULL; 12699 xmlFreeParserCtxt(ctxt); 12700 12701 return(ret); 12702} 12703 12704 12705/** 12706 * xmlParseDTD: 12707 * @ExternalID: a NAME* containing the External ID of the DTD 12708 * @SystemID: a NAME* containing the URL to the DTD 12709 * 12710 * Load and parse an external subset. 
12711 * 12712 * Returns the resulting xmlDtdPtr or NULL in case of error. 12713 */ 12714 12715xmlDtdPtr 12716xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12717 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12718} 12719#endif /* LIBXML_VALID_ENABLED */ 12720 12721/************************************************************************ 12722 * * 12723 * Front ends when parsing an Entity * 12724 * * 12725 ************************************************************************/ 12726 12727/** 12728 * xmlParseCtxtExternalEntity: 12729 * @ctx: the existing parsing context 12730 * @URL: the URL for the entity to load 12731 * @ID: the System ID for the entity to load 12732 * @lst: the return value for the set of parsed nodes 12733 * 12734 * Parse an external general entity within an existing parsing context 12735 * An external general parsed entity is well-formed if it matches the 12736 * production labeled extParsedEnt. 12737 * 12738 * [78] extParsedEnt ::= TextDecl? 
content 12739 * 12740 * Returns 0 if the entity is well formed, -1 in case of args problem and 12741 * the parser error code otherwise 12742 */ 12743 12744int 12745xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12746 const xmlChar *ID, xmlNodePtr *lst) { 12747 xmlParserCtxtPtr ctxt; 12748 xmlDocPtr newDoc; 12749 xmlNodePtr newRoot; 12750 xmlSAXHandlerPtr oldsax = NULL; 12751 int ret = 0; 12752 xmlChar start[4]; 12753 xmlCharEncoding enc; 12754 12755 if (ctx == NULL) return(-1); 12756 12757 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12758 (ctx->depth > 1024)) { 12759 return(XML_ERR_ENTITY_LOOP); 12760 } 12761 12762 if (lst != NULL) 12763 *lst = NULL; 12764 if ((URL == NULL) && (ID == NULL)) 12765 return(-1); 12766 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12767 return(-1); 12768 12769 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12770 if (ctxt == NULL) { 12771 return(-1); 12772 } 12773 12774 oldsax = ctxt->sax; 12775 ctxt->sax = ctx->sax; 12776 xmlDetectSAX2(ctxt); 12777 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12778 if (newDoc == NULL) { 12779 xmlFreeParserCtxt(ctxt); 12780 return(-1); 12781 } 12782 newDoc->properties = XML_DOC_INTERNAL; 12783 if (ctx->myDoc->dict) { 12784 newDoc->dict = ctx->myDoc->dict; 12785 xmlDictReference(newDoc->dict); 12786 } 12787 if (ctx->myDoc != NULL) { 12788 newDoc->intSubset = ctx->myDoc->intSubset; 12789 newDoc->extSubset = ctx->myDoc->extSubset; 12790 } 12791 if (ctx->myDoc->URL != NULL) { 12792 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12793 } 12794 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12795 if (newRoot == NULL) { 12796 ctxt->sax = oldsax; 12797 xmlFreeParserCtxt(ctxt); 12798 newDoc->intSubset = NULL; 12799 newDoc->extSubset = NULL; 12800 xmlFreeDoc(newDoc); 12801 return(-1); 12802 } 12803 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12804 nodePush(ctxt, newDoc->children); 12805 if (ctx->myDoc == NULL) { 12806 
ctxt->myDoc = newDoc; 12807 } else { 12808 ctxt->myDoc = ctx->myDoc; 12809 newDoc->children->doc = ctx->myDoc; 12810 } 12811 12812 /* 12813 * Get the 4 first bytes and decode the charset 12814 * if enc != XML_CHAR_ENCODING_NONE 12815 * plug some encoding conversion routines. 12816 */ 12817 GROW 12818 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12819 start[0] = RAW; 12820 start[1] = NXT(1); 12821 start[2] = NXT(2); 12822 start[3] = NXT(3); 12823 enc = xmlDetectCharEncoding(start, 4); 12824 if (enc != XML_CHAR_ENCODING_NONE) { 12825 xmlSwitchEncoding(ctxt, enc); 12826 } 12827 } 12828 12829 /* 12830 * Parse a possible text declaration first 12831 */ 12832 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12833 xmlParseTextDecl(ctxt); 12834 /* 12835 * An XML-1.0 document can't reference an entity not XML-1.0 12836 */ 12837 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12838 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12839 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12840 "Version mismatch between document and entity\n"); 12841 } 12842 } 12843 12844 /* 12845 * If the user provided its own SAX callbacks then reuse the 12846 * useData callback field, otherwise the expected setup in a 12847 * DOM builder is to have userData == ctxt 12848 */ 12849 if (ctx->userData == ctx) 12850 ctxt->userData = ctxt; 12851 else 12852 ctxt->userData = ctx->userData; 12853 12854 /* 12855 * Doing validity checking on chunk doesn't make sense 12856 */ 12857 ctxt->instate = XML_PARSER_CONTENT; 12858 ctxt->validate = ctx->validate; 12859 ctxt->valid = ctx->valid; 12860 ctxt->loadsubset = ctx->loadsubset; 12861 ctxt->depth = ctx->depth + 1; 12862 ctxt->replaceEntities = ctx->replaceEntities; 12863 if (ctxt->validate) { 12864 ctxt->vctxt.error = ctx->vctxt.error; 12865 ctxt->vctxt.warning = ctx->vctxt.warning; 12866 } else { 12867 ctxt->vctxt.error = NULL; 12868 ctxt->vctxt.warning = NULL; 12869 } 12870 ctxt->vctxt.nodeTab = NULL; 12871 
ctxt->vctxt.nodeNr = 0; 12872 ctxt->vctxt.nodeMax = 0; 12873 ctxt->vctxt.node = NULL; 12874 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12875 ctxt->dict = ctx->dict; 12876 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12877 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12878 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12879 ctxt->dictNames = ctx->dictNames; 12880 ctxt->attsDefault = ctx->attsDefault; 12881 ctxt->attsSpecial = ctx->attsSpecial; 12882 ctxt->linenumbers = ctx->linenumbers; 12883 12884 xmlParseContent(ctxt); 12885 12886 ctx->validate = ctxt->validate; 12887 ctx->valid = ctxt->valid; 12888 if ((RAW == '<') && (NXT(1) == '/')) { 12889 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12890 } else if (RAW != 0) { 12891 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12892 } 12893 if (ctxt->node != newDoc->children) { 12894 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12895 } 12896 12897 if (!ctxt->wellFormed) { 12898 if (ctxt->errNo == 0) 12899 ret = 1; 12900 else 12901 ret = ctxt->errNo; 12902 } else { 12903 if (lst != NULL) { 12904 xmlNodePtr cur; 12905 12906 /* 12907 * Return the newly created nodeset after unlinking it from 12908 * they pseudo parent. 
12909 */ 12910 cur = newDoc->children->children; 12911 *lst = cur; 12912 while (cur != NULL) { 12913 cur->parent = NULL; 12914 cur = cur->next; 12915 } 12916 newDoc->children->children = NULL; 12917 } 12918 ret = 0; 12919 } 12920 ctxt->sax = oldsax; 12921 ctxt->dict = NULL; 12922 ctxt->attsDefault = NULL; 12923 ctxt->attsSpecial = NULL; 12924 xmlFreeParserCtxt(ctxt); 12925 newDoc->intSubset = NULL; 12926 newDoc->extSubset = NULL; 12927 xmlFreeDoc(newDoc); 12928 12929 return(ret); 12930} 12931 12932/** 12933 * xmlParseExternalEntityPrivate: 12934 * @doc: the document the chunk pertains to 12935 * @oldctxt: the previous parser context if available 12936 * @sax: the SAX handler bloc (possibly NULL) 12937 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12938 * @depth: Used for loop detection, use 0 12939 * @URL: the URL for the entity to load 12940 * @ID: the System ID for the entity to load 12941 * @list: the return value for the set of parsed nodes 12942 * 12943 * Private version of xmlParseExternalEntity() 12944 * 12945 * Returns 0 if the entity is well formed, -1 in case of args problem and 12946 * the parser error code otherwise 12947 */ 12948 12949static xmlParserErrors 12950xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12951 xmlSAXHandlerPtr sax, 12952 void *user_data, int depth, const xmlChar *URL, 12953 const xmlChar *ID, xmlNodePtr *list) { 12954 xmlParserCtxtPtr ctxt; 12955 xmlDocPtr newDoc; 12956 xmlNodePtr newRoot; 12957 xmlSAXHandlerPtr oldsax = NULL; 12958 xmlParserErrors ret = XML_ERR_OK; 12959 xmlChar start[4]; 12960 xmlCharEncoding enc; 12961 12962 if (((depth > 40) && 12963 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12964 (depth > 1024)) { 12965 return(XML_ERR_ENTITY_LOOP); 12966 } 12967 12968 if (list != NULL) 12969 *list = NULL; 12970 if ((URL == NULL) && (ID == NULL)) 12971 return(XML_ERR_INTERNAL_ERROR); 12972 if (doc == NULL) 12973 return(XML_ERR_INTERNAL_ERROR); 12974 12975 
12976 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12977 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12978 ctxt->userData = ctxt; 12979 if (oldctxt != NULL) { 12980 ctxt->_private = oldctxt->_private; 12981 ctxt->loadsubset = oldctxt->loadsubset; 12982 ctxt->validate = oldctxt->validate; 12983 ctxt->external = oldctxt->external; 12984 ctxt->record_info = oldctxt->record_info; 12985 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12986 ctxt->node_seq.length = oldctxt->node_seq.length; 12987 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12988 } else { 12989 /* 12990 * Doing validity checking on chunk without context 12991 * doesn't make sense 12992 */ 12993 ctxt->_private = NULL; 12994 ctxt->validate = 0; 12995 ctxt->external = 2; 12996 ctxt->loadsubset = 0; 12997 } 12998 if (sax != NULL) { 12999 oldsax = ctxt->sax; 13000 ctxt->sax = sax; 13001 if (user_data != NULL) 13002 ctxt->userData = user_data; 13003 } 13004 xmlDetectSAX2(ctxt); 13005 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13006 if (newDoc == NULL) { 13007 ctxt->node_seq.maximum = 0; 13008 ctxt->node_seq.length = 0; 13009 ctxt->node_seq.buffer = NULL; 13010 xmlFreeParserCtxt(ctxt); 13011 return(XML_ERR_INTERNAL_ERROR); 13012 } 13013 newDoc->properties = XML_DOC_INTERNAL; 13014 newDoc->intSubset = doc->intSubset; 13015 newDoc->extSubset = doc->extSubset; 13016 newDoc->dict = doc->dict; 13017 xmlDictReference(newDoc->dict); 13018 13019 if (doc->URL != NULL) { 13020 newDoc->URL = xmlStrdup(doc->URL); 13021 } 13022 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13023 if (newRoot == NULL) { 13024 if (sax != NULL) 13025 ctxt->sax = oldsax; 13026 ctxt->node_seq.maximum = 0; 13027 ctxt->node_seq.length = 0; 13028 ctxt->node_seq.buffer = NULL; 13029 xmlFreeParserCtxt(ctxt); 13030 newDoc->intSubset = NULL; 13031 newDoc->extSubset = NULL; 13032 xmlFreeDoc(newDoc); 13033 return(XML_ERR_INTERNAL_ERROR); 13034 } 13035 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13036 
nodePush(ctxt, newDoc->children); 13037 ctxt->myDoc = doc; 13038 newRoot->doc = doc; 13039 13040 /* 13041 * Get the 4 first bytes and decode the charset 13042 * if enc != XML_CHAR_ENCODING_NONE 13043 * plug some encoding conversion routines. 13044 */ 13045 GROW; 13046 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13047 start[0] = RAW; 13048 start[1] = NXT(1); 13049 start[2] = NXT(2); 13050 start[3] = NXT(3); 13051 enc = xmlDetectCharEncoding(start, 4); 13052 if (enc != XML_CHAR_ENCODING_NONE) { 13053 xmlSwitchEncoding(ctxt, enc); 13054 } 13055 } 13056 13057 /* 13058 * Parse a possible text declaration first 13059 */ 13060 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13061 xmlParseTextDecl(ctxt); 13062 } 13063 13064 ctxt->instate = XML_PARSER_CONTENT; 13065 ctxt->depth = depth; 13066 13067 xmlParseContent(ctxt); 13068 13069 if ((RAW == '<') && (NXT(1) == '/')) { 13070 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13071 } else if (RAW != 0) { 13072 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13073 } 13074 if (ctxt->node != newDoc->children) { 13075 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13076 } 13077 13078 if (!ctxt->wellFormed) { 13079 if (ctxt->errNo == 0) 13080 ret = XML_ERR_INTERNAL_ERROR; 13081 else 13082 ret = (xmlParserErrors)ctxt->errNo; 13083 } else { 13084 if (list != NULL) { 13085 xmlNodePtr cur; 13086 13087 /* 13088 * Return the newly created nodeset after unlinking it from 13089 * they pseudo parent. 13090 */ 13091 cur = newDoc->children->children; 13092 *list = cur; 13093 while (cur != NULL) { 13094 cur->parent = NULL; 13095 cur = cur->next; 13096 } 13097 newDoc->children->children = NULL; 13098 } 13099 ret = XML_ERR_OK; 13100 } 13101 13102 /* 13103 * Record in the parent context the number of entities replacement 13104 * done when parsing that reference. 
13105 */ 13106 if (oldctxt != NULL) 13107 oldctxt->nbentities += ctxt->nbentities; 13108 13109 /* 13110 * Also record the size of the entity parsed 13111 */ 13112 if (ctxt->input != NULL) { 13113 oldctxt->sizeentities += ctxt->input->consumed; 13114 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13115 } 13116 /* 13117 * And record the last error if any 13118 */ 13119 if (ctxt->lastError.code != XML_ERR_OK) 13120 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13121 13122 if (sax != NULL) 13123 ctxt->sax = oldsax; 13124 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13125 oldctxt->node_seq.length = ctxt->node_seq.length; 13126 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13127 ctxt->node_seq.maximum = 0; 13128 ctxt->node_seq.length = 0; 13129 ctxt->node_seq.buffer = NULL; 13130 xmlFreeParserCtxt(ctxt); 13131 newDoc->intSubset = NULL; 13132 newDoc->extSubset = NULL; 13133 xmlFreeDoc(newDoc); 13134 13135 return(ret); 13136} 13137 13138#ifdef LIBXML_SAX1_ENABLED 13139/** 13140 * xmlParseExternalEntity: 13141 * @doc: the document the chunk pertains to 13142 * @sax: the SAX handler bloc (possibly NULL) 13143 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13144 * @depth: Used for loop detection, use 0 13145 * @URL: the URL for the entity to load 13146 * @ID: the System ID for the entity to load 13147 * @lst: the return value for the set of parsed nodes 13148 * 13149 * Parse an external general entity 13150 * An external general parsed entity is well-formed if it matches the 13151 * production labeled extParsedEnt. 13152 * 13153 * [78] extParsedEnt ::= TextDecl? 
content 13154 * 13155 * Returns 0 if the entity is well formed, -1 in case of args problem and 13156 * the parser error code otherwise 13157 */ 13158 13159int 13160xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13161 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13162 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13163 ID, lst)); 13164} 13165 13166/** 13167 * xmlParseBalancedChunkMemory: 13168 * @doc: the document the chunk pertains to 13169 * @sax: the SAX handler bloc (possibly NULL) 13170 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13171 * @depth: Used for loop detection, use 0 13172 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13173 * @lst: the return value for the set of parsed nodes 13174 * 13175 * Parse a well-balanced chunk of an XML document 13176 * called by the parser 13177 * The allowed sequence for the Well Balanced Chunk is the one defined by 13178 * the content production in the XML grammar: 13179 * 13180 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13181 * 13182 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13183 * the parser error code otherwise 13184 */ 13185 13186int 13187xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13188 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13189 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13190 depth, string, lst, 0 ); 13191} 13192#endif /* LIBXML_SAX1_ENABLED */ 13193 13194/** 13195 * xmlParseBalancedChunkMemoryInternal: 13196 * @oldctxt: the existing parsing context 13197 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13198 * @user_data: the user data field for the parser context 13199 * @lst: the return value for the set of parsed nodes 13200 * 13201 * 13202 * Parse a well-balanced chunk of an XML document 13203 * called by the parser 13204 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 13205 * the content production in the XML grammar: 13206 * 13207 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13208 * 13209 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13210 * error code otherwise 13211 * 13212 * In case recover is set to 1, the nodelist will not be empty even if 13213 * the parsed chunk is not well balanced. 13214 */ 13215static xmlParserErrors 13216xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13217 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13218 xmlParserCtxtPtr ctxt; 13219 xmlDocPtr newDoc = NULL; 13220 xmlNodePtr newRoot; 13221 xmlSAXHandlerPtr oldsax = NULL; 13222 xmlNodePtr content = NULL; 13223 xmlNodePtr last = NULL; 13224 int size; 13225 xmlParserErrors ret = XML_ERR_OK; 13226#ifdef SAX2 13227 int i; 13228#endif 13229 13230 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13231 (oldctxt->depth > 1024)) { 13232 return(XML_ERR_ENTITY_LOOP); 13233 } 13234 13235 13236 if (lst != NULL) 13237 *lst = NULL; 13238 if (string == NULL) 13239 return(XML_ERR_INTERNAL_ERROR); 13240 13241 size = xmlStrlen(string); 13242 13243 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13244 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13245 if (user_data != NULL) 13246 ctxt->userData = user_data; 13247 else 13248 ctxt->userData = ctxt; 13249 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13250 ctxt->dict = oldctxt->dict; 13251 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13252 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13253 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13254 13255#ifdef SAX2 13256 /* propagate namespaces down the entity */ 13257 for (i = 0;i < oldctxt->nsNr;i += 2) { 13258 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13259 } 13260#endif 13261 13262 oldsax = ctxt->sax; 13263 
ctxt->sax = oldctxt->sax; 13264 xmlDetectSAX2(ctxt); 13265 ctxt->replaceEntities = oldctxt->replaceEntities; 13266 ctxt->options = oldctxt->options; 13267 13268 ctxt->_private = oldctxt->_private; 13269 if (oldctxt->myDoc == NULL) { 13270 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13271 if (newDoc == NULL) { 13272 ctxt->sax = oldsax; 13273 ctxt->dict = NULL; 13274 xmlFreeParserCtxt(ctxt); 13275 return(XML_ERR_INTERNAL_ERROR); 13276 } 13277 newDoc->properties = XML_DOC_INTERNAL; 13278 newDoc->dict = ctxt->dict; 13279 xmlDictReference(newDoc->dict); 13280 ctxt->myDoc = newDoc; 13281 } else { 13282 ctxt->myDoc = oldctxt->myDoc; 13283 content = ctxt->myDoc->children; 13284 last = ctxt->myDoc->last; 13285 } 13286 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13287 if (newRoot == NULL) { 13288 ctxt->sax = oldsax; 13289 ctxt->dict = NULL; 13290 xmlFreeParserCtxt(ctxt); 13291 if (newDoc != NULL) { 13292 xmlFreeDoc(newDoc); 13293 } 13294 return(XML_ERR_INTERNAL_ERROR); 13295 } 13296 ctxt->myDoc->children = NULL; 13297 ctxt->myDoc->last = NULL; 13298 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13299 nodePush(ctxt, ctxt->myDoc->children); 13300 ctxt->instate = XML_PARSER_CONTENT; 13301 ctxt->depth = oldctxt->depth + 1; 13302 13303 ctxt->validate = 0; 13304 ctxt->loadsubset = oldctxt->loadsubset; 13305 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13306 /* 13307 * ID/IDREF registration will be done in xmlValidateElement below 13308 */ 13309 ctxt->loadsubset |= XML_SKIP_IDS; 13310 } 13311 ctxt->dictNames = oldctxt->dictNames; 13312 ctxt->attsDefault = oldctxt->attsDefault; 13313 ctxt->attsSpecial = oldctxt->attsSpecial; 13314 13315 xmlParseContent(ctxt); 13316 if ((RAW == '<') && (NXT(1) == '/')) { 13317 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13318 } else if (RAW != 0) { 13319 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13320 } 13321 if (ctxt->node != ctxt->myDoc->children) { 13322 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 13323 } 13324 13325 if (!ctxt->wellFormed) { 13326 if (ctxt->errNo == 0) 13327 ret = XML_ERR_INTERNAL_ERROR; 13328 else 13329 ret = (xmlParserErrors)ctxt->errNo; 13330 } else { 13331 ret = XML_ERR_OK; 13332 } 13333 13334 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13335 xmlNodePtr cur; 13336 13337 /* 13338 * Return the newly created nodeset after unlinking it from 13339 * they pseudo parent. 13340 */ 13341 cur = ctxt->myDoc->children->children; 13342 *lst = cur; 13343 while (cur != NULL) { 13344#ifdef LIBXML_VALID_ENABLED 13345 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13346 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13347 (cur->type == XML_ELEMENT_NODE)) { 13348 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13349 oldctxt->myDoc, cur); 13350 } 13351#endif /* LIBXML_VALID_ENABLED */ 13352 cur->parent = NULL; 13353 cur = cur->next; 13354 } 13355 ctxt->myDoc->children->children = NULL; 13356 } 13357 if (ctxt->myDoc != NULL) { 13358 xmlFreeNode(ctxt->myDoc->children); 13359 ctxt->myDoc->children = content; 13360 ctxt->myDoc->last = last; 13361 } 13362 13363 /* 13364 * Record in the parent context the number of entities replacement 13365 * done when parsing that reference. 
13366 */ 13367 if (oldctxt != NULL) 13368 oldctxt->nbentities += ctxt->nbentities; 13369 13370 /* 13371 * Also record the last error if any 13372 */ 13373 if (ctxt->lastError.code != XML_ERR_OK) 13374 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13375 13376 ctxt->sax = oldsax; 13377 ctxt->dict = NULL; 13378 ctxt->attsDefault = NULL; 13379 ctxt->attsSpecial = NULL; 13380 xmlFreeParserCtxt(ctxt); 13381 if (newDoc != NULL) { 13382 xmlFreeDoc(newDoc); 13383 } 13384 13385 return(ret); 13386} 13387 13388/** 13389 * xmlParseInNodeContext: 13390 * @node: the context node 13391 * @data: the input string 13392 * @datalen: the input string length in bytes 13393 * @options: a combination of xmlParserOption 13394 * @lst: the return value for the set of parsed nodes 13395 * 13396 * Parse a well-balanced chunk of an XML document 13397 * within the context (DTD, namespaces, etc ...) of the given node. 13398 * 13399 * The allowed sequence for the data is a Well Balanced Chunk defined by 13400 * the content production in the XML grammar: 13401 * 13402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13403 * 13404 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13405 * error code otherwise 13406 */ 13407xmlParserErrors 13408xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13409 int options, xmlNodePtr *lst) { 13410#ifdef SAX2 13411 xmlParserCtxtPtr ctxt; 13412 xmlDocPtr doc = NULL; 13413 xmlNodePtr fake, cur; 13414 int nsnr = 0; 13415 13416 xmlParserErrors ret = XML_ERR_OK; 13417 13418 /* 13419 * check all input parameters, grab the document 13420 */ 13421 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13422 return(XML_ERR_INTERNAL_ERROR); 13423 switch (node->type) { 13424 case XML_ELEMENT_NODE: 13425 case XML_ATTRIBUTE_NODE: 13426 case XML_TEXT_NODE: 13427 case XML_CDATA_SECTION_NODE: 13428 case XML_ENTITY_REF_NODE: 13429 case XML_PI_NODE: 13430 case XML_COMMENT_NODE: 13431 case 
XML_DOCUMENT_NODE: 13432 case XML_HTML_DOCUMENT_NODE: 13433 break; 13434 default: 13435 return(XML_ERR_INTERNAL_ERROR); 13436 13437 } 13438 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13439 (node->type != XML_DOCUMENT_NODE) && 13440 (node->type != XML_HTML_DOCUMENT_NODE)) 13441 node = node->parent; 13442 if (node == NULL) 13443 return(XML_ERR_INTERNAL_ERROR); 13444 if (node->type == XML_ELEMENT_NODE) 13445 doc = node->doc; 13446 else 13447 doc = (xmlDocPtr) node; 13448 if (doc == NULL) 13449 return(XML_ERR_INTERNAL_ERROR); 13450 13451 /* 13452 * allocate a context and set-up everything not related to the 13453 * node position in the tree 13454 */ 13455 if (doc->type == XML_DOCUMENT_NODE) 13456 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13457#ifdef LIBXML_HTML_ENABLED 13458 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13459 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13460 /* 13461 * When parsing in context, it makes no sense to add implied 13462 * elements like html/body/etc... 13463 */ 13464 options |= HTML_PARSE_NOIMPLIED; 13465 } 13466#endif 13467 else 13468 return(XML_ERR_INTERNAL_ERROR); 13469 13470 if (ctxt == NULL) 13471 return(XML_ERR_NO_MEMORY); 13472 13473 /* 13474 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13475 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13476 * we must wait until the last moment to free the original one. 
13477 */ 13478 if (doc->dict != NULL) { 13479 if (ctxt->dict != NULL) 13480 xmlDictFree(ctxt->dict); 13481 ctxt->dict = doc->dict; 13482 } else 13483 options |= XML_PARSE_NODICT; 13484 13485 if (doc->encoding != NULL) { 13486 xmlCharEncodingHandlerPtr hdlr; 13487 13488 if (ctxt->encoding != NULL) 13489 xmlFree((xmlChar *) ctxt->encoding); 13490 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13491 13492 hdlr = xmlFindCharEncodingHandler(doc->encoding); 13493 if (hdlr != NULL) { 13494 xmlSwitchToEncoding(ctxt, hdlr); 13495 } else { 13496 return(XML_ERR_UNSUPPORTED_ENCODING); 13497 } 13498 } 13499 13500 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13501 xmlDetectSAX2(ctxt); 13502 ctxt->myDoc = doc; 13503 13504 fake = xmlNewComment(NULL); 13505 if (fake == NULL) { 13506 xmlFreeParserCtxt(ctxt); 13507 return(XML_ERR_NO_MEMORY); 13508 } 13509 xmlAddChild(node, fake); 13510 13511 if (node->type == XML_ELEMENT_NODE) { 13512 nodePush(ctxt, node); 13513 /* 13514 * initialize the SAX2 namespaces stack 13515 */ 13516 cur = node; 13517 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13518 xmlNsPtr ns = cur->nsDef; 13519 const xmlChar *iprefix, *ihref; 13520 13521 while (ns != NULL) { 13522 if (ctxt->dict) { 13523 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13524 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13525 } else { 13526 iprefix = ns->prefix; 13527 ihref = ns->href; 13528 } 13529 13530 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13531 nsPush(ctxt, iprefix, ihref); 13532 nsnr++; 13533 } 13534 ns = ns->next; 13535 } 13536 cur = cur->parent; 13537 } 13538 ctxt->instate = XML_PARSER_CONTENT; 13539 } 13540 13541 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13542 /* 13543 * ID/IDREF registration will be done in xmlValidateElement below 13544 */ 13545 ctxt->loadsubset |= XML_SKIP_IDS; 13546 } 13547 13548#ifdef LIBXML_HTML_ENABLED 13549 if (doc->type == XML_HTML_DOCUMENT_NODE) 13550 __htmlParseContent(ctxt); 13551 else 
13552#endif 13553 xmlParseContent(ctxt); 13554 13555 nsPop(ctxt, nsnr); 13556 if ((RAW == '<') && (NXT(1) == '/')) { 13557 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13558 } else if (RAW != 0) { 13559 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13560 } 13561 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13562 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13563 ctxt->wellFormed = 0; 13564 } 13565 13566 if (!ctxt->wellFormed) { 13567 if (ctxt->errNo == 0) 13568 ret = XML_ERR_INTERNAL_ERROR; 13569 else 13570 ret = (xmlParserErrors)ctxt->errNo; 13571 } else { 13572 ret = XML_ERR_OK; 13573 } 13574 13575 /* 13576 * Return the newly created nodeset after unlinking it from 13577 * the pseudo sibling. 13578 */ 13579 13580 cur = fake->next; 13581 fake->next = NULL; 13582 node->last = fake; 13583 13584 if (cur != NULL) { 13585 cur->prev = NULL; 13586 } 13587 13588 *lst = cur; 13589 13590 while (cur != NULL) { 13591 cur->parent = NULL; 13592 cur = cur->next; 13593 } 13594 13595 xmlUnlinkNode(fake); 13596 xmlFreeNode(fake); 13597 13598 13599 if (ret != XML_ERR_OK) { 13600 xmlFreeNodeList(*lst); 13601 *lst = NULL; 13602 } 13603 13604 if (doc->dict != NULL) 13605 ctxt->dict = NULL; 13606 xmlFreeParserCtxt(ctxt); 13607 13608 return(ret); 13609#else /* !SAX2 */ 13610 return(XML_ERR_INTERNAL_ERROR); 13611#endif 13612} 13613 13614#ifdef LIBXML_SAX1_ENABLED 13615/** 13616 * xmlParseBalancedChunkMemoryRecover: 13617 * @doc: the document the chunk pertains to 13618 * @sax: the SAX handler bloc (possibly NULL) 13619 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13620 * @depth: Used for loop detection, use 0 13621 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13622 * @lst: the return value for the set of parsed nodes 13623 * @recover: return nodes even if the data is broken (use 0) 13624 * 13625 * 13626 * Parse a well-balanced chunk of an XML document 13627 * called by the parser 13628 * The allowed sequence for the 
Well Balanced Chunk is the one defined by 13629 * the content production in the XML grammar: 13630 * 13631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13632 * 13633 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13634 * the parser error code otherwise 13635 * 13636 * In case recover is set to 1, the nodelist will not be empty even if 13637 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13638 * some extent. 13639 */ 13640int 13641xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13642 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13643 int recover) { 13644 xmlParserCtxtPtr ctxt; 13645 xmlDocPtr newDoc; 13646 xmlSAXHandlerPtr oldsax = NULL; 13647 xmlNodePtr content, newRoot; 13648 int size; 13649 int ret = 0; 13650 13651 if (depth > 40) { 13652 return(XML_ERR_ENTITY_LOOP); 13653 } 13654 13655 13656 if (lst != NULL) 13657 *lst = NULL; 13658 if (string == NULL) 13659 return(-1); 13660 13661 size = xmlStrlen(string); 13662 13663 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13664 if (ctxt == NULL) return(-1); 13665 ctxt->userData = ctxt; 13666 if (sax != NULL) { 13667 oldsax = ctxt->sax; 13668 ctxt->sax = sax; 13669 if (user_data != NULL) 13670 ctxt->userData = user_data; 13671 } 13672 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13673 if (newDoc == NULL) { 13674 xmlFreeParserCtxt(ctxt); 13675 return(-1); 13676 } 13677 newDoc->properties = XML_DOC_INTERNAL; 13678 if ((doc != NULL) && (doc->dict != NULL)) { 13679 xmlDictFree(ctxt->dict); 13680 ctxt->dict = doc->dict; 13681 xmlDictReference(ctxt->dict); 13682 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13683 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13684 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13685 ctxt->dictNames = 1; 13686 } else { 13687 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13688 } 13689 if (doc 
!= NULL) { 13690 newDoc->intSubset = doc->intSubset; 13691 newDoc->extSubset = doc->extSubset; 13692 } 13693 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13694 if (newRoot == NULL) { 13695 if (sax != NULL) 13696 ctxt->sax = oldsax; 13697 xmlFreeParserCtxt(ctxt); 13698 newDoc->intSubset = NULL; 13699 newDoc->extSubset = NULL; 13700 xmlFreeDoc(newDoc); 13701 return(-1); 13702 } 13703 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13704 nodePush(ctxt, newRoot); 13705 if (doc == NULL) { 13706 ctxt->myDoc = newDoc; 13707 } else { 13708 ctxt->myDoc = newDoc; 13709 newDoc->children->doc = doc; 13710 /* Ensure that doc has XML spec namespace */ 13711 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13712 newDoc->oldNs = doc->oldNs; 13713 } 13714 ctxt->instate = XML_PARSER_CONTENT; 13715 ctxt->depth = depth; 13716 13717 /* 13718 * Doing validity checking on chunk doesn't make sense 13719 */ 13720 ctxt->validate = 0; 13721 ctxt->loadsubset = 0; 13722 xmlDetectSAX2(ctxt); 13723 13724 if ( doc != NULL ){ 13725 content = doc->children; 13726 doc->children = NULL; 13727 xmlParseContent(ctxt); 13728 doc->children = content; 13729 } 13730 else { 13731 xmlParseContent(ctxt); 13732 } 13733 if ((RAW == '<') && (NXT(1) == '/')) { 13734 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13735 } else if (RAW != 0) { 13736 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13737 } 13738 if (ctxt->node != newDoc->children) { 13739 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13740 } 13741 13742 if (!ctxt->wellFormed) { 13743 if (ctxt->errNo == 0) 13744 ret = 1; 13745 else 13746 ret = ctxt->errNo; 13747 } else { 13748 ret = 0; 13749 } 13750 13751 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13752 xmlNodePtr cur; 13753 13754 /* 13755 * Return the newly created nodeset after unlinking it from 13756 * they pseudo parent. 
13757 */ 13758 cur = newDoc->children->children; 13759 *lst = cur; 13760 while (cur != NULL) { 13761 xmlSetTreeDoc(cur, doc); 13762 cur->parent = NULL; 13763 cur = cur->next; 13764 } 13765 newDoc->children->children = NULL; 13766 } 13767 13768 if (sax != NULL) 13769 ctxt->sax = oldsax; 13770 xmlFreeParserCtxt(ctxt); 13771 newDoc->intSubset = NULL; 13772 newDoc->extSubset = NULL; 13773 newDoc->oldNs = NULL; 13774 xmlFreeDoc(newDoc); 13775 13776 return(ret); 13777} 13778 13779/** 13780 * xmlSAXParseEntity: 13781 * @sax: the SAX handler block 13782 * @filename: the filename 13783 * 13784 * parse an XML external entity out of context and build a tree. 13785 * It use the given SAX function block to handle the parsing callback. 13786 * If sax is NULL, fallback to the default DOM tree building routines. 13787 * 13788 * [78] extParsedEnt ::= TextDecl? content 13789 * 13790 * This correspond to a "Well Balanced" chunk 13791 * 13792 * Returns the resulting document tree 13793 */ 13794 13795xmlDocPtr 13796xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13797 xmlDocPtr ret; 13798 xmlParserCtxtPtr ctxt; 13799 13800 ctxt = xmlCreateFileParserCtxt(filename); 13801 if (ctxt == NULL) { 13802 return(NULL); 13803 } 13804 if (sax != NULL) { 13805 if (ctxt->sax != NULL) 13806 xmlFree(ctxt->sax); 13807 ctxt->sax = sax; 13808 ctxt->userData = NULL; 13809 } 13810 13811 xmlParseExtParsedEnt(ctxt); 13812 13813 if (ctxt->wellFormed) 13814 ret = ctxt->myDoc; 13815 else { 13816 ret = NULL; 13817 xmlFreeDoc(ctxt->myDoc); 13818 ctxt->myDoc = NULL; 13819 } 13820 if (sax != NULL) 13821 ctxt->sax = NULL; 13822 xmlFreeParserCtxt(ctxt); 13823 13824 return(ret); 13825} 13826 13827/** 13828 * xmlParseEntity: 13829 * @filename: the filename 13830 * 13831 * parse an XML external entity out of context and build a tree. 13832 * 13833 * [78] extParsedEnt ::= TextDecl? 
content 13834 * 13835 * This correspond to a "Well Balanced" chunk 13836 * 13837 * Returns the resulting document tree 13838 */ 13839 13840xmlDocPtr 13841xmlParseEntity(const char *filename) { 13842 return(xmlSAXParseEntity(NULL, filename)); 13843} 13844#endif /* LIBXML_SAX1_ENABLED */ 13845 13846/** 13847 * xmlCreateEntityParserCtxtInternal: 13848 * @URL: the entity URL 13849 * @ID: the entity PUBLIC ID 13850 * @base: a possible base for the target URI 13851 * @pctx: parser context used to set options on new context 13852 * 13853 * Create a parser context for an external entity 13854 * Automatic support for ZLIB/Compress compressed document is provided 13855 * by default if found at compile-time. 13856 * 13857 * Returns the new parser context or NULL 13858 */ 13859static xmlParserCtxtPtr 13860xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13861 const xmlChar *base, xmlParserCtxtPtr pctx) { 13862 xmlParserCtxtPtr ctxt; 13863 xmlParserInputPtr inputStream; 13864 char *directory = NULL; 13865 xmlChar *uri; 13866 13867 ctxt = xmlNewParserCtxt(); 13868 if (ctxt == NULL) { 13869 return(NULL); 13870 } 13871 13872 if (pctx != NULL) { 13873 ctxt->options = pctx->options; 13874 ctxt->_private = pctx->_private; 13875 } 13876 13877 uri = xmlBuildURI(URL, base); 13878 13879 if (uri == NULL) { 13880 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13881 if (inputStream == NULL) { 13882 xmlFreeParserCtxt(ctxt); 13883 return(NULL); 13884 } 13885 13886 inputPush(ctxt, inputStream); 13887 13888 if ((ctxt->directory == NULL) && (directory == NULL)) 13889 directory = xmlParserGetDirectory((char *)URL); 13890 if ((ctxt->directory == NULL) && (directory != NULL)) 13891 ctxt->directory = directory; 13892 } else { 13893 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13894 if (inputStream == NULL) { 13895 xmlFree(uri); 13896 xmlFreeParserCtxt(ctxt); 13897 return(NULL); 13898 } 13899 13900 inputPush(ctxt, inputStream); 13901 
13902 if ((ctxt->directory == NULL) && (directory == NULL)) 13903 directory = xmlParserGetDirectory((char *)uri); 13904 if ((ctxt->directory == NULL) && (directory != NULL)) 13905 ctxt->directory = directory; 13906 xmlFree(uri); 13907 } 13908 return(ctxt); 13909} 13910 13911/** 13912 * xmlCreateEntityParserCtxt: 13913 * @URL: the entity URL 13914 * @ID: the entity PUBLIC ID 13915 * @base: a possible base for the target URI 13916 * 13917 * Create a parser context for an external entity 13918 * Automatic support for ZLIB/Compress compressed document is provided 13919 * by default if found at compile-time. 13920 * 13921 * Returns the new parser context or NULL 13922 */ 13923xmlParserCtxtPtr 13924xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13925 const xmlChar *base) { 13926 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13927 13928} 13929 13930/************************************************************************ 13931 * * 13932 * Front ends when parsing from a file * 13933 * * 13934 ************************************************************************/ 13935 13936/** 13937 * xmlCreateURLParserCtxt: 13938 * @filename: the filename or URL 13939 * @options: a combination of xmlParserOption 13940 * 13941 * Create a parser context for a file or URL content. 
13942 * Automatic support for ZLIB/Compress compressed document is provided 13943 * by default if found at compile-time and for file accesses 13944 * 13945 * Returns the new parser context or NULL 13946 */ 13947xmlParserCtxtPtr 13948xmlCreateURLParserCtxt(const char *filename, int options) 13949{ 13950 xmlParserCtxtPtr ctxt; 13951 xmlParserInputPtr inputStream; 13952 char *directory = NULL; 13953 13954 ctxt = xmlNewParserCtxt(); 13955 if (ctxt == NULL) { 13956 xmlErrMemory(NULL, "cannot allocate parser context"); 13957 return(NULL); 13958 } 13959 13960 if (options) 13961 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13962 ctxt->linenumbers = 1; 13963 13964 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13965 if (inputStream == NULL) { 13966 xmlFreeParserCtxt(ctxt); 13967 return(NULL); 13968 } 13969 13970 inputPush(ctxt, inputStream); 13971 if ((ctxt->directory == NULL) && (directory == NULL)) 13972 directory = xmlParserGetDirectory(filename); 13973 if ((ctxt->directory == NULL) && (directory != NULL)) 13974 ctxt->directory = directory; 13975 13976 return(ctxt); 13977} 13978 13979/** 13980 * xmlCreateFileParserCtxt: 13981 * @filename: the filename 13982 * 13983 * Create a parser context for a file content. 13984 * Automatic support for ZLIB/Compress compressed document is provided 13985 * by default if found at compile-time. 13986 * 13987 * Returns the new parser context or NULL 13988 */ 13989xmlParserCtxtPtr 13990xmlCreateFileParserCtxt(const char *filename) 13991{ 13992 return(xmlCreateURLParserCtxt(filename, 0)); 13993} 13994 13995#ifdef LIBXML_SAX1_ENABLED 13996/** 13997 * xmlSAXParseFileWithData: 13998 * @sax: the SAX handler block 13999 * @filename: the filename 14000 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14001 * documents 14002 * @data: the userdata 14003 * 14004 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14005 * compressed document is provided by default if found at compile-time. 
14006 * It use the given SAX function block to handle the parsing callback. 14007 * If sax is NULL, fallback to the default DOM tree building routines. 14008 * 14009 * User data (void *) is stored within the parser context in the 14010 * context's _private member, so it is available nearly everywhere in libxml 14011 * 14012 * Returns the resulting document tree 14013 */ 14014 14015xmlDocPtr 14016xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14017 int recovery, void *data) { 14018 xmlDocPtr ret; 14019 xmlParserCtxtPtr ctxt; 14020 14021 xmlInitParser(); 14022 14023 ctxt = xmlCreateFileParserCtxt(filename); 14024 if (ctxt == NULL) { 14025 return(NULL); 14026 } 14027 if (sax != NULL) { 14028 if (ctxt->sax != NULL) 14029 xmlFree(ctxt->sax); 14030 ctxt->sax = sax; 14031 } 14032 xmlDetectSAX2(ctxt); 14033 if (data!=NULL) { 14034 ctxt->_private = data; 14035 } 14036 14037 if (ctxt->directory == NULL) 14038 ctxt->directory = xmlParserGetDirectory(filename); 14039 14040 ctxt->recovery = recovery; 14041 14042 xmlParseDocument(ctxt); 14043 14044 if ((ctxt->wellFormed) || recovery) { 14045 ret = ctxt->myDoc; 14046 if (ret != NULL) { 14047 if (ctxt->input->buf->compressed > 0) 14048 ret->compression = 9; 14049 else 14050 ret->compression = ctxt->input->buf->compressed; 14051 } 14052 } 14053 else { 14054 ret = NULL; 14055 xmlFreeDoc(ctxt->myDoc); 14056 ctxt->myDoc = NULL; 14057 } 14058 if (sax != NULL) 14059 ctxt->sax = NULL; 14060 xmlFreeParserCtxt(ctxt); 14061 14062 return(ret); 14063} 14064 14065/** 14066 * xmlSAXParseFile: 14067 * @sax: the SAX handler block 14068 * @filename: the filename 14069 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14070 * documents 14071 * 14072 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14073 * compressed document is provided by default if found at compile-time. 14074 * It use the given SAX function block to handle the parsing callback. 
14075 * If sax is NULL, fallback to the default DOM tree building routines. 14076 * 14077 * Returns the resulting document tree 14078 */ 14079 14080xmlDocPtr 14081xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14082 int recovery) { 14083 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14084} 14085 14086/** 14087 * xmlRecoverDoc: 14088 * @cur: a pointer to an array of xmlChar 14089 * 14090 * parse an XML in-memory document and build a tree. 14091 * In the case the document is not Well Formed, a attempt to build a 14092 * tree is tried anyway 14093 * 14094 * Returns the resulting document tree or NULL in case of failure 14095 */ 14096 14097xmlDocPtr 14098xmlRecoverDoc(const xmlChar *cur) { 14099 return(xmlSAXParseDoc(NULL, cur, 1)); 14100} 14101 14102/** 14103 * xmlParseFile: 14104 * @filename: the filename 14105 * 14106 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14107 * compressed document is provided by default if found at compile-time. 14108 * 14109 * Returns the resulting document tree if the file was wellformed, 14110 * NULL otherwise. 14111 */ 14112 14113xmlDocPtr 14114xmlParseFile(const char *filename) { 14115 return(xmlSAXParseFile(NULL, filename, 0)); 14116} 14117 14118/** 14119 * xmlRecoverFile: 14120 * @filename: the filename 14121 * 14122 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14123 * compressed document is provided by default if found at compile-time. 
14124 * In the case the document is not Well Formed, it attempts to build 14125 * a tree anyway 14126 * 14127 * Returns the resulting document tree or NULL in case of failure 14128 */ 14129 14130xmlDocPtr 14131xmlRecoverFile(const char *filename) { 14132 return(xmlSAXParseFile(NULL, filename, 1)); 14133} 14134 14135 14136/** 14137 * xmlSetupParserForBuffer: 14138 * @ctxt: an XML parser context 14139 * @buffer: a xmlChar * buffer 14140 * @filename: a file name 14141 * 14142 * Setup the parser context to parse a new buffer; Clears any prior 14143 * contents from the parser context. The buffer parameter must not be 14144 * NULL, but the filename parameter can be 14145 */ 14146void 14147xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14148 const char* filename) 14149{ 14150 xmlParserInputPtr input; 14151 14152 if ((ctxt == NULL) || (buffer == NULL)) 14153 return; 14154 14155 input = xmlNewInputStream(ctxt); 14156 if (input == NULL) { 14157 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14158 xmlClearParserCtxt(ctxt); 14159 return; 14160 } 14161 14162 xmlClearParserCtxt(ctxt); 14163 if (filename != NULL) 14164 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14165 input->base = buffer; 14166 input->cur = buffer; 14167 input->end = &buffer[xmlStrlen(buffer)]; 14168 inputPush(ctxt, input); 14169} 14170 14171/** 14172 * xmlSAXUserParseFile: 14173 * @sax: a SAX handler 14174 * @user_data: The user data returned on SAX callbacks 14175 * @filename: a file name 14176 * 14177 * parse an XML file and call the given SAX handler routines. 
14178 * Automatic support for ZLIB/Compress compressed document is provided 14179 * 14180 * Returns 0 in case of success or a error number otherwise 14181 */ 14182int 14183xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14184 const char *filename) { 14185 int ret = 0; 14186 xmlParserCtxtPtr ctxt; 14187 14188 ctxt = xmlCreateFileParserCtxt(filename); 14189 if (ctxt == NULL) return -1; 14190 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14191 xmlFree(ctxt->sax); 14192 ctxt->sax = sax; 14193 xmlDetectSAX2(ctxt); 14194 14195 if (user_data != NULL) 14196 ctxt->userData = user_data; 14197 14198 xmlParseDocument(ctxt); 14199 14200 if (ctxt->wellFormed) 14201 ret = 0; 14202 else { 14203 if (ctxt->errNo != 0) 14204 ret = ctxt->errNo; 14205 else 14206 ret = -1; 14207 } 14208 if (sax != NULL) 14209 ctxt->sax = NULL; 14210 if (ctxt->myDoc != NULL) { 14211 xmlFreeDoc(ctxt->myDoc); 14212 ctxt->myDoc = NULL; 14213 } 14214 xmlFreeParserCtxt(ctxt); 14215 14216 return ret; 14217} 14218#endif /* LIBXML_SAX1_ENABLED */ 14219 14220/************************************************************************ 14221 * * 14222 * Front ends when parsing from memory * 14223 * * 14224 ************************************************************************/ 14225 14226/** 14227 * xmlCreateMemoryParserCtxt: 14228 * @buffer: a pointer to a char array 14229 * @size: the size of the array 14230 * 14231 * Create a parser context for an XML in-memory document. 
14232 * 14233 * Returns the new parser context or NULL 14234 */ 14235xmlParserCtxtPtr 14236xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14237 xmlParserCtxtPtr ctxt; 14238 xmlParserInputPtr input; 14239 xmlParserInputBufferPtr buf; 14240 14241 if (buffer == NULL) 14242 return(NULL); 14243 if (size <= 0) 14244 return(NULL); 14245 14246 ctxt = xmlNewParserCtxt(); 14247 if (ctxt == NULL) 14248 return(NULL); 14249 14250 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14251 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14252 if (buf == NULL) { 14253 xmlFreeParserCtxt(ctxt); 14254 return(NULL); 14255 } 14256 14257 input = xmlNewInputStream(ctxt); 14258 if (input == NULL) { 14259 xmlFreeParserInputBuffer(buf); 14260 xmlFreeParserCtxt(ctxt); 14261 return(NULL); 14262 } 14263 14264 input->filename = NULL; 14265 input->buf = buf; 14266 xmlBufResetInput(input->buf->buffer, input); 14267 14268 inputPush(ctxt, input); 14269 return(ctxt); 14270} 14271 14272#ifdef LIBXML_SAX1_ENABLED 14273/** 14274 * xmlSAXParseMemoryWithData: 14275 * @sax: the SAX handler block 14276 * @buffer: an pointer to a char array 14277 * @size: the size of the array 14278 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14279 * documents 14280 * @data: the userdata 14281 * 14282 * parse an XML in-memory block and use the given SAX function block 14283 * to handle the parsing callback. If sax is NULL, fallback to the default 14284 * DOM tree building routines. 
14285 * 14286 * User data (void *) is stored within the parser context in the 14287 * context's _private member, so it is available nearly everywhere in libxml 14288 * 14289 * Returns the resulting document tree 14290 */ 14291 14292xmlDocPtr 14293xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14294 int size, int recovery, void *data) { 14295 xmlDocPtr ret; 14296 xmlParserCtxtPtr ctxt; 14297 14298 xmlInitParser(); 14299 14300 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14301 if (ctxt == NULL) return(NULL); 14302 if (sax != NULL) { 14303 if (ctxt->sax != NULL) 14304 xmlFree(ctxt->sax); 14305 ctxt->sax = sax; 14306 } 14307 xmlDetectSAX2(ctxt); 14308 if (data!=NULL) { 14309 ctxt->_private=data; 14310 } 14311 14312 ctxt->recovery = recovery; 14313 14314 xmlParseDocument(ctxt); 14315 14316 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14317 else { 14318 ret = NULL; 14319 xmlFreeDoc(ctxt->myDoc); 14320 ctxt->myDoc = NULL; 14321 } 14322 if (sax != NULL) 14323 ctxt->sax = NULL; 14324 xmlFreeParserCtxt(ctxt); 14325 14326 return(ret); 14327} 14328 14329/** 14330 * xmlSAXParseMemory: 14331 * @sax: the SAX handler block 14332 * @buffer: an pointer to a char array 14333 * @size: the size of the array 14334 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14335 * documents 14336 * 14337 * parse an XML in-memory block and use the given SAX function block 14338 * to handle the parsing callback. If sax is NULL, fallback to the default 14339 * DOM tree building routines. 14340 * 14341 * Returns the resulting document tree 14342 */ 14343xmlDocPtr 14344xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14345 int size, int recovery) { 14346 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14347} 14348 14349/** 14350 * xmlParseMemory: 14351 * @buffer: an pointer to a char array 14352 * @size: the size of the array 14353 * 14354 * parse an XML in-memory block and build a tree. 
14355 * 14356 * Returns the resulting document tree 14357 */ 14358 14359xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14360 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14361} 14362 14363/** 14364 * xmlRecoverMemory: 14365 * @buffer: an pointer to a char array 14366 * @size: the size of the array 14367 * 14368 * parse an XML in-memory block and build a tree. 14369 * In the case the document is not Well Formed, an attempt to 14370 * build a tree is tried anyway 14371 * 14372 * Returns the resulting document tree or NULL in case of error 14373 */ 14374 14375xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14376 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14377} 14378 14379/** 14380 * xmlSAXUserParseMemory: 14381 * @sax: a SAX handler 14382 * @user_data: The user data returned on SAX callbacks 14383 * @buffer: an in-memory XML document input 14384 * @size: the length of the XML document in bytes 14385 * 14386 * A better SAX parsing routine. 14387 * parse an XML in-memory buffer and call the given SAX handler routines. 
14388 * 14389 * Returns 0 in case of success or a error number otherwise 14390 */ 14391int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14392 const char *buffer, int size) { 14393 int ret = 0; 14394 xmlParserCtxtPtr ctxt; 14395 14396 xmlInitParser(); 14397 14398 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14399 if (ctxt == NULL) return -1; 14400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14401 xmlFree(ctxt->sax); 14402 ctxt->sax = sax; 14403 xmlDetectSAX2(ctxt); 14404 14405 if (user_data != NULL) 14406 ctxt->userData = user_data; 14407 14408 xmlParseDocument(ctxt); 14409 14410 if (ctxt->wellFormed) 14411 ret = 0; 14412 else { 14413 if (ctxt->errNo != 0) 14414 ret = ctxt->errNo; 14415 else 14416 ret = -1; 14417 } 14418 if (sax != NULL) 14419 ctxt->sax = NULL; 14420 if (ctxt->myDoc != NULL) { 14421 xmlFreeDoc(ctxt->myDoc); 14422 ctxt->myDoc = NULL; 14423 } 14424 xmlFreeParserCtxt(ctxt); 14425 14426 return ret; 14427} 14428#endif /* LIBXML_SAX1_ENABLED */ 14429 14430/** 14431 * xmlCreateDocParserCtxt: 14432 * @cur: a pointer to an array of xmlChar 14433 * 14434 * Creates a parser context for an XML in-memory document. 14435 * 14436 * Returns the new parser context or NULL 14437 */ 14438xmlParserCtxtPtr 14439xmlCreateDocParserCtxt(const xmlChar *cur) { 14440 int len; 14441 14442 if (cur == NULL) 14443 return(NULL); 14444 len = xmlStrlen(cur); 14445 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14446} 14447 14448#ifdef LIBXML_SAX1_ENABLED 14449/** 14450 * xmlSAXParseDoc: 14451 * @sax: the SAX handler block 14452 * @cur: a pointer to an array of xmlChar 14453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14454 * documents 14455 * 14456 * parse an XML in-memory document and build a tree. 14457 * It use the given SAX function block to handle the parsing callback. 14458 * If sax is NULL, fallback to the default DOM tree building routines. 
14459 * 14460 * Returns the resulting document tree 14461 */ 14462 14463xmlDocPtr 14464xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14465 xmlDocPtr ret; 14466 xmlParserCtxtPtr ctxt; 14467 xmlSAXHandlerPtr oldsax = NULL; 14468 14469 if (cur == NULL) return(NULL); 14470 14471 14472 ctxt = xmlCreateDocParserCtxt(cur); 14473 if (ctxt == NULL) return(NULL); 14474 if (sax != NULL) { 14475 oldsax = ctxt->sax; 14476 ctxt->sax = sax; 14477 ctxt->userData = NULL; 14478 } 14479 xmlDetectSAX2(ctxt); 14480 14481 xmlParseDocument(ctxt); 14482 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14483 else { 14484 ret = NULL; 14485 xmlFreeDoc(ctxt->myDoc); 14486 ctxt->myDoc = NULL; 14487 } 14488 if (sax != NULL) 14489 ctxt->sax = oldsax; 14490 xmlFreeParserCtxt(ctxt); 14491 14492 return(ret); 14493} 14494 14495/** 14496 * xmlParseDoc: 14497 * @cur: a pointer to an array of xmlChar 14498 * 14499 * parse an XML in-memory document and build a tree. 14500 * 14501 * Returns the resulting document tree 14502 */ 14503 14504xmlDocPtr 14505xmlParseDoc(const xmlChar *cur) { 14506 return(xmlSAXParseDoc(NULL, cur, 0)); 14507} 14508#endif /* LIBXML_SAX1_ENABLED */ 14509 14510#ifdef LIBXML_LEGACY_ENABLED 14511/************************************************************************ 14512 * * 14513 * Specific function to keep track of entities references * 14514 * and used by the XSLT debugger * 14515 * * 14516 ************************************************************************/ 14517 14518static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14519 14520/** 14521 * xmlAddEntityReference: 14522 * @ent : A valid entity 14523 * @firstNode : A valid first node for children of entity 14524 * @lastNode : A valid last node of children entity 14525 * 14526 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14527 */ 14528static void 14529xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14530 xmlNodePtr lastNode) 14531{ 14532 
if (xmlEntityRefFunc != NULL) { 14533 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14534 } 14535} 14536 14537 14538/** 14539 * xmlSetEntityReferenceFunc: 14540 * @func: A valid function 14541 * 14542 * Set the function to call call back when a xml reference has been made 14543 */ 14544void 14545xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14546{ 14547 xmlEntityRefFunc = func; 14548} 14549#endif /* LIBXML_LEGACY_ENABLED */ 14550 14551/************************************************************************ 14552 * * 14553 * Miscellaneous * 14554 * * 14555 ************************************************************************/ 14556 14557#ifdef LIBXML_XPATH_ENABLED 14558#include <libxml/xpath.h> 14559#endif 14560 14561extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14562static int xmlParserInitialized = 0; 14563 14564/** 14565 * xmlInitParser: 14566 * 14567 * Initialization function for the XML parser. 14568 * This is not reentrant. Call once before processing in case of 14569 * use in multithreaded programs. 
14570 */ 14571 14572void 14573xmlInitParser(void) { 14574 if (xmlParserInitialized != 0) 14575 return; 14576 14577#ifdef LIBXML_THREAD_ENABLED 14578 __xmlGlobalInitMutexLock(); 14579 if (xmlParserInitialized == 0) { 14580#endif 14581 xmlInitThreads(); 14582 xmlInitGlobals(); 14583 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14584 (xmlGenericError == NULL)) 14585 initGenericErrorDefaultFunc(NULL); 14586 xmlInitMemory(); 14587 xmlInitializeDict(); 14588 xmlInitCharEncodingHandlers(); 14589 xmlDefaultSAXHandlerInit(); 14590 xmlRegisterDefaultInputCallbacks(); 14591#ifdef LIBXML_OUTPUT_ENABLED 14592 xmlRegisterDefaultOutputCallbacks(); 14593#endif /* LIBXML_OUTPUT_ENABLED */ 14594#ifdef LIBXML_HTML_ENABLED 14595 htmlInitAutoClose(); 14596 htmlDefaultSAXHandlerInit(); 14597#endif 14598#ifdef LIBXML_XPATH_ENABLED 14599 xmlXPathInit(); 14600#endif 14601 xmlParserInitialized = 1; 14602#ifdef LIBXML_THREAD_ENABLED 14603 } 14604 __xmlGlobalInitMutexUnlock(); 14605#endif 14606} 14607 14608/** 14609 * xmlCleanupParser: 14610 * 14611 * This function name is somewhat misleading. It does not clean up 14612 * parser state, it cleans up memory allocated by the library itself. 14613 * It is a cleanup function for the XML library. It tries to reclaim all 14614 * related global memory allocated for the library processing. 14615 * It doesn't deallocate any document related memory. One should 14616 * call xmlCleanupParser() only when the process has finished using 14617 * the library and all XML/HTML documents built with it. 14618 * See also xmlInitParser() which has the opposite function of preparing 14619 * the library for operations. 14620 * 14621 * WARNING: if your application is multithreaded or has plugin support 14622 * calling this may crash the application if another thread or 14623 * a plugin is still using libxml2. It's sometimes very hard to 14624 * guess if libxml2 is in use in the application, some libraries 14625 * or plugins may use it without notice. 
In case of doubt abstain 14626 * from calling this function or do it just before calling exit() 14627 * to avoid leak reports from valgrind ! 14628 */ 14629 14630void 14631xmlCleanupParser(void) { 14632 if (!xmlParserInitialized) 14633 return; 14634 14635 xmlCleanupCharEncodingHandlers(); 14636#ifdef LIBXML_CATALOG_ENABLED 14637 xmlCatalogCleanup(); 14638#endif 14639 xmlDictCleanup(); 14640 xmlCleanupInputCallbacks(); 14641#ifdef LIBXML_OUTPUT_ENABLED 14642 xmlCleanupOutputCallbacks(); 14643#endif 14644#ifdef LIBXML_SCHEMAS_ENABLED 14645 xmlSchemaCleanupTypes(); 14646 xmlRelaxNGCleanupTypes(); 14647#endif 14648 xmlCleanupGlobals(); 14649 xmlResetLastError(); 14650 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14651 xmlCleanupMemory(); 14652 xmlParserInitialized = 0; 14653} 14654 14655/************************************************************************ 14656 * * 14657 * New set (2.6.0) of simpler and more flexible APIs * 14658 * * 14659 ************************************************************************/ 14660 14661/** 14662 * DICT_FREE: 14663 * @str: a string 14664 * 14665 * Free a string if it is not owned by the "dict" dictionnary in the 14666 * current scope 14667 */ 14668#define DICT_FREE(str) \ 14669 if ((str) && ((!dict) || \ 14670 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14671 xmlFree((char *)(str)); 14672 14673/** 14674 * xmlCtxtReset: 14675 * @ctxt: an XML parser context 14676 * 14677 * Reset a parser context 14678 */ 14679void 14680xmlCtxtReset(xmlParserCtxtPtr ctxt) 14681{ 14682 xmlParserInputPtr input; 14683 xmlDictPtr dict; 14684 14685 if (ctxt == NULL) 14686 return; 14687 14688 dict = ctxt->dict; 14689 14690 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14691 xmlFreeInputStream(input); 14692 } 14693 ctxt->inputNr = 0; 14694 ctxt->input = NULL; 14695 14696 ctxt->spaceNr = 0; 14697 if (ctxt->spaceTab != NULL) { 14698 ctxt->spaceTab[0] = -1; 14699 ctxt->space = &ctxt->spaceTab[0]; 14700 
} else { 14701 ctxt->space = NULL; 14702 } 14703 14704 14705 ctxt->nodeNr = 0; 14706 ctxt->node = NULL; 14707 14708 ctxt->nameNr = 0; 14709 ctxt->name = NULL; 14710 14711 DICT_FREE(ctxt->version); 14712 ctxt->version = NULL; 14713 DICT_FREE(ctxt->encoding); 14714 ctxt->encoding = NULL; 14715 DICT_FREE(ctxt->directory); 14716 ctxt->directory = NULL; 14717 DICT_FREE(ctxt->extSubURI); 14718 ctxt->extSubURI = NULL; 14719 DICT_FREE(ctxt->extSubSystem); 14720 ctxt->extSubSystem = NULL; 14721 if (ctxt->myDoc != NULL) 14722 xmlFreeDoc(ctxt->myDoc); 14723 ctxt->myDoc = NULL; 14724 14725 ctxt->standalone = -1; 14726 ctxt->hasExternalSubset = 0; 14727 ctxt->hasPErefs = 0; 14728 ctxt->html = 0; 14729 ctxt->external = 0; 14730 ctxt->instate = XML_PARSER_START; 14731 ctxt->token = 0; 14732 14733 ctxt->wellFormed = 1; 14734 ctxt->nsWellFormed = 1; 14735 ctxt->disableSAX = 0; 14736 ctxt->valid = 1; 14737#if 0 14738 ctxt->vctxt.userData = ctxt; 14739 ctxt->vctxt.error = xmlParserValidityError; 14740 ctxt->vctxt.warning = xmlParserValidityWarning; 14741#endif 14742 ctxt->record_info = 0; 14743 ctxt->nbChars = 0; 14744 ctxt->checkIndex = 0; 14745 ctxt->inSubset = 0; 14746 ctxt->errNo = XML_ERR_OK; 14747 ctxt->depth = 0; 14748 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14749 ctxt->catalogs = NULL; 14750 ctxt->nbentities = 0; 14751 ctxt->sizeentities = 0; 14752 xmlInitNodeInfoSeq(&ctxt->node_seq); 14753 14754 if (ctxt->attsDefault != NULL) { 14755 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14756 ctxt->attsDefault = NULL; 14757 } 14758 if (ctxt->attsSpecial != NULL) { 14759 xmlHashFree(ctxt->attsSpecial, NULL); 14760 ctxt->attsSpecial = NULL; 14761 } 14762 14763#ifdef LIBXML_CATALOG_ENABLED 14764 if (ctxt->catalogs != NULL) 14765 xmlCatalogFreeLocal(ctxt->catalogs); 14766#endif 14767 if (ctxt->lastError.code != XML_ERR_OK) 14768 xmlResetError(&ctxt->lastError); 14769} 14770 14771/** 14772 * xmlCtxtResetPush: 14773 * @ctxt: an XML parser context 14774 * @chunk: a pointer 
to an array of chars 14775 * @size: number of chars in the array 14776 * @filename: an optional file name or URI 14777 * @encoding: the document encoding, or NULL 14778 * 14779 * Reset a push parser context 14780 * 14781 * Returns 0 in case of success and 1 in case of error 14782 */ 14783int 14784xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14785 int size, const char *filename, const char *encoding) 14786{ 14787 xmlParserInputPtr inputStream; 14788 xmlParserInputBufferPtr buf; 14789 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14790 14791 if (ctxt == NULL) 14792 return(1); 14793 14794 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14795 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14796 14797 buf = xmlAllocParserInputBuffer(enc); 14798 if (buf == NULL) 14799 return(1); 14800 14801 if (ctxt == NULL) { 14802 xmlFreeParserInputBuffer(buf); 14803 return(1); 14804 } 14805 14806 xmlCtxtReset(ctxt); 14807 14808 if (ctxt->pushTab == NULL) { 14809 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14810 sizeof(xmlChar *)); 14811 if (ctxt->pushTab == NULL) { 14812 xmlErrMemory(ctxt, NULL); 14813 xmlFreeParserInputBuffer(buf); 14814 return(1); 14815 } 14816 } 14817 14818 if (filename == NULL) { 14819 ctxt->directory = NULL; 14820 } else { 14821 ctxt->directory = xmlParserGetDirectory(filename); 14822 } 14823 14824 inputStream = xmlNewInputStream(ctxt); 14825 if (inputStream == NULL) { 14826 xmlFreeParserInputBuffer(buf); 14827 return(1); 14828 } 14829 14830 if (filename == NULL) 14831 inputStream->filename = NULL; 14832 else 14833 inputStream->filename = (char *) 14834 xmlCanonicPath((const xmlChar *) filename); 14835 inputStream->buf = buf; 14836 xmlBufResetInput(buf->buffer, inputStream); 14837 14838 inputPush(ctxt, inputStream); 14839 14840 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14841 (ctxt->input->buf != NULL)) { 14842 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14843 size_t 
cur = ctxt->input->cur - ctxt->input->base; 14844 14845 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14846 14847 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14848#ifdef DEBUG_PUSH 14849 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14850#endif 14851 } 14852 14853 if (encoding != NULL) { 14854 xmlCharEncodingHandlerPtr hdlr; 14855 14856 if (ctxt->encoding != NULL) 14857 xmlFree((xmlChar *) ctxt->encoding); 14858 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14859 14860 hdlr = xmlFindCharEncodingHandler(encoding); 14861 if (hdlr != NULL) { 14862 xmlSwitchToEncoding(ctxt, hdlr); 14863 } else { 14864 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14865 "Unsupported encoding %s\n", BAD_CAST encoding); 14866 } 14867 } else if (enc != XML_CHAR_ENCODING_NONE) { 14868 xmlSwitchEncoding(ctxt, enc); 14869 } 14870 14871 return(0); 14872} 14873 14874 14875/** 14876 * xmlCtxtUseOptionsInternal: 14877 * @ctxt: an XML parser context 14878 * @options: a combination of xmlParserOption 14879 * @encoding: the user provided encoding to use 14880 * 14881 * Applies the options to the parser context 14882 * 14883 * Returns 0 in case of success, the set of unknown or unimplemented options 14884 * in case of error. 
14885 */ 14886static int 14887xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14888{ 14889 if (ctxt == NULL) 14890 return(-1); 14891 if (encoding != NULL) { 14892 if (ctxt->encoding != NULL) 14893 xmlFree((xmlChar *) ctxt->encoding); 14894 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14895 } 14896 if (options & XML_PARSE_RECOVER) { 14897 ctxt->recovery = 1; 14898 options -= XML_PARSE_RECOVER; 14899 ctxt->options |= XML_PARSE_RECOVER; 14900 } else 14901 ctxt->recovery = 0; 14902 if (options & XML_PARSE_DTDLOAD) { 14903 ctxt->loadsubset = XML_DETECT_IDS; 14904 options -= XML_PARSE_DTDLOAD; 14905 ctxt->options |= XML_PARSE_DTDLOAD; 14906 } else 14907 ctxt->loadsubset = 0; 14908 if (options & XML_PARSE_DTDATTR) { 14909 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14910 options -= XML_PARSE_DTDATTR; 14911 ctxt->options |= XML_PARSE_DTDATTR; 14912 } 14913 if (options & XML_PARSE_NOENT) { 14914 ctxt->replaceEntities = 1; 14915 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14916 options -= XML_PARSE_NOENT; 14917 ctxt->options |= XML_PARSE_NOENT; 14918 } else 14919 ctxt->replaceEntities = 0; 14920 if (options & XML_PARSE_PEDANTIC) { 14921 ctxt->pedantic = 1; 14922 options -= XML_PARSE_PEDANTIC; 14923 ctxt->options |= XML_PARSE_PEDANTIC; 14924 } else 14925 ctxt->pedantic = 0; 14926 if (options & XML_PARSE_NOBLANKS) { 14927 ctxt->keepBlanks = 0; 14928 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14929 options -= XML_PARSE_NOBLANKS; 14930 ctxt->options |= XML_PARSE_NOBLANKS; 14931 } else 14932 ctxt->keepBlanks = 1; 14933 if (options & XML_PARSE_DTDVALID) { 14934 ctxt->validate = 1; 14935 if (options & XML_PARSE_NOWARNING) 14936 ctxt->vctxt.warning = NULL; 14937 if (options & XML_PARSE_NOERROR) 14938 ctxt->vctxt.error = NULL; 14939 options -= XML_PARSE_DTDVALID; 14940 ctxt->options |= XML_PARSE_DTDVALID; 14941 } else 14942 ctxt->validate = 0; 14943 if (options & XML_PARSE_NOWARNING) { 14944 ctxt->sax->warning = NULL; 14945 
options -= XML_PARSE_NOWARNING; 14946 } 14947 if (options & XML_PARSE_NOERROR) { 14948 ctxt->sax->error = NULL; 14949 ctxt->sax->fatalError = NULL; 14950 options -= XML_PARSE_NOERROR; 14951 } 14952#ifdef LIBXML_SAX1_ENABLED 14953 if (options & XML_PARSE_SAX1) { 14954 ctxt->sax->startElement = xmlSAX2StartElement; 14955 ctxt->sax->endElement = xmlSAX2EndElement; 14956 ctxt->sax->startElementNs = NULL; 14957 ctxt->sax->endElementNs = NULL; 14958 ctxt->sax->initialized = 1; 14959 options -= XML_PARSE_SAX1; 14960 ctxt->options |= XML_PARSE_SAX1; 14961 } 14962#endif /* LIBXML_SAX1_ENABLED */ 14963 if (options & XML_PARSE_NODICT) { 14964 ctxt->dictNames = 0; 14965 options -= XML_PARSE_NODICT; 14966 ctxt->options |= XML_PARSE_NODICT; 14967 } else { 14968 ctxt->dictNames = 1; 14969 } 14970 if (options & XML_PARSE_NOCDATA) { 14971 ctxt->sax->cdataBlock = NULL; 14972 options -= XML_PARSE_NOCDATA; 14973 ctxt->options |= XML_PARSE_NOCDATA; 14974 } 14975 if (options & XML_PARSE_NSCLEAN) { 14976 ctxt->options |= XML_PARSE_NSCLEAN; 14977 options -= XML_PARSE_NSCLEAN; 14978 } 14979 if (options & XML_PARSE_NONET) { 14980 ctxt->options |= XML_PARSE_NONET; 14981 options -= XML_PARSE_NONET; 14982 } 14983 if (options & XML_PARSE_COMPACT) { 14984 ctxt->options |= XML_PARSE_COMPACT; 14985 options -= XML_PARSE_COMPACT; 14986 } 14987 if (options & XML_PARSE_OLD10) { 14988 ctxt->options |= XML_PARSE_OLD10; 14989 options -= XML_PARSE_OLD10; 14990 } 14991 if (options & XML_PARSE_NOBASEFIX) { 14992 ctxt->options |= XML_PARSE_NOBASEFIX; 14993 options -= XML_PARSE_NOBASEFIX; 14994 } 14995 if (options & XML_PARSE_HUGE) { 14996 ctxt->options |= XML_PARSE_HUGE; 14997 options -= XML_PARSE_HUGE; 14998 if (ctxt->dict != NULL) 14999 xmlDictSetLimit(ctxt->dict, 0); 15000 } 15001 if (options & XML_PARSE_OLDSAX) { 15002 ctxt->options |= XML_PARSE_OLDSAX; 15003 options -= XML_PARSE_OLDSAX; 15004 } 15005 if (options & XML_PARSE_IGNORE_ENC) { 15006 ctxt->options |= XML_PARSE_IGNORE_ENC; 15007 options -= 
XML_PARSE_IGNORE_ENC; 15008 } 15009 if (options & XML_PARSE_BIG_LINES) { 15010 ctxt->options |= XML_PARSE_BIG_LINES; 15011 options -= XML_PARSE_BIG_LINES; 15012 } 15013 ctxt->linenumbers = 1; 15014 return (options); 15015} 15016 15017/** 15018 * xmlCtxtUseOptions: 15019 * @ctxt: an XML parser context 15020 * @options: a combination of xmlParserOption 15021 * 15022 * Applies the options to the parser context 15023 * 15024 * Returns 0 in case of success, the set of unknown or unimplemented options 15025 * in case of error. 15026 */ 15027int 15028xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15029{ 15030 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15031} 15032 15033/** 15034 * xmlDoRead: 15035 * @ctxt: an XML parser context 15036 * @URL: the base URL to use for the document 15037 * @encoding: the document encoding, or NULL 15038 * @options: a combination of xmlParserOption 15039 * @reuse: keep the context for reuse 15040 * 15041 * Common front-end for the xmlRead functions 15042 * 15043 * Returns the resulting document tree or NULL 15044 */ 15045static xmlDocPtr 15046xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15047 int options, int reuse) 15048{ 15049 xmlDocPtr ret; 15050 15051 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15052 if (encoding != NULL) { 15053 xmlCharEncodingHandlerPtr hdlr; 15054 15055 hdlr = xmlFindCharEncodingHandler(encoding); 15056 if (hdlr != NULL) 15057 xmlSwitchToEncoding(ctxt, hdlr); 15058 } 15059 if ((URL != NULL) && (ctxt->input != NULL) && 15060 (ctxt->input->filename == NULL)) 15061 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15062 xmlParseDocument(ctxt); 15063 if ((ctxt->wellFormed) || ctxt->recovery) 15064 ret = ctxt->myDoc; 15065 else { 15066 ret = NULL; 15067 if (ctxt->myDoc != NULL) { 15068 xmlFreeDoc(ctxt->myDoc); 15069 } 15070 } 15071 ctxt->myDoc = NULL; 15072 if (!reuse) { 15073 xmlFreeParserCtxt(ctxt); 15074 } 15075 15076 return (ret); 15077} 15078 
15079/** 15080 * xmlReadDoc: 15081 * @cur: a pointer to a zero terminated string 15082 * @URL: the base URL to use for the document 15083 * @encoding: the document encoding, or NULL 15084 * @options: a combination of xmlParserOption 15085 * 15086 * parse an XML in-memory document and build a tree. 15087 * 15088 * Returns the resulting document tree 15089 */ 15090xmlDocPtr 15091xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15092{ 15093 xmlParserCtxtPtr ctxt; 15094 15095 if (cur == NULL) 15096 return (NULL); 15097 15098 ctxt = xmlCreateDocParserCtxt(cur); 15099 if (ctxt == NULL) 15100 return (NULL); 15101 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15102} 15103 15104/** 15105 * xmlReadFile: 15106 * @filename: a file or URL 15107 * @encoding: the document encoding, or NULL 15108 * @options: a combination of xmlParserOption 15109 * 15110 * parse an XML file from the filesystem or the network. 15111 * 15112 * Returns the resulting document tree 15113 */ 15114xmlDocPtr 15115xmlReadFile(const char *filename, const char *encoding, int options) 15116{ 15117 xmlParserCtxtPtr ctxt; 15118 15119 ctxt = xmlCreateURLParserCtxt(filename, options); 15120 if (ctxt == NULL) 15121 return (NULL); 15122 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15123} 15124 15125/** 15126 * xmlReadMemory: 15127 * @buffer: a pointer to a char array 15128 * @size: the size of the array 15129 * @URL: the base URL to use for the document 15130 * @encoding: the document encoding, or NULL 15131 * @options: a combination of xmlParserOption 15132 * 15133 * parse an XML in-memory document and build a tree. 
15134 * 15135 * Returns the resulting document tree 15136 */ 15137xmlDocPtr 15138xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15139{ 15140 xmlParserCtxtPtr ctxt; 15141 15142 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15143 if (ctxt == NULL) 15144 return (NULL); 15145 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15146} 15147 15148/** 15149 * xmlReadFd: 15150 * @fd: an open file descriptor 15151 * @URL: the base URL to use for the document 15152 * @encoding: the document encoding, or NULL 15153 * @options: a combination of xmlParserOption 15154 * 15155 * parse an XML from a file descriptor and build a tree. 15156 * NOTE that the file descriptor will not be closed when the 15157 * reader is closed or reset. 15158 * 15159 * Returns the resulting document tree 15160 */ 15161xmlDocPtr 15162xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15163{ 15164 xmlParserCtxtPtr ctxt; 15165 xmlParserInputBufferPtr input; 15166 xmlParserInputPtr stream; 15167 15168 if (fd < 0) 15169 return (NULL); 15170 15171 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15172 if (input == NULL) 15173 return (NULL); 15174 input->closecallback = NULL; 15175 ctxt = xmlNewParserCtxt(); 15176 if (ctxt == NULL) { 15177 xmlFreeParserInputBuffer(input); 15178 return (NULL); 15179 } 15180 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15181 if (stream == NULL) { 15182 xmlFreeParserInputBuffer(input); 15183 xmlFreeParserCtxt(ctxt); 15184 return (NULL); 15185 } 15186 inputPush(ctxt, stream); 15187 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15188} 15189 15190/** 15191 * xmlReadIO: 15192 * @ioread: an I/O read function 15193 * @ioclose: an I/O close function 15194 * @ioctx: an I/O handler 15195 * @URL: the base URL to use for the document 15196 * @encoding: the document encoding, or NULL 15197 * @options: a combination of xmlParserOption 15198 * 15199 * parse an XML document from 
I/O functions and source and build a tree.
 *
 * Returns the resulting document tree
 */
xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
          void *ioctx, const char *URL, const char *encoding, int options)
{
    xmlParserCtxtPtr parser = NULL;
    xmlParserInputBufferPtr ioBuf;
    xmlParserInputPtr in;

    if (ioread == NULL)
        return (NULL);

    ioBuf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
                                         XML_CHAR_ENCODING_NONE);
    if (ioBuf == NULL) {
        /* No buffer was created, so the close callback is run here. */
        if (ioclose != NULL)
            ioclose(ioctx);
        return (NULL);
    }

    parser = xmlNewParserCtxt();
    if (parser == NULL)
        goto failed;

    in = xmlNewIOInputStream(parser, ioBuf, XML_CHAR_ENCODING_NONE);
    if (in == NULL)
        goto failed;

    inputPush(parser, in);
    return (xmlDoRead(parser, URL, encoding, options, 0));

failed:
    /* Buffer first, then the context if one was created. */
    xmlFreeParserInputBuffer(ioBuf);
    if (parser != NULL)
        xmlFreeParserCtxt(parser);
    return (NULL);
}

/**
 * xmlCtxtReadDoc:
 * @ctxt:  an XML parser context
 * @cur:  a pointer to a zero terminated string
 * @URL:  the base URL to use for the document
 * @encoding:  the document encoding, or NULL
 * @options:  a combination of xmlParserOption
 *
 * parse an XML in-memory document and build a tree.
15245 * This reuses the existing @ctxt parser context 15246 * 15247 * Returns the resulting document tree 15248 */ 15249xmlDocPtr 15250xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15251 const char *URL, const char *encoding, int options) 15252{ 15253 xmlParserInputPtr stream; 15254 15255 if (cur == NULL) 15256 return (NULL); 15257 if (ctxt == NULL) 15258 return (NULL); 15259 15260 xmlCtxtReset(ctxt); 15261 15262 stream = xmlNewStringInputStream(ctxt, cur); 15263 if (stream == NULL) { 15264 return (NULL); 15265 } 15266 inputPush(ctxt, stream); 15267 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15268} 15269 15270/** 15271 * xmlCtxtReadFile: 15272 * @ctxt: an XML parser context 15273 * @filename: a file or URL 15274 * @encoding: the document encoding, or NULL 15275 * @options: a combination of xmlParserOption 15276 * 15277 * parse an XML file from the filesystem or the network. 15278 * This reuses the existing @ctxt parser context 15279 * 15280 * Returns the resulting document tree 15281 */ 15282xmlDocPtr 15283xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15284 const char *encoding, int options) 15285{ 15286 xmlParserInputPtr stream; 15287 15288 if (filename == NULL) 15289 return (NULL); 15290 if (ctxt == NULL) 15291 return (NULL); 15292 15293 xmlCtxtReset(ctxt); 15294 15295 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15296 if (stream == NULL) { 15297 return (NULL); 15298 } 15299 inputPush(ctxt, stream); 15300 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15301} 15302 15303/** 15304 * xmlCtxtReadMemory: 15305 * @ctxt: an XML parser context 15306 * @buffer: a pointer to a char array 15307 * @size: the size of the array 15308 * @URL: the base URL to use for the document 15309 * @encoding: the document encoding, or NULL 15310 * @options: a combination of xmlParserOption 15311 * 15312 * parse an XML in-memory document and build a tree. 
15313 * This reuses the existing @ctxt parser context 15314 * 15315 * Returns the resulting document tree 15316 */ 15317xmlDocPtr 15318xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15319 const char *URL, const char *encoding, int options) 15320{ 15321 xmlParserInputBufferPtr input; 15322 xmlParserInputPtr stream; 15323 15324 if (ctxt == NULL) 15325 return (NULL); 15326 if (buffer == NULL) 15327 return (NULL); 15328 15329 xmlCtxtReset(ctxt); 15330 15331 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15332 if (input == NULL) { 15333 return(NULL); 15334 } 15335 15336 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15337 if (stream == NULL) { 15338 xmlFreeParserInputBuffer(input); 15339 return(NULL); 15340 } 15341 15342 inputPush(ctxt, stream); 15343 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15344} 15345 15346/** 15347 * xmlCtxtReadFd: 15348 * @ctxt: an XML parser context 15349 * @fd: an open file descriptor 15350 * @URL: the base URL to use for the document 15351 * @encoding: the document encoding, or NULL 15352 * @options: a combination of xmlParserOption 15353 * 15354 * parse an XML from a file descriptor and build a tree. 15355 * This reuses the existing @ctxt parser context 15356 * NOTE that the file descriptor will not be closed when the 15357 * reader is closed or reset. 
15358 * 15359 * Returns the resulting document tree 15360 */ 15361xmlDocPtr 15362xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15363 const char *URL, const char *encoding, int options) 15364{ 15365 xmlParserInputBufferPtr input; 15366 xmlParserInputPtr stream; 15367 15368 if (fd < 0) 15369 return (NULL); 15370 if (ctxt == NULL) 15371 return (NULL); 15372 15373 xmlCtxtReset(ctxt); 15374 15375 15376 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15377 if (input == NULL) 15378 return (NULL); 15379 input->closecallback = NULL; 15380 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15381 if (stream == NULL) { 15382 xmlFreeParserInputBuffer(input); 15383 return (NULL); 15384 } 15385 inputPush(ctxt, stream); 15386 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15387} 15388 15389/** 15390 * xmlCtxtReadIO: 15391 * @ctxt: an XML parser context 15392 * @ioread: an I/O read function 15393 * @ioclose: an I/O close function 15394 * @ioctx: an I/O handler 15395 * @URL: the base URL to use for the document 15396 * @encoding: the document encoding, or NULL 15397 * @options: a combination of xmlParserOption 15398 * 15399 * parse an XML document from I/O functions and source and build a tree. 
15400 * This reuses the existing @ctxt parser context 15401 * 15402 * Returns the resulting document tree 15403 */ 15404xmlDocPtr 15405xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15406 xmlInputCloseCallback ioclose, void *ioctx, 15407 const char *URL, 15408 const char *encoding, int options) 15409{ 15410 xmlParserInputBufferPtr input; 15411 xmlParserInputPtr stream; 15412 15413 if (ioread == NULL) 15414 return (NULL); 15415 if (ctxt == NULL) 15416 return (NULL); 15417 15418 xmlCtxtReset(ctxt); 15419 15420 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15421 XML_CHAR_ENCODING_NONE); 15422 if (input == NULL) { 15423 if (ioclose != NULL) 15424 ioclose(ioctx); 15425 return (NULL); 15426 } 15427 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15428 if (stream == NULL) { 15429 xmlFreeParserInputBuffer(input); 15430 return (NULL); 15431 } 15432 inputPush(ctxt, stream); 15433 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15434} 15435 15436#define bottom_parser 15437#include "elfgcchack.h" 15438