parser.c revision 2135fc2d96dffb7795f6bfa74eaff99f1f04a56a
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60#ifdef LIBXML_SCHEMAS_ENABLED 61#include <libxml/xmlschemastypes.h> 62#include <libxml/relaxng.h> 63#endif 64#ifdef HAVE_CTYPE_H 65#include <ctype.h> 66#endif 67#ifdef HAVE_STDLIB_H 68#include <stdlib.h> 69#endif 70#ifdef HAVE_SYS_STAT_H 71#include <sys/stat.h> 72#endif 73#ifdef HAVE_FCNTL_H 74#include <fcntl.h> 75#endif 76#ifdef HAVE_UNISTD_H 77#include <unistd.h> 78#endif 79#ifdef HAVE_ZLIB_H 80#include <zlib.h> 81#endif 82 83/** 84 * xmlParserMaxDepth: 85 * 86 * arbitrary depth limit for the XML documents that we allow to 87 * process. This is not a limitation of the parser but a safety 88 * boundary feature. 
89 */ 90unsigned int xmlParserMaxDepth = 1024; 91 92#define SAX2 1 93 94#define XML_PARSER_BIG_BUFFER_SIZE 300 95#define XML_PARSER_BUFFER_SIZE 100 96 97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 98 99/* 100 * List of XML prefixed PI allowed by W3C specs 101 */ 102 103static const char *xmlW3CPIs[] = { 104 "xml-stylesheet", 105 NULL 106}; 107 108 109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 111 const xmlChar **str); 112 113static xmlParserErrors 114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 115 xmlSAXHandlerPtr sax, 116 void *user_data, int depth, const xmlChar *URL, 117 const xmlChar *ID, xmlNodePtr *list); 118 119#ifdef LIBXML_LEGACY_ENABLED 120static void 121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 122 xmlNodePtr lastNode); 123#endif /* LIBXML_LEGACY_ENABLED */ 124 125static xmlParserErrors 126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 127 const xmlChar *string, void *user_data, xmlNodePtr *lst); 128 129static int 130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 131 132/************************************************************************ 133 * * 134 * Some factorized error routines * 135 * * 136 ************************************************************************/ 137 138/** 139 * xmlErrAttributeDup: 140 * @ctxt: an XML parser context 141 * @prefix: the attribute prefix 142 * @localname: the attribute localname 143 * 144 * Handle a redefinition of attribute error 145 */ 146static void 147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 148 const xmlChar * localname) 149{ 150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 151 (ctxt->instate == XML_PARSER_EOF)) 152 return; 153 if (ctxt != NULL) 154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 155 if (prefix == NULL) 156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 157 
ctxt->errNo, XML_ERR_FATAL, NULL, 0, 158 (const char *) localname, NULL, NULL, 0, 0, 159 "Attribute %s redefined\n", localname); 160 else 161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 162 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 163 (const char *) prefix, (const char *) localname, 164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 165 localname); 166 if (ctxt != NULL) { 167 ctxt->wellFormed = 0; 168 if (ctxt->recovery == 0) 169 ctxt->disableSAX = 1; 170 } 171} 172 173/** 174 * xmlFatalErr: 175 * @ctxt: an XML parser context 176 * @error: the error number 177 * @extra: extra information string 178 * 179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 180 */ 181static void 182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 183{ 184 const char *errmsg; 185 186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 187 (ctxt->instate == XML_PARSER_EOF)) 188 return; 189 switch (error) { 190 case XML_ERR_INVALID_HEX_CHARREF: 191 errmsg = "CharRef: invalid hexadecimal value\n"; 192 break; 193 case XML_ERR_INVALID_DEC_CHARREF: 194 errmsg = "CharRef: invalid decimal value\n"; 195 break; 196 case XML_ERR_INVALID_CHARREF: 197 errmsg = "CharRef: invalid value\n"; 198 break; 199 case XML_ERR_INTERNAL_ERROR: 200 errmsg = "internal error"; 201 break; 202 case XML_ERR_PEREF_AT_EOF: 203 errmsg = "PEReference at end of document\n"; 204 break; 205 case XML_ERR_PEREF_IN_PROLOG: 206 errmsg = "PEReference in prolog\n"; 207 break; 208 case XML_ERR_PEREF_IN_EPILOG: 209 errmsg = "PEReference in epilog\n"; 210 break; 211 case XML_ERR_PEREF_NO_NAME: 212 errmsg = "PEReference: no name\n"; 213 break; 214 case XML_ERR_PEREF_SEMICOL_MISSING: 215 errmsg = "PEReference: expecting ';'\n"; 216 break; 217 case XML_ERR_ENTITY_LOOP: 218 errmsg = "Detected an entity reference loop\n"; 219 break; 220 case XML_ERR_ENTITY_NOT_STARTED: 221 errmsg = "EntityValue: \" or ' expected\n"; 222 break; 223 case XML_ERR_ENTITY_PE_INTERNAL: 224 
errmsg = "PEReferences forbidden in internal subset\n"; 225 break; 226 case XML_ERR_ENTITY_NOT_FINISHED: 227 errmsg = "EntityValue: \" or ' expected\n"; 228 break; 229 case XML_ERR_ATTRIBUTE_NOT_STARTED: 230 errmsg = "AttValue: \" or ' expected\n"; 231 break; 232 case XML_ERR_LT_IN_ATTRIBUTE: 233 errmsg = "Unescaped '<' not allowed in attributes values\n"; 234 break; 235 case XML_ERR_LITERAL_NOT_STARTED: 236 errmsg = "SystemLiteral \" or ' expected\n"; 237 break; 238 case XML_ERR_LITERAL_NOT_FINISHED: 239 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 240 break; 241 case XML_ERR_MISPLACED_CDATA_END: 242 errmsg = "Sequence ']]>' not allowed in content\n"; 243 break; 244 case XML_ERR_URI_REQUIRED: 245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 246 break; 247 case XML_ERR_PUBID_REQUIRED: 248 errmsg = "PUBLIC, the Public Identifier is missing\n"; 249 break; 250 case XML_ERR_HYPHEN_IN_COMMENT: 251 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 252 break; 253 case XML_ERR_PI_NOT_STARTED: 254 errmsg = "xmlParsePI : no target name\n"; 255 break; 256 case XML_ERR_RESERVED_XML_NAME: 257 errmsg = "Invalid PI name\n"; 258 break; 259 case XML_ERR_NOTATION_NOT_STARTED: 260 errmsg = "NOTATION: Name expected here\n"; 261 break; 262 case XML_ERR_NOTATION_NOT_FINISHED: 263 errmsg = "'>' required to close NOTATION declaration\n"; 264 break; 265 case XML_ERR_VALUE_REQUIRED: 266 errmsg = "Entity value required\n"; 267 break; 268 case XML_ERR_URI_FRAGMENT: 269 errmsg = "Fragment not allowed"; 270 break; 271 case XML_ERR_ATTLIST_NOT_STARTED: 272 errmsg = "'(' required to start ATTLIST enumeration\n"; 273 break; 274 case XML_ERR_NMTOKEN_REQUIRED: 275 errmsg = "NmToken expected in ATTLIST enumeration\n"; 276 break; 277 case XML_ERR_ATTLIST_NOT_FINISHED: 278 errmsg = "')' required to finish ATTLIST enumeration\n"; 279 break; 280 case XML_ERR_MIXED_NOT_STARTED: 281 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 282 break; 283 case 
XML_ERR_PCDATA_REQUIRED: 284 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 285 break; 286 case XML_ERR_ELEMCONTENT_NOT_STARTED: 287 errmsg = "ContentDecl : Name or '(' expected\n"; 288 break; 289 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 290 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 291 break; 292 case XML_ERR_PEREF_IN_INT_SUBSET: 293 errmsg = 294 "PEReference: forbidden within markup decl in internal subset\n"; 295 break; 296 case XML_ERR_GT_REQUIRED: 297 errmsg = "expected '>'\n"; 298 break; 299 case XML_ERR_CONDSEC_INVALID: 300 errmsg = "XML conditional section '[' expected\n"; 301 break; 302 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 303 errmsg = "Content error in the external subset\n"; 304 break; 305 case XML_ERR_CONDSEC_INVALID_KEYWORD: 306 errmsg = 307 "conditional section INCLUDE or IGNORE keyword expected\n"; 308 break; 309 case XML_ERR_CONDSEC_NOT_FINISHED: 310 errmsg = "XML conditional section not closed\n"; 311 break; 312 case XML_ERR_XMLDECL_NOT_STARTED: 313 errmsg = "Text declaration '<?xml' required\n"; 314 break; 315 case XML_ERR_XMLDECL_NOT_FINISHED: 316 errmsg = "parsing XML declaration: '?>' expected\n"; 317 break; 318 case XML_ERR_EXT_ENTITY_STANDALONE: 319 errmsg = "external parsed entities cannot be standalone\n"; 320 break; 321 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 322 errmsg = "EntityRef: expecting ';'\n"; 323 break; 324 case XML_ERR_DOCTYPE_NOT_FINISHED: 325 errmsg = "DOCTYPE improperly terminated\n"; 326 break; 327 case XML_ERR_LTSLASH_REQUIRED: 328 errmsg = "EndTag: '</' not found\n"; 329 break; 330 case XML_ERR_EQUAL_REQUIRED: 331 errmsg = "expected '='\n"; 332 break; 333 case XML_ERR_STRING_NOT_CLOSED: 334 errmsg = "String not closed expecting \" or '\n"; 335 break; 336 case XML_ERR_STRING_NOT_STARTED: 337 errmsg = "String not started expecting ' or \"\n"; 338 break; 339 case XML_ERR_ENCODING_NAME: 340 errmsg = "Invalid XML encoding name\n"; 341 break; 342 case XML_ERR_STANDALONE_VALUE: 343 errmsg = "standalone accepts only 
'yes' or 'no'\n"; 344 break; 345 case XML_ERR_DOCUMENT_EMPTY: 346 errmsg = "Document is empty\n"; 347 break; 348 case XML_ERR_DOCUMENT_END: 349 errmsg = "Extra content at the end of the document\n"; 350 break; 351 case XML_ERR_NOT_WELL_BALANCED: 352 errmsg = "chunk is not well balanced\n"; 353 break; 354 case XML_ERR_EXTRA_CONTENT: 355 errmsg = "extra content at the end of well balanced chunk\n"; 356 break; 357 case XML_ERR_VERSION_MISSING: 358 errmsg = "Malformed declaration expecting version\n"; 359 break; 360#if 0 361 case: 362 errmsg = "\n"; 363 break; 364#endif 365 default: 366 errmsg = "Unregistered error message\n"; 367 } 368 if (ctxt != NULL) 369 ctxt->errNo = error; 370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 372 info); 373 if (ctxt != NULL) { 374 ctxt->wellFormed = 0; 375 if (ctxt->recovery == 0) 376 ctxt->disableSAX = 1; 377 } 378} 379 380/** 381 * xmlFatalErrMsg: 382 * @ctxt: an XML parser context 383 * @error: the error number 384 * @msg: the error message 385 * 386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 387 */ 388static void 389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 390 const char *msg) 391{ 392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 393 (ctxt->instate == XML_PARSER_EOF)) 394 return; 395 if (ctxt != NULL) 396 ctxt->errNo = error; 397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 399 if (ctxt != NULL) { 400 ctxt->wellFormed = 0; 401 if (ctxt->recovery == 0) 402 ctxt->disableSAX = 1; 403 } 404} 405 406/** 407 * xmlWarningMsg: 408 * @ctxt: an XML parser context 409 * @error: the error number 410 * @msg: the error message 411 * @str1: extra data 412 * @str2: extra data 413 * 414 * Handle a warning. 
415 */ 416static void 417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 418 const char *msg, const xmlChar *str1, const xmlChar *str2) 419{ 420 xmlStructuredErrorFunc schannel = NULL; 421 422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 423 (ctxt->instate == XML_PARSER_EOF)) 424 return; 425 if ((ctxt != NULL) && (ctxt->sax != NULL) && 426 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 427 schannel = ctxt->sax->serror; 428 __xmlRaiseError(schannel, 429 (ctxt->sax) ? ctxt->sax->warning : NULL, 430 ctxt->userData, 431 ctxt, NULL, XML_FROM_PARSER, error, 432 XML_ERR_WARNING, NULL, 0, 433 (const char *) str1, (const char *) str2, NULL, 0, 0, 434 msg, (const char *) str1, (const char *) str2); 435} 436 437/** 438 * xmlValidityError: 439 * @ctxt: an XML parser context 440 * @error: the error number 441 * @msg: the error message 442 * @str1: extra data 443 * 444 * Handle a validity error. 445 */ 446static void 447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 448 const char *msg, const xmlChar *str1) 449{ 450 xmlStructuredErrorFunc schannel = NULL; 451 452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 453 (ctxt->instate == XML_PARSER_EOF)) 454 return; 455 if (ctxt != NULL) { 456 ctxt->errNo = error; 457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 458 schannel = ctxt->sax->serror; 459 } 460 __xmlRaiseError(schannel, 461 ctxt->vctxt.error, ctxt->vctxt.userData, 462 ctxt, NULL, XML_FROM_DTD, error, 463 XML_ERR_ERROR, NULL, 0, (const char *) str1, 464 NULL, NULL, 0, 0, 465 msg, (const char *) str1); 466 if (ctxt != NULL) { 467 ctxt->valid = 0; 468 } 469} 470 471/** 472 * xmlFatalErrMsgInt: 473 * @ctxt: an XML parser context 474 * @error: the error number 475 * @msg: the error message 476 * @val: an integer value 477 * 478 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 479 */ 480static void 481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 482 const char *msg, int val) 483{ 484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 485 (ctxt->instate == XML_PARSER_EOF)) 486 return; 487 if (ctxt != NULL) 488 ctxt->errNo = error; 489 __xmlRaiseError(NULL, NULL, NULL, 490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 492 if (ctxt != NULL) { 493 ctxt->wellFormed = 0; 494 if (ctxt->recovery == 0) 495 ctxt->disableSAX = 1; 496 } 497} 498 499/** 500 * xmlFatalErrMsgStrIntStr: 501 * @ctxt: an XML parser context 502 * @error: the error number 503 * @msg: the error message 504 * @str1: an string info 505 * @val: an integer value 506 * @str2: an string info 507 * 508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 509 */ 510static void 511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 512 const char *msg, const xmlChar *str1, int val, 513 const xmlChar *str2) 514{ 515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 516 (ctxt->instate == XML_PARSER_EOF)) 517 return; 518 if (ctxt != NULL) 519 ctxt->errNo = error; 520 __xmlRaiseError(NULL, NULL, NULL, 521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 522 NULL, 0, (const char *) str1, (const char *) str2, 523 NULL, val, 0, msg, str1, val, str2); 524 if (ctxt != NULL) { 525 ctxt->wellFormed = 0; 526 if (ctxt->recovery == 0) 527 ctxt->disableSAX = 1; 528 } 529} 530 531/** 532 * xmlFatalErrMsgStr: 533 * @ctxt: an XML parser context 534 * @error: the error number 535 * @msg: the error message 536 * @val: a string value 537 * 538 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 539 */ 540static void 541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 542 const char *msg, const xmlChar * val) 543{ 544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 545 (ctxt->instate == XML_PARSER_EOF)) 546 return; 547 if (ctxt != NULL) 548 ctxt->errNo = error; 549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 550 XML_FROM_PARSER, error, XML_ERR_FATAL, 551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 552 val); 553 if (ctxt != NULL) { 554 ctxt->wellFormed = 0; 555 if (ctxt->recovery == 0) 556 ctxt->disableSAX = 1; 557 } 558} 559 560/** 561 * xmlErrMsgStr: 562 * @ctxt: an XML parser context 563 * @error: the error number 564 * @msg: the error message 565 * @val: a string value 566 * 567 * Handle a non fatal parser error 568 */ 569static void 570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 571 const char *msg, const xmlChar * val) 572{ 573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 574 (ctxt->instate == XML_PARSER_EOF)) 575 return; 576 if (ctxt != NULL) 577 ctxt->errNo = error; 578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 579 XML_FROM_PARSER, error, XML_ERR_ERROR, 580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 581 val); 582} 583 584/** 585 * xmlNsErr: 586 * @ctxt: an XML parser context 587 * @error: the error number 588 * @msg: the message 589 * @info1: extra information string 590 * @info2: extra information string 591 * 592 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 593 */ 594static void 595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 596 const char *msg, 597 const xmlChar * info1, const xmlChar * info2, 598 const xmlChar * info3) 599{ 600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 601 (ctxt->instate == XML_PARSER_EOF)) 602 return; 603 if (ctxt != NULL) 604 ctxt->errNo = error; 605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 606 XML_ERR_ERROR, NULL, 0, (const char *) info1, 607 (const char *) info2, (const char *) info3, 0, 0, msg, 608 info1, info2, info3); 609 if (ctxt != NULL) 610 ctxt->nsWellFormed = 0; 611} 612 613/************************************************************************ 614 * * 615 * Library wide options * 616 * * 617 ************************************************************************/ 618 619/** 620 * xmlHasFeature: 621 * @feature: the feature to be examined 622 * 623 * Examines if the library has been compiled with a given feature. 624 * 625 * Returns a non-zero value if the feature exist, otherwise zero. 626 * Returns zero (0) if the feature does not exist or an unknown 627 * unknown feature is requested, non-zero otherwise. 
628 */ 629int 630xmlHasFeature(xmlFeature feature) 631{ 632 switch (feature) { 633 case XML_WITH_THREAD: 634#ifdef LIBXML_THREAD_ENABLED 635 return(1); 636#else 637 return(0); 638#endif 639 case XML_WITH_TREE: 640#ifdef LIBXML_TREE_ENABLED 641 return(1); 642#else 643 return(0); 644#endif 645 case XML_WITH_OUTPUT: 646#ifdef LIBXML_OUTPUT_ENABLED 647 return(1); 648#else 649 return(0); 650#endif 651 case XML_WITH_PUSH: 652#ifdef LIBXML_PUSH_ENABLED 653 return(1); 654#else 655 return(0); 656#endif 657 case XML_WITH_READER: 658#ifdef LIBXML_READER_ENABLED 659 return(1); 660#else 661 return(0); 662#endif 663 case XML_WITH_PATTERN: 664#ifdef LIBXML_PATTERN_ENABLED 665 return(1); 666#else 667 return(0); 668#endif 669 case XML_WITH_WRITER: 670#ifdef LIBXML_WRITER_ENABLED 671 return(1); 672#else 673 return(0); 674#endif 675 case XML_WITH_SAX1: 676#ifdef LIBXML_SAX1_ENABLED 677 return(1); 678#else 679 return(0); 680#endif 681 case XML_WITH_FTP: 682#ifdef LIBXML_FTP_ENABLED 683 return(1); 684#else 685 return(0); 686#endif 687 case XML_WITH_HTTP: 688#ifdef LIBXML_HTTP_ENABLED 689 return(1); 690#else 691 return(0); 692#endif 693 case XML_WITH_VALID: 694#ifdef LIBXML_VALID_ENABLED 695 return(1); 696#else 697 return(0); 698#endif 699 case XML_WITH_HTML: 700#ifdef LIBXML_HTML_ENABLED 701 return(1); 702#else 703 return(0); 704#endif 705 case XML_WITH_LEGACY: 706#ifdef LIBXML_LEGACY_ENABLED 707 return(1); 708#else 709 return(0); 710#endif 711 case XML_WITH_C14N: 712#ifdef LIBXML_C14N_ENABLED 713 return(1); 714#else 715 return(0); 716#endif 717 case XML_WITH_CATALOG: 718#ifdef LIBXML_CATALOG_ENABLED 719 return(1); 720#else 721 return(0); 722#endif 723 case XML_WITH_XPATH: 724#ifdef LIBXML_XPATH_ENABLED 725 return(1); 726#else 727 return(0); 728#endif 729 case XML_WITH_XPTR: 730#ifdef LIBXML_XPTR_ENABLED 731 return(1); 732#else 733 return(0); 734#endif 735 case XML_WITH_XINCLUDE: 736#ifdef LIBXML_XINCLUDE_ENABLED 737 return(1); 738#else 739 return(0); 740#endif 741 case XML_WITH_ICONV: 
742#ifdef LIBXML_ICONV_ENABLED 743 return(1); 744#else 745 return(0); 746#endif 747 case XML_WITH_ISO8859X: 748#ifdef LIBXML_ISO8859X_ENABLED 749 return(1); 750#else 751 return(0); 752#endif 753 case XML_WITH_UNICODE: 754#ifdef LIBXML_UNICODE_ENABLED 755 return(1); 756#else 757 return(0); 758#endif 759 case XML_WITH_REGEXP: 760#ifdef LIBXML_REGEXP_ENABLED 761 return(1); 762#else 763 return(0); 764#endif 765 case XML_WITH_AUTOMATA: 766#ifdef LIBXML_AUTOMATA_ENABLED 767 return(1); 768#else 769 return(0); 770#endif 771 case XML_WITH_EXPR: 772#ifdef LIBXML_EXPR_ENABLED 773 return(1); 774#else 775 return(0); 776#endif 777 case XML_WITH_SCHEMAS: 778#ifdef LIBXML_SCHEMAS_ENABLED 779 return(1); 780#else 781 return(0); 782#endif 783 case XML_WITH_SCHEMATRON: 784#ifdef LIBXML_SCHEMATRON_ENABLED 785 return(1); 786#else 787 return(0); 788#endif 789 case XML_WITH_MODULES: 790#ifdef LIBXML_MODULES_ENABLED 791 return(1); 792#else 793 return(0); 794#endif 795 case XML_WITH_DEBUG: 796#ifdef LIBXML_DEBUG_ENABLED 797 return(1); 798#else 799 return(0); 800#endif 801 case XML_WITH_DEBUG_MEM: 802#ifdef DEBUG_MEMORY_LOCATION 803 return(1); 804#else 805 return(0); 806#endif 807 case XML_WITH_DEBUG_RUN: 808#ifdef LIBXML_DEBUG_RUNTIME 809 return(1); 810#else 811 return(0); 812#endif 813 case XML_WITH_ZLIB: 814#ifdef LIBXML_ZLIB_ENABLED 815 return(1); 816#else 817 return(0); 818#endif 819 default: 820 break; 821 } 822 return(0); 823} 824 825/************************************************************************ 826 * * 827 * SAX2 defaulted attributes handling * 828 * * 829 ************************************************************************/ 830 831/** 832 * xmlDetectSAX2: 833 * @ctxt: an XML parser context 834 * 835 * Do the SAX2 detection and specific intialization 836 */ 837static void 838xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 839 if (ctxt == NULL) return; 840#ifdef LIBXML_SAX1_ENABLED 841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 842 
((ctxt->sax->startElementNs != NULL) || 843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 844#else 845 ctxt->sax2 = 1; 846#endif /* LIBXML_SAX1_ENABLED */ 847 848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 852 (ctxt->str_xml_ns == NULL)) { 853 xmlErrMemory(ctxt, NULL); 854 } 855} 856 857typedef struct _xmlDefAttrs xmlDefAttrs; 858typedef xmlDefAttrs *xmlDefAttrsPtr; 859struct _xmlDefAttrs { 860 int nbAttrs; /* number of defaulted attributes on that element */ 861 int maxAttrs; /* the size of the array */ 862 const xmlChar *values[4]; /* array of localname/prefix/values */ 863}; 864 865/** 866 * xmlAttrNormalizeSpace: 867 * @src: the source string 868 * @dst: the target string 869 * 870 * Normalize the space in non CDATA attribute values: 871 * If the attribute type is not CDATA, then the XML processor MUST further 872 * process the normalized attribute value by discarding any leading and 873 * trailing space (#x20) characters, and by replacing sequences of space 874 * (#x20) characters by a single space (#x20) character. 875 * Note that the size of dst need to be at least src, and if one doesn't need 876 * to preserve dst (and it doesn't come from a dictionary or read-only) then 877 * passing src as dst is just fine. 878 * 879 * Returns a pointer to the normalized value (dst) or NULL if no conversion 880 * is needed. 
881 */ 882static xmlChar * 883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 884{ 885 if ((src == NULL) || (dst == NULL)) 886 return(NULL); 887 888 while (*src == 0x20) src++; 889 while (*src != 0) { 890 if (*src == 0x20) { 891 while (*src == 0x20) src++; 892 if (*src != 0) 893 *dst++ = 0x20; 894 } else { 895 *dst++ = *src++; 896 } 897 } 898 *dst = 0; 899 if (dst == src) 900 return(NULL); 901 return(dst); 902} 903 904/** 905 * xmlAttrNormalizeSpace2: 906 * @src: the source string 907 * 908 * Normalize the space in non CDATA attribute values, a slightly more complex 909 * front end to avoid allocation problems when running on attribute values 910 * coming from the input. 911 * 912 * Returns a pointer to the normalized value (dst) or NULL if no conversion 913 * is needed. 914 */ 915static const xmlChar * 916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) 917{ 918 int i; 919 int remove_head = 0; 920 int need_realloc = 0; 921 const xmlChar *cur; 922 923 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 924 return(NULL); 925 i = *len; 926 if (i <= 0) 927 return(NULL); 928 929 cur = src; 930 while (*cur == 0x20) { 931 cur++; 932 remove_head++; 933 } 934 while (*cur != 0) { 935 if (*cur == 0x20) { 936 cur++; 937 if ((*cur == 0x20) || (*cur == 0)) { 938 need_realloc = 1; 939 break; 940 } 941 } else 942 cur++; 943 } 944 if (need_realloc) { 945 xmlChar *ret; 946 947 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 948 if (ret == NULL) { 949 xmlErrMemory(ctxt, NULL); 950 return(NULL); 951 } 952 xmlAttrNormalizeSpace(ret, ret); 953 *len = (int) strlen((const char *)ret); 954 return(ret); 955 } else if (remove_head) { 956 *len -= remove_head; 957 return(src + remove_head); 958 } 959 return(NULL); 960} 961 962/** 963 * xmlAddDefAttrs: 964 * @ctxt: an XML parser context 965 * @fullname: the element fullname 966 * @fullattr: the attribute fullname 967 * @value: the attribute value 968 * 969 * Add a defaulted attribute for an 
element 970 */ 971static void 972xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 973 const xmlChar *fullname, 974 const xmlChar *fullattr, 975 const xmlChar *value) { 976 xmlDefAttrsPtr defaults; 977 int len; 978 const xmlChar *name; 979 const xmlChar *prefix; 980 981 /* 982 * Allows to detect attribute redefinitions 983 */ 984 if (ctxt->attsSpecial != NULL) { 985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 986 return; 987 } 988 989 if (ctxt->attsDefault == NULL) { 990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 991 if (ctxt->attsDefault == NULL) 992 goto mem_error; 993 } 994 995 /* 996 * split the element name into prefix:localname , the string found 997 * are within the DTD and then not associated to namespace names. 998 */ 999 name = xmlSplitQName3(fullname, &len); 1000 if (name == NULL) { 1001 name = xmlDictLookup(ctxt->dict, fullname, -1); 1002 prefix = NULL; 1003 } else { 1004 name = xmlDictLookup(ctxt->dict, name, -1); 1005 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1006 } 1007 1008 /* 1009 * make sure there is some storage 1010 */ 1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1012 if (defaults == NULL) { 1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1014 (4 * 4) * sizeof(const xmlChar *)); 1015 if (defaults == NULL) 1016 goto mem_error; 1017 defaults->nbAttrs = 0; 1018 defaults->maxAttrs = 4; 1019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1020 defaults, NULL) < 0) { 1021 xmlFree(defaults); 1022 goto mem_error; 1023 } 1024 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1025 xmlDefAttrsPtr temp; 1026 1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 1029 if (temp == NULL) 1030 goto mem_error; 1031 defaults = temp; 1032 defaults->maxAttrs *= 2; 1033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1034 defaults, NULL) < 0) { 1035 xmlFree(defaults); 1036 goto mem_error; 1037 } 
1038 } 1039 1040 /* 1041 * Split the element name into prefix:localname , the string found 1042 * are within the DTD and hen not associated to namespace names. 1043 */ 1044 name = xmlSplitQName3(fullattr, &len); 1045 if (name == NULL) { 1046 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1047 prefix = NULL; 1048 } else { 1049 name = xmlDictLookup(ctxt->dict, name, -1); 1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1051 } 1052 1053 defaults->values[4 * defaults->nbAttrs] = name; 1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 1055 /* intern the string and precompute the end */ 1056 len = xmlStrlen(value); 1057 value = xmlDictLookup(ctxt->dict, value, len); 1058 defaults->values[4 * defaults->nbAttrs + 2] = value; 1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 1060 defaults->nbAttrs++; 1061 1062 return; 1063 1064mem_error: 1065 xmlErrMemory(ctxt, NULL); 1066 return; 1067} 1068 1069/** 1070 * xmlAddSpecialAttr: 1071 * @ctxt: an XML parser context 1072 * @fullname: the element fullname 1073 * @fullattr: the attribute fullname 1074 * @type: the attribute type 1075 * 1076 * Register this attribute type 1077 */ 1078static void 1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1080 const xmlChar *fullname, 1081 const xmlChar *fullattr, 1082 int type) 1083{ 1084 if (ctxt->attsSpecial == NULL) { 1085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1086 if (ctxt->attsSpecial == NULL) 1087 goto mem_error; 1088 } 1089 1090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1091 return; 1092 1093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1094 (void *) (long) type); 1095 return; 1096 1097mem_error: 1098 xmlErrMemory(ctxt, NULL); 1099 return; 1100} 1101 1102/** 1103 * xmlCleanSpecialAttrCallback: 1104 * 1105 * Removes CDATA attributes from the special attribute table 1106 */ 1107static void 1108xmlCleanSpecialAttrCallback(void *payload, void *data, 1109 const xmlChar *fullname, const xmlChar *fullattr, 1110 
const xmlChar *unused ATTRIBUTE_UNUSED) { 1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1112 1113 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1115 } 1116} 1117 1118/** 1119 * xmlCleanSpecialAttr: 1120 * @ctxt: an XML parser context 1121 * 1122 * Trim the list of attributes defined to remove all those of type 1123 * CDATA as they are not special. This call should be done when finishing 1124 * to parse the DTD and before starting to parse the document root. 1125 */ 1126static void 1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1128{ 1129 if (ctxt->attsSpecial == NULL) 1130 return; 1131 1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1133 1134 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1135 xmlHashFree(ctxt->attsSpecial, NULL); 1136 ctxt->attsSpecial = NULL; 1137 } 1138 return; 1139} 1140 1141/** 1142 * xmlCheckLanguageID: 1143 * @lang: pointer to the string value 1144 * 1145 * Checks that the value conforms to the LanguageID production: 1146 * 1147 * NOTE: this is somewhat deprecated, those productions were removed from 1148 * the XML Second edition. 
1149 * 1150 * [33] LanguageID ::= Langcode ('-' Subcode)* 1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1155 * [38] Subcode ::= ([a-z] | [A-Z])+ 1156 * 1157 * Returns 1 if correct 0 otherwise 1158 **/ 1159int 1160xmlCheckLanguageID(const xmlChar * lang) 1161{ 1162 const xmlChar *cur = lang; 1163 1164 if (cur == NULL) 1165 return (0); 1166 if (((cur[0] == 'i') && (cur[1] == '-')) || 1167 ((cur[0] == 'I') && (cur[1] == '-'))) { 1168 /* 1169 * IANA code 1170 */ 1171 cur += 2; 1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1173 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1174 cur++; 1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1176 ((cur[0] == 'X') && (cur[1] == '-'))) { 1177 /* 1178 * User code 1179 */ 1180 cur += 2; 1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1182 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1183 cur++; 1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1186 /* 1187 * ISO639 1188 */ 1189 cur++; 1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1191 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1192 cur++; 1193 else 1194 return (0); 1195 } else 1196 return (0); 1197 while (cur[0] != 0) { /* non input consuming */ 1198 if (cur[0] != '-') 1199 return (0); 1200 cur++; 1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1202 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1203 cur++; 1204 else 1205 return (0); 1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1207 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1208 cur++; 1209 } 1210 return (1); 1211} 1212 1213/************************************************************************ 1214 * * 1215 * Parser stacks related functions and macros * 1216 * * 1217 
************************************************************************/ 1218 1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1220 const xmlChar ** str); 1221 1222#ifdef SAX2 1223/** 1224 * nsPush: 1225 * @ctxt: an XML parser context 1226 * @prefix: the namespace prefix or NULL 1227 * @URL: the namespace name 1228 * 1229 * Pushes a new parser namespace on top of the ns stack 1230 * 1231 * Returns -1 in case of error, -2 if the namespace should be discarded 1232 * and the index in the stack otherwise. 1233 */ 1234static int 1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1236{ 1237 if (ctxt->options & XML_PARSE_NSCLEAN) { 1238 int i; 1239 for (i = 0;i < ctxt->nsNr;i += 2) { 1240 if (ctxt->nsTab[i] == prefix) { 1241 /* in scope */ 1242 if (ctxt->nsTab[i + 1] == URL) 1243 return(-2); 1244 /* out of scope keep it */ 1245 break; 1246 } 1247 } 1248 } 1249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1250 ctxt->nsMax = 10; 1251 ctxt->nsNr = 0; 1252 ctxt->nsTab = (const xmlChar **) 1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1254 if (ctxt->nsTab == NULL) { 1255 xmlErrMemory(ctxt, NULL); 1256 ctxt->nsMax = 0; 1257 return (-1); 1258 } 1259 } else if (ctxt->nsNr >= ctxt->nsMax) { 1260 const xmlChar ** tmp; 1261 ctxt->nsMax *= 2; 1262 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1263 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1264 if (tmp == NULL) { 1265 xmlErrMemory(ctxt, NULL); 1266 ctxt->nsMax /= 2; 1267 return (-1); 1268 } 1269 ctxt->nsTab = tmp; 1270 } 1271 ctxt->nsTab[ctxt->nsNr++] = prefix; 1272 ctxt->nsTab[ctxt->nsNr++] = URL; 1273 return (ctxt->nsNr); 1274} 1275/** 1276 * nsPop: 1277 * @ctxt: an XML parser context 1278 * @nr: the number to pop 1279 * 1280 * Pops the top @nr parser prefix/namespace from the ns stack 1281 * 1282 * Returns the number of namespaces removed 1283 */ 1284static int 1285nsPop(xmlParserCtxtPtr ctxt, int nr) 1286{ 1287 int i; 1288 1289 if (ctxt->nsTab == NULL) return(0); 1290 if 
(ctxt->nsNr < nr) { 1291 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1292 nr = ctxt->nsNr; 1293 } 1294 if (ctxt->nsNr <= 0) 1295 return (0); 1296 1297 for (i = 0;i < nr;i++) { 1298 ctxt->nsNr--; 1299 ctxt->nsTab[ctxt->nsNr] = NULL; 1300 } 1301 return(nr); 1302} 1303#endif 1304 1305static int 1306xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1307 const xmlChar **atts; 1308 int *attallocs; 1309 int maxatts; 1310 1311 if (ctxt->atts == NULL) { 1312 maxatts = 55; /* allow for 10 attrs by default */ 1313 atts = (const xmlChar **) 1314 xmlMalloc(maxatts * sizeof(xmlChar *)); 1315 if (atts == NULL) goto mem_error; 1316 ctxt->atts = atts; 1317 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1318 if (attallocs == NULL) goto mem_error; 1319 ctxt->attallocs = attallocs; 1320 ctxt->maxatts = maxatts; 1321 } else if (nr + 5 > ctxt->maxatts) { 1322 maxatts = (nr + 5) * 2; 1323 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1324 maxatts * sizeof(const xmlChar *)); 1325 if (atts == NULL) goto mem_error; 1326 ctxt->atts = atts; 1327 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1328 (maxatts / 5) * sizeof(int)); 1329 if (attallocs == NULL) goto mem_error; 1330 ctxt->attallocs = attallocs; 1331 ctxt->maxatts = maxatts; 1332 } 1333 return(ctxt->maxatts); 1334mem_error: 1335 xmlErrMemory(ctxt, NULL); 1336 return(-1); 1337} 1338 1339/** 1340 * inputPush: 1341 * @ctxt: an XML parser context 1342 * @value: the parser input 1343 * 1344 * Pushes a new parser input on top of the input stack 1345 * 1346 * Returns 0 in case of error, the index in the stack otherwise 1347 */ 1348int 1349inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1350{ 1351 if ((ctxt == NULL) || (value == NULL)) 1352 return(0); 1353 if (ctxt->inputNr >= ctxt->inputMax) { 1354 ctxt->inputMax *= 2; 1355 ctxt->inputTab = 1356 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1357 ctxt->inputMax * 1358 sizeof(ctxt->inputTab[0])); 1359 if (ctxt->inputTab == 
NULL) { 1360 xmlErrMemory(ctxt, NULL); 1361 return (0); 1362 } 1363 } 1364 ctxt->inputTab[ctxt->inputNr] = value; 1365 ctxt->input = value; 1366 return (ctxt->inputNr++); 1367} 1368/** 1369 * inputPop: 1370 * @ctxt: an XML parser context 1371 * 1372 * Pops the top parser input from the input stack 1373 * 1374 * Returns the input just removed 1375 */ 1376xmlParserInputPtr 1377inputPop(xmlParserCtxtPtr ctxt) 1378{ 1379 xmlParserInputPtr ret; 1380 1381 if (ctxt == NULL) 1382 return(NULL); 1383 if (ctxt->inputNr <= 0) 1384 return (NULL); 1385 ctxt->inputNr--; 1386 if (ctxt->inputNr > 0) 1387 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1388 else 1389 ctxt->input = NULL; 1390 ret = ctxt->inputTab[ctxt->inputNr]; 1391 ctxt->inputTab[ctxt->inputNr] = NULL; 1392 return (ret); 1393} 1394/** 1395 * nodePush: 1396 * @ctxt: an XML parser context 1397 * @value: the element node 1398 * 1399 * Pushes a new element node on top of the node stack 1400 * 1401 * Returns 0 in case of error, the index in the stack otherwise 1402 */ 1403int 1404nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1405{ 1406 if (ctxt == NULL) return(0); 1407 if (ctxt->nodeNr >= ctxt->nodeMax) { 1408 xmlNodePtr *tmp; 1409 1410 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1411 ctxt->nodeMax * 2 * 1412 sizeof(ctxt->nodeTab[0])); 1413 if (tmp == NULL) { 1414 xmlErrMemory(ctxt, NULL); 1415 return (0); 1416 } 1417 ctxt->nodeTab = tmp; 1418 ctxt->nodeMax *= 2; 1419 } 1420 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 1421 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1422 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 1423 xmlParserMaxDepth); 1424 ctxt->instate = XML_PARSER_EOF; 1425 return(0); 1426 } 1427 ctxt->nodeTab[ctxt->nodeNr] = value; 1428 ctxt->node = value; 1429 return (ctxt->nodeNr++); 1430} 1431/** 1432 * nodePop: 1433 * @ctxt: an XML parser context 1434 * 1435 * Pops the top element node from the node stack 1436 * 1437 * Returns the node just removed 1438 */ 
1439xmlNodePtr 1440nodePop(xmlParserCtxtPtr ctxt) 1441{ 1442 xmlNodePtr ret; 1443 1444 if (ctxt == NULL) return(NULL); 1445 if (ctxt->nodeNr <= 0) 1446 return (NULL); 1447 ctxt->nodeNr--; 1448 if (ctxt->nodeNr > 0) 1449 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1450 else 1451 ctxt->node = NULL; 1452 ret = ctxt->nodeTab[ctxt->nodeNr]; 1453 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1454 return (ret); 1455} 1456 1457#ifdef LIBXML_PUSH_ENABLED 1458/** 1459 * nameNsPush: 1460 * @ctxt: an XML parser context 1461 * @value: the element name 1462 * @prefix: the element prefix 1463 * @URI: the element namespace name 1464 * 1465 * Pushes a new element name/prefix/URL on top of the name stack 1466 * 1467 * Returns -1 in case of error, the index in the stack otherwise 1468 */ 1469static int 1470nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1471 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1472{ 1473 if (ctxt->nameNr >= ctxt->nameMax) { 1474 const xmlChar * *tmp; 1475 void **tmp2; 1476 ctxt->nameMax *= 2; 1477 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1478 ctxt->nameMax * 1479 sizeof(ctxt->nameTab[0])); 1480 if (tmp == NULL) { 1481 ctxt->nameMax /= 2; 1482 goto mem_error; 1483 } 1484 ctxt->nameTab = tmp; 1485 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1486 ctxt->nameMax * 3 * 1487 sizeof(ctxt->pushTab[0])); 1488 if (tmp2 == NULL) { 1489 ctxt->nameMax /= 2; 1490 goto mem_error; 1491 } 1492 ctxt->pushTab = tmp2; 1493 } 1494 ctxt->nameTab[ctxt->nameNr] = value; 1495 ctxt->name = value; 1496 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1497 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1498 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1499 return (ctxt->nameNr++); 1500mem_error: 1501 xmlErrMemory(ctxt, NULL); 1502 return (-1); 1503} 1504/** 1505 * nameNsPop: 1506 * @ctxt: an XML parser context 1507 * 1508 * Pops the top element/prefix/URI name from the name stack 1509 * 1510 * Returns the name just removed 
1511 */ 1512static const xmlChar * 1513nameNsPop(xmlParserCtxtPtr ctxt) 1514{ 1515 const xmlChar *ret; 1516 1517 if (ctxt->nameNr <= 0) 1518 return (NULL); 1519 ctxt->nameNr--; 1520 if (ctxt->nameNr > 0) 1521 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1522 else 1523 ctxt->name = NULL; 1524 ret = ctxt->nameTab[ctxt->nameNr]; 1525 ctxt->nameTab[ctxt->nameNr] = NULL; 1526 return (ret); 1527} 1528#endif /* LIBXML_PUSH_ENABLED */ 1529 1530/** 1531 * namePush: 1532 * @ctxt: an XML parser context 1533 * @value: the element name 1534 * 1535 * Pushes a new element name on top of the name stack 1536 * 1537 * Returns -1 in case of error, the index in the stack otherwise 1538 */ 1539int 1540namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1541{ 1542 if (ctxt == NULL) return (-1); 1543 1544 if (ctxt->nameNr >= ctxt->nameMax) { 1545 const xmlChar * *tmp; 1546 ctxt->nameMax *= 2; 1547 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1548 ctxt->nameMax * 1549 sizeof(ctxt->nameTab[0])); 1550 if (tmp == NULL) { 1551 ctxt->nameMax /= 2; 1552 goto mem_error; 1553 } 1554 ctxt->nameTab = tmp; 1555 } 1556 ctxt->nameTab[ctxt->nameNr] = value; 1557 ctxt->name = value; 1558 return (ctxt->nameNr++); 1559mem_error: 1560 xmlErrMemory(ctxt, NULL); 1561 return (-1); 1562} 1563/** 1564 * namePop: 1565 * @ctxt: an XML parser context 1566 * 1567 * Pops the top element name from the name stack 1568 * 1569 * Returns the name just removed 1570 */ 1571const xmlChar * 1572namePop(xmlParserCtxtPtr ctxt) 1573{ 1574 const xmlChar *ret; 1575 1576 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1577 return (NULL); 1578 ctxt->nameNr--; 1579 if (ctxt->nameNr > 0) 1580 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1581 else 1582 ctxt->name = NULL; 1583 ret = ctxt->nameTab[ctxt->nameNr]; 1584 ctxt->nameTab[ctxt->nameNr] = NULL; 1585 return (ret); 1586} 1587 1588static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1589 if (ctxt->spaceNr >= ctxt->spaceMax) { 1590 int *tmp; 1591 1592 
ctxt->spaceMax *= 2; 1593 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1594 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1595 if (tmp == NULL) { 1596 xmlErrMemory(ctxt, NULL); 1597 return(0); 1598 } 1599 ctxt->spaceTab = tmp; 1600 } 1601 ctxt->spaceTab[ctxt->spaceNr] = val; 1602 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1603 return(ctxt->spaceNr++); 1604} 1605 1606static int spacePop(xmlParserCtxtPtr ctxt) { 1607 int ret; 1608 if (ctxt->spaceNr <= 0) return(0); 1609 ctxt->spaceNr--; 1610 if (ctxt->spaceNr > 0) 1611 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1612 else 1613 ctxt->space = &ctxt->spaceTab[0]; 1614 ret = ctxt->spaceTab[ctxt->spaceNr]; 1615 ctxt->spaceTab[ctxt->spaceNr] = -1; 1616 return(ret); 1617} 1618 1619/* 1620 * Macros for accessing the content. Those should be used only by the parser, 1621 * and not exported. 1622 * 1623 * Dirty macros, i.e. one often need to make assumption on the context to 1624 * use them 1625 * 1626 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1627 * To be used with extreme caution since operations consuming 1628 * characters may move the input buffer to a different location ! 1629 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1630 * This should be used internally by the parser 1631 * only to compare to ASCII values otherwise it would break when 1632 * running with UTF-8 encoding. 1633 * RAW same as CUR but in the input buffer, bypass any token 1634 * extraction that may have been done 1635 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1636 * to compare on ASCII based substring. 1637 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1638 * strings without newlines within the parser. 1639 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1640 * defined char within the parser. 
1641 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1642 * 1643 * NEXT Skip to the next character, this does the proper decoding 1644 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1645 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1646 * CUR_CHAR(l) returns the current unicode character (int), set l 1647 * to the number of xmlChars used for the encoding [0-5]. 1648 * CUR_SCHAR same but operate on a string instead of the context 1649 * COPY_BUF copy the current unicode char to the target buffer, increment 1650 * the index 1651 * GROW, SHRINK handling of input buffers 1652 */ 1653 1654#define RAW (*ctxt->input->cur) 1655#define CUR (*ctxt->input->cur) 1656#define NXT(val) ctxt->input->cur[(val)] 1657#define CUR_PTR ctxt->input->cur 1658 1659#define CMP4( s, c1, c2, c3, c4 ) \ 1660 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1661 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1662#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1663 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1664#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1665 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1666#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1667 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1668#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1669 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1670#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1671 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1672 ((unsigned char *) s)[ 8 ] == c9 ) 1673#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1674 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1675 ((unsigned char *) s)[ 9 ] == c10 ) 1676 1677#define SKIP(val) do { \ 1678 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1679 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1680 if ((*ctxt->input->cur == 0) && \ 1681 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1682 xmlPopInput(ctxt); \ 1683 } while (0) 1684 1685#define SKIPL(val) do { \ 1686 int skipl; \ 1687 for(skipl=0; skipl<val; skipl++) { \ 1688 if (*(ctxt->input->cur) == '\n') { \ 1689 ctxt->input->line++; ctxt->input->col = 1; \ 1690 } else ctxt->input->col++; \ 1691 ctxt->nbChars++; \ 1692 ctxt->input->cur++; \ 1693 } \ 1694 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1695 if ((*ctxt->input->cur == 0) && \ 1696 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1697 xmlPopInput(ctxt); \ 1698 } while (0) 1699 1700#define SHRINK if ((ctxt->progressive == 0) && \ 1701 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1702 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1703 xmlSHRINK (ctxt); 1704 1705static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1706 xmlParserInputShrink(ctxt->input); 1707 if ((*ctxt->input->cur == 0) && 1708 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1709 xmlPopInput(ctxt); 1710 } 1711 1712#define GROW if ((ctxt->progressive == 0) && \ 1713 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1714 xmlGROW (ctxt); 1715 1716static void xmlGROW (xmlParserCtxtPtr ctxt) { 1717 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1718 if ((*ctxt->input->cur == 0) && 1719 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1720 xmlPopInput(ctxt); 1721} 1722 1723#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1724 1725#define NEXT xmlNextChar(ctxt) 1726 1727#define NEXT1 { \ 1728 ctxt->input->col++; \ 1729 ctxt->input->cur++; \ 1730 ctxt->nbChars++; \ 1731 if (*ctxt->input->cur == 0) \ 1732 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1733 } 1734 1735#define NEXTL(l) do { \ 1736 if (*(ctxt->input->cur) == '\n') { \ 1737 ctxt->input->line++; ctxt->input->col = 1; \ 1738 } else ctxt->input->col++; \ 1739 ctxt->input->cur += l; \ 1740 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1741 } while (0) 1742 1743#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1744#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1745 1746#define COPY_BUF(l,b,i,v) \ 1747 if (l == 1) b[i++] = (xmlChar) v; \ 1748 else i += xmlCopyCharMultiByte(&b[i],v) 1749 1750/** 1751 * xmlSkipBlankChars: 1752 * @ctxt: the XML parser context 1753 * 1754 * skip all blanks character found at that point in the input streams. 1755 * It pops up finished entities in the process if allowable at that point. 1756 * 1757 * Returns the number of space chars skipped 1758 */ 1759 1760int 1761xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1762 int res = 0; 1763 1764 /* 1765 * It's Okay to use CUR/NEXT here since all the blanks are on 1766 * the ASCII range. 1767 */ 1768 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1769 const xmlChar *cur; 1770 /* 1771 * if we are in the document content, go really fast 1772 */ 1773 cur = ctxt->input->cur; 1774 while (IS_BLANK_CH(*cur)) { 1775 if (*cur == '\n') { 1776 ctxt->input->line++; ctxt->input->col = 1; 1777 } 1778 cur++; 1779 res++; 1780 if (*cur == 0) { 1781 ctxt->input->cur = cur; 1782 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1783 cur = ctxt->input->cur; 1784 } 1785 } 1786 ctxt->input->cur = cur; 1787 } else { 1788 int cur; 1789 do { 1790 cur = CUR; 1791 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1792 NEXT; 1793 cur = CUR; 1794 res++; 1795 } 1796 while ((cur == 0) && (ctxt->inputNr > 1) && 1797 (ctxt->instate != XML_PARSER_COMMENT)) { 1798 xmlPopInput(ctxt); 1799 cur = CUR; 1800 } 1801 /* 1802 * Need to handle support of entities branching here 1803 */ 1804 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1805 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1806 } 1807 return(res); 1808} 1809 1810/************************************************************************ 1811 * * 1812 * Commodity functions to handle entities * 1813 * * 1814 
************************************************************************/ 1815 1816/** 1817 * xmlPopInput: 1818 * @ctxt: an XML parser context 1819 * 1820 * xmlPopInput: the current input pointed by ctxt->input came to an end 1821 * pop it and return the next char. 1822 * 1823 * Returns the current xmlChar in the parser context 1824 */ 1825xmlChar 1826xmlPopInput(xmlParserCtxtPtr ctxt) { 1827 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1828 if (xmlParserDebugEntities) 1829 xmlGenericError(xmlGenericErrorContext, 1830 "Popping input %d\n", ctxt->inputNr); 1831 xmlFreeInputStream(inputPop(ctxt)); 1832 if ((*ctxt->input->cur == 0) && 1833 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1834 return(xmlPopInput(ctxt)); 1835 return(CUR); 1836} 1837 1838/** 1839 * xmlPushInput: 1840 * @ctxt: an XML parser context 1841 * @input: an XML parser input fragment (entity, XML fragment ...). 1842 * 1843 * xmlPushInput: switch to a new input stream which is stacked on top 1844 * of the previous one(s). 1845 */ 1846void 1847xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1848 if (input == NULL) return; 1849 1850 if (xmlParserDebugEntities) { 1851 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1852 xmlGenericError(xmlGenericErrorContext, 1853 "%s(%d): ", ctxt->input->filename, 1854 ctxt->input->line); 1855 xmlGenericError(xmlGenericErrorContext, 1856 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1857 } 1858 inputPush(ctxt, input); 1859 GROW; 1860} 1861 1862/** 1863 * xmlParseCharRef: 1864 * @ctxt: an XML parser context 1865 * 1866 * parse Reference declarations 1867 * 1868 * [66] CharRef ::= '&#' [0-9]+ ';' | 1869 * '&#x' [0-9a-fA-F]+ ';' 1870 * 1871 * [ WFC: Legal Character ] 1872 * Characters referred to using character references must match the 1873 * production for Char. 
1874 * 1875 * Returns the value parsed (as an int), 0 in case of error 1876 */ 1877int 1878xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1879 unsigned int val = 0; 1880 int count = 0; 1881 unsigned int outofrange = 0; 1882 1883 /* 1884 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1885 */ 1886 if ((RAW == '&') && (NXT(1) == '#') && 1887 (NXT(2) == 'x')) { 1888 SKIP(3); 1889 GROW; 1890 while (RAW != ';') { /* loop blocked by count */ 1891 if (count++ > 20) { 1892 count = 0; 1893 GROW; 1894 } 1895 if ((RAW >= '0') && (RAW <= '9')) 1896 val = val * 16 + (CUR - '0'); 1897 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1898 val = val * 16 + (CUR - 'a') + 10; 1899 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1900 val = val * 16 + (CUR - 'A') + 10; 1901 else { 1902 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1903 val = 0; 1904 break; 1905 } 1906 if (val > 0x10FFFF) 1907 outofrange = val; 1908 1909 NEXT; 1910 count++; 1911 } 1912 if (RAW == ';') { 1913 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1914 ctxt->input->col++; 1915 ctxt->nbChars ++; 1916 ctxt->input->cur++; 1917 } 1918 } else if ((RAW == '&') && (NXT(1) == '#')) { 1919 SKIP(2); 1920 GROW; 1921 while (RAW != ';') { /* loop blocked by count */ 1922 if (count++ > 20) { 1923 count = 0; 1924 GROW; 1925 } 1926 if ((RAW >= '0') && (RAW <= '9')) 1927 val = val * 10 + (CUR - '0'); 1928 else { 1929 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1930 val = 0; 1931 break; 1932 } 1933 if (val > 0x10FFFF) 1934 outofrange = val; 1935 1936 NEXT; 1937 count++; 1938 } 1939 if (RAW == ';') { 1940 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1941 ctxt->input->col++; 1942 ctxt->nbChars ++; 1943 ctxt->input->cur++; 1944 } 1945 } else { 1946 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1947 } 1948 1949 /* 1950 * [ WFC: Legal Character ] 1951 * Characters referred to using character references must match the 1952 * production for Char. 
1953 */ 1954 if ((IS_CHAR(val) && (outofrange == 0))) { 1955 return(val); 1956 } else { 1957 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1958 "xmlParseCharRef: invalid xmlChar value %d\n", 1959 val); 1960 } 1961 return(0); 1962} 1963 1964/** 1965 * xmlParseStringCharRef: 1966 * @ctxt: an XML parser context 1967 * @str: a pointer to an index in the string 1968 * 1969 * parse Reference declarations, variant parsing from a string rather 1970 * than an an input flow. 1971 * 1972 * [66] CharRef ::= '&#' [0-9]+ ';' | 1973 * '&#x' [0-9a-fA-F]+ ';' 1974 * 1975 * [ WFC: Legal Character ] 1976 * Characters referred to using character references must match the 1977 * production for Char. 1978 * 1979 * Returns the value parsed (as an int), 0 in case of error, str will be 1980 * updated to the current value of the index 1981 */ 1982static int 1983xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1984 const xmlChar *ptr; 1985 xmlChar cur; 1986 unsigned int val = 0; 1987 unsigned int outofrange = 0; 1988 1989 if ((str == NULL) || (*str == NULL)) return(0); 1990 ptr = *str; 1991 cur = *ptr; 1992 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1993 ptr += 3; 1994 cur = *ptr; 1995 while (cur != ';') { /* Non input consuming loop */ 1996 if ((cur >= '0') && (cur <= '9')) 1997 val = val * 16 + (cur - '0'); 1998 else if ((cur >= 'a') && (cur <= 'f')) 1999 val = val * 16 + (cur - 'a') + 10; 2000 else if ((cur >= 'A') && (cur <= 'F')) 2001 val = val * 16 + (cur - 'A') + 10; 2002 else { 2003 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2004 val = 0; 2005 break; 2006 } 2007 if (val > 0x10FFFF) 2008 outofrange = val; 2009 2010 ptr++; 2011 cur = *ptr; 2012 } 2013 if (cur == ';') 2014 ptr++; 2015 } else if ((cur == '&') && (ptr[1] == '#')){ 2016 ptr += 2; 2017 cur = *ptr; 2018 while (cur != ';') { /* Non input consuming loops */ 2019 if ((cur >= '0') && (cur <= '9')) 2020 val = val * 10 + (cur - '0'); 2021 else { 2022 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 2023 val = 0; 2024 break; 2025 } 2026 if (val > 0x10FFFF) 2027 outofrange = val; 2028 2029 ptr++; 2030 cur = *ptr; 2031 } 2032 if (cur == ';') 2033 ptr++; 2034 } else { 2035 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2036 return(0); 2037 } 2038 *str = ptr; 2039 2040 /* 2041 * [ WFC: Legal Character ] 2042 * Characters referred to using character references must match the 2043 * production for Char. 2044 */ 2045 if ((IS_CHAR(val) && (outofrange == 0))) { 2046 return(val); 2047 } else { 2048 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2049 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2050 val); 2051 } 2052 return(0); 2053} 2054 2055/** 2056 * xmlNewBlanksWrapperInputStream: 2057 * @ctxt: an XML parser context 2058 * @entity: an Entity pointer 2059 * 2060 * Create a new input stream for wrapping 2061 * blanks around a PEReference 2062 * 2063 * Returns the new input stream or NULL 2064 */ 2065 2066static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2067 2068static xmlParserInputPtr 2069xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2070 xmlParserInputPtr input; 2071 xmlChar *buffer; 2072 size_t length; 2073 if (entity == NULL) { 2074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2075 "xmlNewBlanksWrapperInputStream entity\n"); 2076 return(NULL); 2077 } 2078 if (xmlParserDebugEntities) 2079 xmlGenericError(xmlGenericErrorContext, 2080 "new blanks wrapper for entity: %s\n", entity->name); 2081 input = xmlNewInputStream(ctxt); 2082 if (input == NULL) { 2083 return(NULL); 2084 } 2085 length = xmlStrlen(entity->name) + 5; 2086 buffer = xmlMallocAtomic(length); 2087 if (buffer == NULL) { 2088 xmlErrMemory(ctxt, NULL); 2089 xmlFree(input); 2090 return(NULL); 2091 } 2092 buffer [0] = ' '; 2093 buffer [1] = '%'; 2094 buffer [length-3] = ';'; 2095 buffer [length-2] = ' '; 2096 buffer [length-1] = 0; 2097 memcpy(buffer + 2, entity->name, length - 5); 2098 input->free = deallocblankswrapper; 
2099 input->base = buffer; 2100 input->cur = buffer; 2101 input->length = length; 2102 input->end = &buffer[length]; 2103 return(input); 2104} 2105 2106/** 2107 * xmlParserHandlePEReference: 2108 * @ctxt: the parser context 2109 * 2110 * [69] PEReference ::= '%' Name ';' 2111 * 2112 * [ WFC: No Recursion ] 2113 * A parsed entity must not contain a recursive 2114 * reference to itself, either directly or indirectly. 2115 * 2116 * [ WFC: Entity Declared ] 2117 * In a document without any DTD, a document with only an internal DTD 2118 * subset which contains no parameter entity references, or a document 2119 * with "standalone='yes'", ... ... The declaration of a parameter 2120 * entity must precede any reference to it... 2121 * 2122 * [ VC: Entity Declared ] 2123 * In a document with an external subset or external parameter entities 2124 * with "standalone='no'", ... ... The declaration of a parameter entity 2125 * must precede any reference to it... 2126 * 2127 * [ WFC: In DTD ] 2128 * Parameter-entity references may only appear in the DTD. 2129 * NOTE: misleading but this is handled. 2130 * 2131 * A PEReference may have been detected in the current input stream 2132 * the handling is done accordingly to 2133 * http://www.w3.org/TR/REC-xml#entproc 2134 * i.e. 
2135 * - Included in literal in entity values 2136 * - Included as Parameter Entity reference within DTDs 2137 */ 2138void 2139xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2140 const xmlChar *name; 2141 xmlEntityPtr entity = NULL; 2142 xmlParserInputPtr input; 2143 2144 if (RAW != '%') return; 2145 switch(ctxt->instate) { 2146 case XML_PARSER_CDATA_SECTION: 2147 return; 2148 case XML_PARSER_COMMENT: 2149 return; 2150 case XML_PARSER_START_TAG: 2151 return; 2152 case XML_PARSER_END_TAG: 2153 return; 2154 case XML_PARSER_EOF: 2155 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2156 return; 2157 case XML_PARSER_PROLOG: 2158 case XML_PARSER_START: 2159 case XML_PARSER_MISC: 2160 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2161 return; 2162 case XML_PARSER_ENTITY_DECL: 2163 case XML_PARSER_CONTENT: 2164 case XML_PARSER_ATTRIBUTE_VALUE: 2165 case XML_PARSER_PI: 2166 case XML_PARSER_SYSTEM_LITERAL: 2167 case XML_PARSER_PUBLIC_LITERAL: 2168 /* we just ignore it there */ 2169 return; 2170 case XML_PARSER_EPILOG: 2171 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2172 return; 2173 case XML_PARSER_ENTITY_VALUE: 2174 /* 2175 * NOTE: in the case of entity values, we don't do the 2176 * substitution here since we need the literal 2177 * entity value to be able to save the internal 2178 * subset of the document. 2179 * This will be handled by xmlStringDecodeEntities 2180 */ 2181 return; 2182 case XML_PARSER_DTD: 2183 /* 2184 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2185 * In the internal DTD subset, parameter-entity references 2186 * can occur only where markup declarations can occur, not 2187 * within markup declarations. 
2188 * In that case this is handled in xmlParseMarkupDecl 2189 */ 2190 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2191 return; 2192 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2193 return; 2194 break; 2195 case XML_PARSER_IGNORE: 2196 return; 2197 } 2198 2199 NEXT; 2200 name = xmlParseName(ctxt); 2201 if (xmlParserDebugEntities) 2202 xmlGenericError(xmlGenericErrorContext, 2203 "PEReference: %s\n", name); 2204 if (name == NULL) { 2205 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2206 } else { 2207 if (RAW == ';') { 2208 NEXT; 2209 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2210 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2211 if (entity == NULL) { 2212 2213 /* 2214 * [ WFC: Entity Declared ] 2215 * In a document without any DTD, a document with only an 2216 * internal DTD subset which contains no parameter entity 2217 * references, or a document with "standalone='yes'", ... 2218 * ... The declaration of a parameter entity must precede 2219 * any reference to it... 2220 */ 2221 if ((ctxt->standalone == 1) || 2222 ((ctxt->hasExternalSubset == 0) && 2223 (ctxt->hasPErefs == 0))) { 2224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2225 "PEReference: %%%s; not found\n", name); 2226 } else { 2227 /* 2228 * [ VC: Entity Declared ] 2229 * In a document with an external subset or external 2230 * parameter entities with "standalone='no'", ... 2231 * ... The declaration of a parameter entity must precede 2232 * any reference to it... 
2233 */ 2234 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2235 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2236 "PEReference: %%%s; not found\n", 2237 name); 2238 } else 2239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2240 "PEReference: %%%s; not found\n", 2241 name, NULL); 2242 ctxt->valid = 0; 2243 } 2244 } else if (ctxt->input->free != deallocblankswrapper) { 2245 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2246 xmlPushInput(ctxt, input); 2247 } else { 2248 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2249 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2250 xmlChar start[4]; 2251 xmlCharEncoding enc; 2252 2253 /* 2254 * handle the extra spaces added before and after 2255 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2256 * this is done independently. 2257 */ 2258 input = xmlNewEntityInputStream(ctxt, entity); 2259 xmlPushInput(ctxt, input); 2260 2261 /* 2262 * Get the 4 first bytes and decode the charset 2263 * if enc != XML_CHAR_ENCODING_NONE 2264 * plug some encoding conversion routines. 2265 * Note that, since we may have some non-UTF8 2266 * encoding (like UTF16, bug 135229), the 'length' 2267 * is not known, but we can calculate based upon 2268 * the amount of data in the buffer. 
2269 */ 2270 GROW 2271 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2272 start[0] = RAW; 2273 start[1] = NXT(1); 2274 start[2] = NXT(2); 2275 start[3] = NXT(3); 2276 enc = xmlDetectCharEncoding(start, 4); 2277 if (enc != XML_CHAR_ENCODING_NONE) { 2278 xmlSwitchEncoding(ctxt, enc); 2279 } 2280 } 2281 2282 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2283 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2284 (IS_BLANK_CH(NXT(5)))) { 2285 xmlParseTextDecl(ctxt); 2286 } 2287 } else { 2288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2289 "PEReference: %s is not a parameter entity\n", 2290 name); 2291 } 2292 } 2293 } else { 2294 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2295 } 2296 } 2297} 2298 2299/* 2300 * Macro used to grow the current buffer. 2301 */ 2302#define growBuffer(buffer) { \ 2303 xmlChar *tmp; \ 2304 buffer##_size *= 2; \ 2305 tmp = (xmlChar *) \ 2306 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2307 if (tmp == NULL) goto mem_error; \ 2308 buffer = tmp; \ 2309} 2310 2311/** 2312 * xmlStringLenDecodeEntities: 2313 * @ctxt: the parser context 2314 * @str: the input string 2315 * @len: the string length 2316 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2317 * @end: an end marker xmlChar, 0 if none 2318 * @end2: an end marker xmlChar, 0 if none 2319 * @end3: an end marker xmlChar, 0 if none 2320 * 2321 * Takes a entity string content and process to do the adequate substitutions. 2322 * 2323 * [67] Reference ::= EntityRef | CharRef 2324 * 2325 * [69] PEReference ::= '%' Name ';' 2326 * 2327 * Returns A newly allocated string with the substitution done. The caller 2328 * must deallocate it ! 
2329 */ 2330xmlChar * 2331xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2332 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2333 xmlChar *buffer = NULL; 2334 int buffer_size = 0; 2335 2336 xmlChar *current = NULL; 2337 xmlChar *rep = NULL; 2338 const xmlChar *last; 2339 xmlEntityPtr ent; 2340 int c,l; 2341 int nbchars = 0; 2342 2343 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2344 return(NULL); 2345 last = str + len; 2346 2347 if (ctxt->depth > 40) { 2348 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2349 return(NULL); 2350 } 2351 2352 /* 2353 * allocate a translation buffer. 2354 */ 2355 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2356 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2357 if (buffer == NULL) goto mem_error; 2358 2359 /* 2360 * OK loop until we reach one of the ending char or a size limit. 2361 * we are operating on already parsed values. 2362 */ 2363 if (str < last) 2364 c = CUR_SCHAR(str, l); 2365 else 2366 c = 0; 2367 while ((c != 0) && (c != end) && /* non input consuming loop */ 2368 (c != end2) && (c != end3)) { 2369 2370 if (c == 0) break; 2371 if ((c == '&') && (str[1] == '#')) { 2372 int val = xmlParseStringCharRef(ctxt, &str); 2373 if (val != 0) { 2374 COPY_BUF(0,buffer,nbchars,val); 2375 } 2376 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2377 growBuffer(buffer); 2378 } 2379 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2380 if (xmlParserDebugEntities) 2381 xmlGenericError(xmlGenericErrorContext, 2382 "String decoding Entity Reference: %.30s\n", 2383 str); 2384 ent = xmlParseStringEntityRef(ctxt, &str); 2385 if ((ent != NULL) && 2386 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2387 if (ent->content != NULL) { 2388 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2389 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2390 growBuffer(buffer); 2391 } 2392 } else { 2393 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2394 "predefined entity has no 
content\n"); 2395 } 2396 } else if ((ent != NULL) && (ent->content != NULL)) { 2397 ctxt->depth++; 2398 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2399 0, 0, 0); 2400 ctxt->depth--; 2401 if (rep != NULL) { 2402 current = rep; 2403 while (*current != 0) { /* non input consuming loop */ 2404 buffer[nbchars++] = *current++; 2405 if (nbchars > 2406 buffer_size - XML_PARSER_BUFFER_SIZE) { 2407 growBuffer(buffer); 2408 } 2409 } 2410 xmlFree(rep); 2411 rep = NULL; 2412 } 2413 } else if (ent != NULL) { 2414 int i = xmlStrlen(ent->name); 2415 const xmlChar *cur = ent->name; 2416 2417 buffer[nbchars++] = '&'; 2418 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2419 growBuffer(buffer); 2420 } 2421 for (;i > 0;i--) 2422 buffer[nbchars++] = *cur++; 2423 buffer[nbchars++] = ';'; 2424 } 2425 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2426 if (xmlParserDebugEntities) 2427 xmlGenericError(xmlGenericErrorContext, 2428 "String decoding PE Reference: %.30s\n", str); 2429 ent = xmlParseStringPEReference(ctxt, &str); 2430 if (ent != NULL) { 2431 if (ent->content == NULL) { 2432 if (xmlLoadEntityContent(ctxt, ent) < 0) { 2433 } 2434 } 2435 ctxt->depth++; 2436 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2437 0, 0, 0); 2438 ctxt->depth--; 2439 if (rep != NULL) { 2440 current = rep; 2441 while (*current != 0) { /* non input consuming loop */ 2442 buffer[nbchars++] = *current++; 2443 if (nbchars > 2444 buffer_size - XML_PARSER_BUFFER_SIZE) { 2445 growBuffer(buffer); 2446 } 2447 } 2448 xmlFree(rep); 2449 rep = NULL; 2450 } 2451 } 2452 } else { 2453 COPY_BUF(l,buffer,nbchars,c); 2454 str += l; 2455 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2456 growBuffer(buffer); 2457 } 2458 } 2459 if (str < last) 2460 c = CUR_SCHAR(str, l); 2461 else 2462 c = 0; 2463 } 2464 buffer[nbchars++] = 0; 2465 return(buffer); 2466 2467mem_error: 2468 xmlErrMemory(ctxt, NULL); 2469 if (rep != NULL) 2470 xmlFree(rep); 2471 if (buffer != NULL) 2472 
xmlFree(buffer); 2473 return(NULL); 2474} 2475 2476/** 2477 * xmlStringDecodeEntities: 2478 * @ctxt: the parser context 2479 * @str: the input string 2480 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2481 * @end: an end marker xmlChar, 0 if none 2482 * @end2: an end marker xmlChar, 0 if none 2483 * @end3: an end marker xmlChar, 0 if none 2484 * 2485 * Takes a entity string content and process to do the adequate substitutions. 2486 * 2487 * [67] Reference ::= EntityRef | CharRef 2488 * 2489 * [69] PEReference ::= '%' Name ';' 2490 * 2491 * Returns A newly allocated string with the substitution done. The caller 2492 * must deallocate it ! 2493 */ 2494xmlChar * 2495xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2496 xmlChar end, xmlChar end2, xmlChar end3) { 2497 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2498 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2499 end, end2, end3)); 2500} 2501 2502/************************************************************************ 2503 * * 2504 * Commodity functions, cleanup needed ? * 2505 * * 2506 ************************************************************************/ 2507 2508/** 2509 * areBlanks: 2510 * @ctxt: an XML parser context 2511 * @str: a xmlChar * 2512 * @len: the size of @str 2513 * @blank_chars: we know the chars are blanks 2514 * 2515 * Is this a sequence of blank chars that one can ignore ? 2516 * 2517 * Returns 1 if ignorable 0 otherwise. 2518 */ 2519 2520static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2521 int blank_chars) { 2522 int i, ret; 2523 xmlNodePtr lastChild; 2524 2525 /* 2526 * Don't spend time trying to differentiate them, the same callback is 2527 * used ! 2528 */ 2529 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2530 return(0); 2531 2532 /* 2533 * Check for xml:space value. 
2534 */ 2535 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2536 (*(ctxt->space) == -2)) 2537 return(0); 2538 2539 /* 2540 * Check that the string is made of blanks 2541 */ 2542 if (blank_chars == 0) { 2543 for (i = 0;i < len;i++) 2544 if (!(IS_BLANK_CH(str[i]))) return(0); 2545 } 2546 2547 /* 2548 * Look if the element is mixed content in the DTD if available 2549 */ 2550 if (ctxt->node == NULL) return(0); 2551 if (ctxt->myDoc != NULL) { 2552 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2553 if (ret == 0) return(1); 2554 if (ret == 1) return(0); 2555 } 2556 2557 /* 2558 * Otherwise, heuristic :-\ 2559 */ 2560 if ((RAW != '<') && (RAW != 0xD)) return(0); 2561 if ((ctxt->node->children == NULL) && 2562 (RAW == '<') && (NXT(1) == '/')) return(0); 2563 2564 lastChild = xmlGetLastChild(ctxt->node); 2565 if (lastChild == NULL) { 2566 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2567 (ctxt->node->content != NULL)) return(0); 2568 } else if (xmlNodeIsText(lastChild)) 2569 return(0); 2570 else if ((ctxt->node->children != NULL) && 2571 (xmlNodeIsText(ctxt->node->children))) 2572 return(0); 2573 return(1); 2574} 2575 2576/************************************************************************ 2577 * * 2578 * Extra stuff for namespace support * 2579 * Relates to http://www.w3.org/TR/WD-xml-names * 2580 * * 2581 ************************************************************************/ 2582 2583/** 2584 * xmlSplitQName: 2585 * @ctxt: an XML parser context 2586 * @name: an XML parser context 2587 * @prefix: a xmlChar ** 2588 * 2589 * parse an UTF8 encoded XML qualified name string 2590 * 2591 * [NS 5] QName ::= (Prefix ':')? LocalPart 2592 * 2593 * [NS 6] Prefix ::= NCName 2594 * 2595 * [NS 7] LocalPart ::= NCName 2596 * 2597 * Returns the local part, and prefix is updated 2598 * to get the Prefix if any. 
2599 */ 2600 2601xmlChar * 2602xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2603 xmlChar buf[XML_MAX_NAMELEN + 5]; 2604 xmlChar *buffer = NULL; 2605 int len = 0; 2606 int max = XML_MAX_NAMELEN; 2607 xmlChar *ret = NULL; 2608 const xmlChar *cur = name; 2609 int c; 2610 2611 if (prefix == NULL) return(NULL); 2612 *prefix = NULL; 2613 2614 if (cur == NULL) return(NULL); 2615 2616#ifndef XML_XML_NAMESPACE 2617 /* xml: prefix is not really a namespace */ 2618 if ((cur[0] == 'x') && (cur[1] == 'm') && 2619 (cur[2] == 'l') && (cur[3] == ':')) 2620 return(xmlStrdup(name)); 2621#endif 2622 2623 /* nasty but well=formed */ 2624 if (cur[0] == ':') 2625 return(xmlStrdup(name)); 2626 2627 c = *cur++; 2628 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2629 buf[len++] = c; 2630 c = *cur++; 2631 } 2632 if (len >= max) { 2633 /* 2634 * Okay someone managed to make a huge name, so he's ready to pay 2635 * for the processing speed. 2636 */ 2637 max = len * 2; 2638 2639 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2640 if (buffer == NULL) { 2641 xmlErrMemory(ctxt, NULL); 2642 return(NULL); 2643 } 2644 memcpy(buffer, buf, len); 2645 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2646 if (len + 10 > max) { 2647 xmlChar *tmp; 2648 2649 max *= 2; 2650 tmp = (xmlChar *) xmlRealloc(buffer, 2651 max * sizeof(xmlChar)); 2652 if (tmp == NULL) { 2653 xmlFree(buffer); 2654 xmlErrMemory(ctxt, NULL); 2655 return(NULL); 2656 } 2657 buffer = tmp; 2658 } 2659 buffer[len++] = c; 2660 c = *cur++; 2661 } 2662 buffer[len] = 0; 2663 } 2664 2665 if ((c == ':') && (*cur == 0)) { 2666 if (buffer != NULL) 2667 xmlFree(buffer); 2668 *prefix = NULL; 2669 return(xmlStrdup(name)); 2670 } 2671 2672 if (buffer == NULL) 2673 ret = xmlStrndup(buf, len); 2674 else { 2675 ret = buffer; 2676 buffer = NULL; 2677 max = XML_MAX_NAMELEN; 2678 } 2679 2680 2681 if (c == ':') { 2682 c = *cur; 2683 *prefix = ret; 2684 if (c == 0) { 2685 
return(xmlStrndup(BAD_CAST "", 0)); 2686 } 2687 len = 0; 2688 2689 /* 2690 * Check that the first character is proper to start 2691 * a new name 2692 */ 2693 if (!(((c >= 0x61) && (c <= 0x7A)) || 2694 ((c >= 0x41) && (c <= 0x5A)) || 2695 (c == '_') || (c == ':'))) { 2696 int l; 2697 int first = CUR_SCHAR(cur, l); 2698 2699 if (!IS_LETTER(first) && (first != '_')) { 2700 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2701 "Name %s is not XML Namespace compliant\n", 2702 name); 2703 } 2704 } 2705 cur++; 2706 2707 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2708 buf[len++] = c; 2709 c = *cur++; 2710 } 2711 if (len >= max) { 2712 /* 2713 * Okay someone managed to make a huge name, so he's ready to pay 2714 * for the processing speed. 2715 */ 2716 max = len * 2; 2717 2718 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2719 if (buffer == NULL) { 2720 xmlErrMemory(ctxt, NULL); 2721 return(NULL); 2722 } 2723 memcpy(buffer, buf, len); 2724 while (c != 0) { /* tested bigname2.xml */ 2725 if (len + 10 > max) { 2726 xmlChar *tmp; 2727 2728 max *= 2; 2729 tmp = (xmlChar *) xmlRealloc(buffer, 2730 max * sizeof(xmlChar)); 2731 if (tmp == NULL) { 2732 xmlErrMemory(ctxt, NULL); 2733 xmlFree(buffer); 2734 return(NULL); 2735 } 2736 buffer = tmp; 2737 } 2738 buffer[len++] = c; 2739 c = *cur++; 2740 } 2741 buffer[len] = 0; 2742 } 2743 2744 if (buffer == NULL) 2745 ret = xmlStrndup(buf, len); 2746 else { 2747 ret = buffer; 2748 } 2749 } 2750 2751 return(ret); 2752} 2753 2754/************************************************************************ 2755 * * 2756 * The parser itself * 2757 * Relates to http://www.w3.org/TR/REC-xml * 2758 * * 2759 ************************************************************************/ 2760 2761static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2762static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2763 int *len, int *alloc, int normalize); 2764 2765/** 2766 * xmlParseName: 2767 * @ctxt: an XML parser 
context 2768 * 2769 * parse an XML name. 2770 * 2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2772 * CombiningChar | Extender 2773 * 2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2775 * 2776 * [6] Names ::= Name (#x20 Name)* 2777 * 2778 * Returns the Name parsed or NULL 2779 */ 2780 2781const xmlChar * 2782xmlParseName(xmlParserCtxtPtr ctxt) { 2783 const xmlChar *in; 2784 const xmlChar *ret; 2785 int count = 0; 2786 2787 GROW; 2788 2789 /* 2790 * Accelerator for simple ASCII names 2791 */ 2792 in = ctxt->input->cur; 2793 if (((*in >= 0x61) && (*in <= 0x7A)) || 2794 ((*in >= 0x41) && (*in <= 0x5A)) || 2795 (*in == '_') || (*in == ':')) { 2796 in++; 2797 while (((*in >= 0x61) && (*in <= 0x7A)) || 2798 ((*in >= 0x41) && (*in <= 0x5A)) || 2799 ((*in >= 0x30) && (*in <= 0x39)) || 2800 (*in == '_') || (*in == '-') || 2801 (*in == ':') || (*in == '.')) 2802 in++; 2803 if ((*in > 0) && (*in < 0x80)) { 2804 count = in - ctxt->input->cur; 2805 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2806 ctxt->input->cur = in; 2807 ctxt->nbChars += count; 2808 ctxt->input->col += count; 2809 if (ret == NULL) 2810 xmlErrMemory(ctxt, NULL); 2811 return(ret); 2812 } 2813 } 2814 return(xmlParseNameComplex(ctxt)); 2815} 2816 2817/** 2818 * xmlParseNameAndCompare: 2819 * @ctxt: an XML parser context 2820 * 2821 * parse an XML name and compares for match 2822 * (specialized for endtag parsing) 2823 * 2824 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2825 * and the name for mismatch 2826 */ 2827 2828static const xmlChar * 2829xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2830 register const xmlChar *cmp = other; 2831 register const xmlChar *in; 2832 const xmlChar *ret; 2833 2834 GROW; 2835 2836 in = ctxt->input->cur; 2837 while (*in != 0 && *in == *cmp) { 2838 ++in; 2839 ++cmp; 2840 ctxt->input->col++; 2841 } 2842 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2843 /* success */ 2844 ctxt->input->cur = in; 
2845 return (const xmlChar*) 1; 2846 } 2847 /* failure (or end of input buffer), check with full function */ 2848 ret = xmlParseName (ctxt); 2849 /* strings coming from the dictionnary direct compare possible */ 2850 if (ret == other) { 2851 return (const xmlChar*) 1; 2852 } 2853 return ret; 2854} 2855 2856static const xmlChar * 2857xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2858 int len = 0, l; 2859 int c; 2860 int count = 0; 2861 2862 /* 2863 * Handler for more complex cases 2864 */ 2865 GROW; 2866 c = CUR_CHAR(l); 2867 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2868 (!IS_LETTER(c) && (c != '_') && 2869 (c != ':'))) { 2870 return(NULL); 2871 } 2872 2873 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2874 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2875 (c == '.') || (c == '-') || 2876 (c == '_') || (c == ':') || 2877 (IS_COMBINING(c)) || 2878 (IS_EXTENDER(c)))) { 2879 if (count++ > 100) { 2880 count = 0; 2881 GROW; 2882 } 2883 len += l; 2884 NEXTL(l); 2885 c = CUR_CHAR(l); 2886 } 2887 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 2888 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 2889 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2890} 2891 2892/** 2893 * xmlParseStringName: 2894 * @ctxt: an XML parser context 2895 * @str: a pointer to the string pointer (IN/OUT) 2896 * 2897 * parse an XML name. 2898 * 2899 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2900 * CombiningChar | Extender 2901 * 2902 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2903 * 2904 * [6] Names ::= Name (#x20 Name)* 2905 * 2906 * Returns the Name parsed or NULL. The @str pointer 2907 * is updated to the current location in the string. 
2908 */ 2909 2910static xmlChar * 2911xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2912 xmlChar buf[XML_MAX_NAMELEN + 5]; 2913 const xmlChar *cur = *str; 2914 int len = 0, l; 2915 int c; 2916 2917 c = CUR_SCHAR(cur, l); 2918 if (!IS_LETTER(c) && (c != '_') && 2919 (c != ':')) { 2920 return(NULL); 2921 } 2922 2923 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2924 (c == '.') || (c == '-') || 2925 (c == '_') || (c == ':') || 2926 (IS_COMBINING(c)) || 2927 (IS_EXTENDER(c))) { 2928 COPY_BUF(l,buf,len,c); 2929 cur += l; 2930 c = CUR_SCHAR(cur, l); 2931 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2932 /* 2933 * Okay someone managed to make a huge name, so he's ready to pay 2934 * for the processing speed. 2935 */ 2936 xmlChar *buffer; 2937 int max = len * 2; 2938 2939 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2940 if (buffer == NULL) { 2941 xmlErrMemory(ctxt, NULL); 2942 return(NULL); 2943 } 2944 memcpy(buffer, buf, len); 2945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2946 /* test bigentname.xml */ 2947 (c == '.') || (c == '-') || 2948 (c == '_') || (c == ':') || 2949 (IS_COMBINING(c)) || 2950 (IS_EXTENDER(c))) { 2951 if (len + 10 > max) { 2952 xmlChar *tmp; 2953 max *= 2; 2954 tmp = (xmlChar *) xmlRealloc(buffer, 2955 max * sizeof(xmlChar)); 2956 if (tmp == NULL) { 2957 xmlErrMemory(ctxt, NULL); 2958 xmlFree(buffer); 2959 return(NULL); 2960 } 2961 buffer = tmp; 2962 } 2963 COPY_BUF(l,buffer,len,c); 2964 cur += l; 2965 c = CUR_SCHAR(cur, l); 2966 } 2967 buffer[len] = 0; 2968 *str = cur; 2969 return(buffer); 2970 } 2971 } 2972 *str = cur; 2973 return(xmlStrndup(buf, len)); 2974} 2975 2976/** 2977 * xmlParseNmtoken: 2978 * @ctxt: an XML parser context 2979 * 2980 * parse an XML Nmtoken. 
2981 * 2982 * [7] Nmtoken ::= (NameChar)+ 2983 * 2984 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 2985 * 2986 * Returns the Nmtoken parsed or NULL 2987 */ 2988 2989xmlChar * 2990xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2991 xmlChar buf[XML_MAX_NAMELEN + 5]; 2992 int len = 0, l; 2993 int c; 2994 int count = 0; 2995 2996 GROW; 2997 c = CUR_CHAR(l); 2998 2999 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 3000 (c == '.') || (c == '-') || 3001 (c == '_') || (c == ':') || 3002 (IS_COMBINING(c)) || 3003 (IS_EXTENDER(c))) { 3004 if (count++ > 100) { 3005 count = 0; 3006 GROW; 3007 } 3008 COPY_BUF(l,buf,len,c); 3009 NEXTL(l); 3010 c = CUR_CHAR(l); 3011 if (len >= XML_MAX_NAMELEN) { 3012 /* 3013 * Okay someone managed to make a huge token, so he's ready to pay 3014 * for the processing speed. 3015 */ 3016 xmlChar *buffer; 3017 int max = len * 2; 3018 3019 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3020 if (buffer == NULL) { 3021 xmlErrMemory(ctxt, NULL); 3022 return(NULL); 3023 } 3024 memcpy(buffer, buf, len); 3025 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 3026 (c == '.') || (c == '-') || 3027 (c == '_') || (c == ':') || 3028 (IS_COMBINING(c)) || 3029 (IS_EXTENDER(c))) { 3030 if (count++ > 100) { 3031 count = 0; 3032 GROW; 3033 } 3034 if (len + 10 > max) { 3035 xmlChar *tmp; 3036 3037 max *= 2; 3038 tmp = (xmlChar *) xmlRealloc(buffer, 3039 max * sizeof(xmlChar)); 3040 if (tmp == NULL) { 3041 xmlErrMemory(ctxt, NULL); 3042 xmlFree(buffer); 3043 return(NULL); 3044 } 3045 buffer = tmp; 3046 } 3047 COPY_BUF(l,buffer,len,c); 3048 NEXTL(l); 3049 c = CUR_CHAR(l); 3050 } 3051 buffer[len] = 0; 3052 return(buffer); 3053 } 3054 } 3055 if (len == 0) 3056 return(NULL); 3057 return(xmlStrndup(buf, len)); 3058} 3059 3060/** 3061 * xmlParseEntityValue: 3062 * @ctxt: an XML parser context 3063 * @orig: if non-NULL store a copy of the original entity value 3064 * 3065 * parse a value for ENTITY declarations 3066 * 3067 * [9] 
EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3068 * "'" ([^%&'] | PEReference | Reference)* "'" 3069 * 3070 * Returns the EntityValue parsed with reference substituted or NULL 3071 */ 3072 3073xmlChar * 3074xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3075 xmlChar *buf = NULL; 3076 int len = 0; 3077 int size = XML_PARSER_BUFFER_SIZE; 3078 int c, l; 3079 xmlChar stop; 3080 xmlChar *ret = NULL; 3081 const xmlChar *cur = NULL; 3082 xmlParserInputPtr input; 3083 3084 if (RAW == '"') stop = '"'; 3085 else if (RAW == '\'') stop = '\''; 3086 else { 3087 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3088 return(NULL); 3089 } 3090 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3091 if (buf == NULL) { 3092 xmlErrMemory(ctxt, NULL); 3093 return(NULL); 3094 } 3095 3096 /* 3097 * The content of the entity definition is copied in a buffer. 3098 */ 3099 3100 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3101 input = ctxt->input; 3102 GROW; 3103 NEXT; 3104 c = CUR_CHAR(l); 3105 /* 3106 * NOTE: 4.4.5 Included in Literal 3107 * When a parameter entity reference appears in a literal entity 3108 * value, ... a single or double quote character in the replacement 3109 * text is always treated as a normal data character and will not 3110 * terminate the literal. 3111 * In practice it means we stop the loop only when back at parsing 3112 * the initial entity and the quote is found 3113 */ 3114 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3115 (ctxt->input != input))) { 3116 if (len + 5 >= size) { 3117 xmlChar *tmp; 3118 3119 size *= 2; 3120 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3121 if (tmp == NULL) { 3122 xmlErrMemory(ctxt, NULL); 3123 xmlFree(buf); 3124 return(NULL); 3125 } 3126 buf = tmp; 3127 } 3128 COPY_BUF(l,buf,len,c); 3129 NEXTL(l); 3130 /* 3131 * Pop-up of finished entities. 
3132 */ 3133 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3134 xmlPopInput(ctxt); 3135 3136 GROW; 3137 c = CUR_CHAR(l); 3138 if (c == 0) { 3139 GROW; 3140 c = CUR_CHAR(l); 3141 } 3142 } 3143 buf[len] = 0; 3144 3145 /* 3146 * Raise problem w.r.t. '&' and '%' being used in non-entities 3147 * reference constructs. Note Charref will be handled in 3148 * xmlStringDecodeEntities() 3149 */ 3150 cur = buf; 3151 while (*cur != 0) { /* non input consuming */ 3152 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3153 xmlChar *name; 3154 xmlChar tmp = *cur; 3155 3156 cur++; 3157 name = xmlParseStringName(ctxt, &cur); 3158 if ((name == NULL) || (*cur != ';')) { 3159 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3160 "EntityValue: '%c' forbidden except for entities references\n", 3161 tmp); 3162 } 3163 if ((tmp == '%') && (ctxt->inSubset == 1) && 3164 (ctxt->inputNr == 1)) { 3165 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3166 } 3167 if (name != NULL) 3168 xmlFree(name); 3169 if (*cur == 0) 3170 break; 3171 } 3172 cur++; 3173 } 3174 3175 /* 3176 * Then PEReference entities are substituted. 3177 */ 3178 if (c != stop) { 3179 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3180 xmlFree(buf); 3181 } else { 3182 NEXT; 3183 /* 3184 * NOTE: 4.4.7 Bypassed 3185 * When a general entity reference appears in the EntityValue in 3186 * an entity declaration, it is bypassed and left as is. 3187 * so XML_SUBSTITUTE_REF is not set here. 
3188 */ 3189 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3190 0, 0, 0); 3191 if (orig != NULL) 3192 *orig = buf; 3193 else 3194 xmlFree(buf); 3195 } 3196 3197 return(ret); 3198} 3199 3200/** 3201 * xmlParseAttValueComplex: 3202 * @ctxt: an XML parser context 3203 * @len: the resulting attribute len 3204 * @normalize: wether to apply the inner normalization 3205 * 3206 * parse a value for an attribute, this is the fallback function 3207 * of xmlParseAttValue() when the attribute parsing requires handling 3208 * of non-ASCII characters, or normalization compaction. 3209 * 3210 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3211 */ 3212static xmlChar * 3213xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3214 xmlChar limit = 0; 3215 xmlChar *buf = NULL; 3216 xmlChar *rep = NULL; 3217 int len = 0; 3218 int buf_size = 0; 3219 int c, l, in_space = 0; 3220 xmlChar *current = NULL; 3221 xmlEntityPtr ent; 3222 3223 if (NXT(0) == '"') { 3224 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3225 limit = '"'; 3226 NEXT; 3227 } else if (NXT(0) == '\'') { 3228 limit = '\''; 3229 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3230 NEXT; 3231 } else { 3232 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3233 return(NULL); 3234 } 3235 3236 /* 3237 * allocate a translation buffer. 3238 */ 3239 buf_size = XML_PARSER_BUFFER_SIZE; 3240 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3241 if (buf == NULL) goto mem_error; 3242 3243 /* 3244 * OK loop until we reach one of the ending char or a size limit. 
3245 */ 3246 c = CUR_CHAR(l); 3247 while ((NXT(0) != limit) && /* checked */ 3248 (IS_CHAR(c)) && (c != '<')) { 3249 if (c == 0) break; 3250 if (c == '&') { 3251 in_space = 0; 3252 if (NXT(1) == '#') { 3253 int val = xmlParseCharRef(ctxt); 3254 3255 if (val == '&') { 3256 if (ctxt->replaceEntities) { 3257 if (len > buf_size - 10) { 3258 growBuffer(buf); 3259 } 3260 buf[len++] = '&'; 3261 } else { 3262 /* 3263 * The reparsing will be done in xmlStringGetNodeList() 3264 * called by the attribute() function in SAX.c 3265 */ 3266 if (len > buf_size - 10) { 3267 growBuffer(buf); 3268 } 3269 buf[len++] = '&'; 3270 buf[len++] = '#'; 3271 buf[len++] = '3'; 3272 buf[len++] = '8'; 3273 buf[len++] = ';'; 3274 } 3275 } else if (val != 0) { 3276 if (len > buf_size - 10) { 3277 growBuffer(buf); 3278 } 3279 len += xmlCopyChar(0, &buf[len], val); 3280 } 3281 } else { 3282 ent = xmlParseEntityRef(ctxt); 3283 if ((ent != NULL) && 3284 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3285 if (len > buf_size - 10) { 3286 growBuffer(buf); 3287 } 3288 if ((ctxt->replaceEntities == 0) && 3289 (ent->content[0] == '&')) { 3290 buf[len++] = '&'; 3291 buf[len++] = '#'; 3292 buf[len++] = '3'; 3293 buf[len++] = '8'; 3294 buf[len++] = ';'; 3295 } else { 3296 buf[len++] = ent->content[0]; 3297 } 3298 } else if ((ent != NULL) && 3299 (ctxt->replaceEntities != 0)) { 3300 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3301 rep = xmlStringDecodeEntities(ctxt, ent->content, 3302 XML_SUBSTITUTE_REF, 3303 0, 0, 0); 3304 if (rep != NULL) { 3305 current = rep; 3306 while (*current != 0) { /* non input consuming */ 3307 buf[len++] = *current++; 3308 if (len > buf_size - 10) { 3309 growBuffer(buf); 3310 } 3311 } 3312 xmlFree(rep); 3313 rep = NULL; 3314 } 3315 } else { 3316 if (len > buf_size - 10) { 3317 growBuffer(buf); 3318 } 3319 if (ent->content != NULL) 3320 buf[len++] = ent->content[0]; 3321 } 3322 } else if (ent != NULL) { 3323 int i = xmlStrlen(ent->name); 3324 const xmlChar *cur = ent->name; 
3325 3326 /* 3327 * This may look absurd but is needed to detect 3328 * entities problems 3329 */ 3330 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3331 (ent->content != NULL)) { 3332 rep = xmlStringDecodeEntities(ctxt, ent->content, 3333 XML_SUBSTITUTE_REF, 0, 0, 0); 3334 if (rep != NULL) { 3335 xmlFree(rep); 3336 rep = NULL; 3337 } 3338 } 3339 3340 /* 3341 * Just output the reference 3342 */ 3343 buf[len++] = '&'; 3344 if (len > buf_size - i - 10) { 3345 growBuffer(buf); 3346 } 3347 for (;i > 0;i--) 3348 buf[len++] = *cur++; 3349 buf[len++] = ';'; 3350 } 3351 } 3352 } else { 3353 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3354 if ((len != 0) || (!normalize)) { 3355 if ((!normalize) || (!in_space)) { 3356 COPY_BUF(l,buf,len,0x20); 3357 if (len > buf_size - 10) { 3358 growBuffer(buf); 3359 } 3360 } 3361 in_space = 1; 3362 } 3363 } else { 3364 in_space = 0; 3365 COPY_BUF(l,buf,len,c); 3366 if (len > buf_size - 10) { 3367 growBuffer(buf); 3368 } 3369 } 3370 NEXTL(l); 3371 } 3372 GROW; 3373 c = CUR_CHAR(l); 3374 } 3375 if ((in_space) && (normalize)) { 3376 while (buf[len - 1] == 0x20) len--; 3377 } 3378 buf[len] = 0; 3379 if (RAW == '<') { 3380 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3381 } else if (RAW != limit) { 3382 if ((c != 0) && (!IS_CHAR(c))) { 3383 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3384 "invalid character in attribute value\n"); 3385 } else { 3386 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3387 "AttValue: ' expected\n"); 3388 } 3389 } else 3390 NEXT; 3391 if (attlen != NULL) *attlen = len; 3392 return(buf); 3393 3394mem_error: 3395 xmlErrMemory(ctxt, NULL); 3396 if (buf != NULL) 3397 xmlFree(buf); 3398 if (rep != NULL) 3399 xmlFree(rep); 3400 return(NULL); 3401} 3402 3403/** 3404 * xmlParseAttValue: 3405 * @ctxt: an XML parser context 3406 * 3407 * parse a value for an attribute 3408 * Note: the parser won't do substitution of entities here, this 3409 * will be handled later in xmlStringGetNodeList 3410 
* 3411 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3412 * "'" ([^<&'] | Reference)* "'" 3413 * 3414 * 3.3.3 Attribute-Value Normalization: 3415 * Before the value of an attribute is passed to the application or 3416 * checked for validity, the XML processor must normalize it as follows: 3417 * - a character reference is processed by appending the referenced 3418 * character to the attribute value 3419 * - an entity reference is processed by recursively processing the 3420 * replacement text of the entity 3421 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3422 * appending #x20 to the normalized value, except that only a single 3423 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3424 * parsed entity or the literal entity value of an internal parsed entity 3425 * - other characters are processed by appending them to the normalized value 3426 * If the declared value is not CDATA, then the XML processor must further 3427 * process the normalized attribute value by discarding any leading and 3428 * trailing space (#x20) characters, and by replacing sequences of space 3429 * (#x20) characters by a single space (#x20) character. 3430 * All attributes for which no declaration has been read should be treated 3431 * by a non-validating parser as if declared CDATA. 3432 * 3433 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3434 */ 3435 3436 3437xmlChar * 3438xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3439 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3440 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3441} 3442 3443/** 3444 * xmlParseSystemLiteral: 3445 * @ctxt: an XML parser context 3446 * 3447 * parse an XML Literal 3448 * 3449 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3450 * 3451 * Returns the SystemLiteral parsed or NULL 3452 */ 3453 3454xmlChar * 3455xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3456 xmlChar *buf = NULL; 3457 int len = 0; 3458 int size = XML_PARSER_BUFFER_SIZE; 3459 int cur, l; 3460 xmlChar stop; 3461 int state = ctxt->instate; 3462 int count = 0; 3463 3464 SHRINK; 3465 if (RAW == '"') { 3466 NEXT; 3467 stop = '"'; 3468 } else if (RAW == '\'') { 3469 NEXT; 3470 stop = '\''; 3471 } else { 3472 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3473 return(NULL); 3474 } 3475 3476 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3477 if (buf == NULL) { 3478 xmlErrMemory(ctxt, NULL); 3479 return(NULL); 3480 } 3481 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3482 cur = CUR_CHAR(l); 3483 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3484 if (len + 5 >= size) { 3485 xmlChar *tmp; 3486 3487 size *= 2; 3488 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3489 if (tmp == NULL) { 3490 xmlFree(buf); 3491 xmlErrMemory(ctxt, NULL); 3492 ctxt->instate = (xmlParserInputState) state; 3493 return(NULL); 3494 } 3495 buf = tmp; 3496 } 3497 count++; 3498 if (count > 50) { 3499 GROW; 3500 count = 0; 3501 } 3502 COPY_BUF(l,buf,len,cur); 3503 NEXTL(l); 3504 cur = CUR_CHAR(l); 3505 if (cur == 0) { 3506 GROW; 3507 SHRINK; 3508 cur = CUR_CHAR(l); 3509 } 3510 } 3511 buf[len] = 0; 3512 ctxt->instate = (xmlParserInputState) state; 3513 if (!IS_CHAR(cur)) { 3514 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3515 } else { 3516 NEXT; 3517 } 3518 return(buf); 3519} 3520 3521/** 3522 * xmlParsePubidLiteral: 3523 * 
@ctxt: an XML parser context 3524 * 3525 * parse an XML public literal 3526 * 3527 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3528 * 3529 * Returns the PubidLiteral parsed or NULL. 3530 */ 3531 3532xmlChar * 3533xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3534 xmlChar *buf = NULL; 3535 int len = 0; 3536 int size = XML_PARSER_BUFFER_SIZE; 3537 xmlChar cur; 3538 xmlChar stop; 3539 int count = 0; 3540 xmlParserInputState oldstate = ctxt->instate; 3541 3542 SHRINK; 3543 if (RAW == '"') { 3544 NEXT; 3545 stop = '"'; 3546 } else if (RAW == '\'') { 3547 NEXT; 3548 stop = '\''; 3549 } else { 3550 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3551 return(NULL); 3552 } 3553 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3554 if (buf == NULL) { 3555 xmlErrMemory(ctxt, NULL); 3556 return(NULL); 3557 } 3558 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3559 cur = CUR; 3560 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3561 if (len + 1 >= size) { 3562 xmlChar *tmp; 3563 3564 size *= 2; 3565 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3566 if (tmp == NULL) { 3567 xmlErrMemory(ctxt, NULL); 3568 xmlFree(buf); 3569 return(NULL); 3570 } 3571 buf = tmp; 3572 } 3573 buf[len++] = cur; 3574 count++; 3575 if (count > 50) { 3576 GROW; 3577 count = 0; 3578 } 3579 NEXT; 3580 cur = CUR; 3581 if (cur == 0) { 3582 GROW; 3583 SHRINK; 3584 cur = CUR; 3585 } 3586 } 3587 buf[len] = 0; 3588 if (cur != stop) { 3589 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3590 } else { 3591 NEXT; 3592 } 3593 ctxt->instate = oldstate; 3594 return(buf); 3595} 3596 3597void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3598 3599/* 3600 * used for the test in the inner loop of the char data testing 3601 */ 3602static const unsigned char test_char_data[256] = { 3603 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3604 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 3605 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 3606 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3607 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 3608 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 3609 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 3610 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 3611 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 3612 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 3613 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 3614 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 3615 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 3616 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 3617 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 3618 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 3619 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 3620 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3621 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3622 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3623 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3624 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3625 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3626 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3627 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3628 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3629 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3630 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3631 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3632 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3633 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3634 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 3635}; 3636 3637/** 3638 * xmlParseCharData: 3639 * @ctxt: an XML parser context 3640 * @cdata: int indicating whether we are within a CDATA section 3641 * 3642 * parse a CharData section. 3643 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of bytes straight to the SAX callbacks without copying.
 * As soon as a byte outside 0x20..0x7F (other than 0x09) is met, or when
 * @cdata is non-zero, the work is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position of the input after the last run delivered to SAX */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen before the next
                 * markup: deliver them, if any, and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * areBlanks() chooses between the two callbacks.
                         * NOTE(review): the meaning of its last argument
                         * (1 here, 0 below) is not visible in this chunk.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): -1 and -2 look like markers
                             * of the xml:space stack - confirm.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* consume every byte flagged as plain in test_char_data[] */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the run gathered since ctxt->input->cur */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: restart the next run at the LF so that the
                     * CR is never delivered.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* SHRINK/GROW may have moved the buffer: reload the cursor */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Put back the line/column recorded after the last delivery before
     * handing over to the generic routine.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section. This is the fallback function
 * of
xmlParseCharData() when the parsing requires handling 3797 * of non-ASCII characters. 3798 */ 3799void 3800xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3801 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3802 int nbchar = 0; 3803 int cur, l; 3804 int count = 0; 3805 3806 SHRINK; 3807 GROW; 3808 cur = CUR_CHAR(l); 3809 while ((cur != '<') && /* checked */ 3810 (cur != '&') && 3811 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3812 if ((cur == ']') && (NXT(1) == ']') && 3813 (NXT(2) == '>')) { 3814 if (cdata) break; 3815 else { 3816 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3817 } 3818 } 3819 COPY_BUF(l,buf,nbchar,cur); 3820 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3821 buf[nbchar] = 0; 3822 3823 /* 3824 * OK the segment is to be consumed as chars. 3825 */ 3826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3827 if (areBlanks(ctxt, buf, nbchar, 0)) { 3828 if (ctxt->sax->ignorableWhitespace != NULL) 3829 ctxt->sax->ignorableWhitespace(ctxt->userData, 3830 buf, nbchar); 3831 } else { 3832 if (ctxt->sax->characters != NULL) 3833 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3834 if ((ctxt->sax->characters != 3835 ctxt->sax->ignorableWhitespace) && 3836 (*ctxt->space == -1)) 3837 *ctxt->space = -2; 3838 } 3839 } 3840 nbchar = 0; 3841 } 3842 count++; 3843 if (count > 50) { 3844 GROW; 3845 count = 0; 3846 } 3847 NEXTL(l); 3848 cur = CUR_CHAR(l); 3849 } 3850 if (nbchar != 0) { 3851 buf[nbchar] = 0; 3852 /* 3853 * OK the segment is to be consumed as chars. 
3854 */ 3855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3856 if (areBlanks(ctxt, buf, nbchar, 0)) { 3857 if (ctxt->sax->ignorableWhitespace != NULL) 3858 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3859 } else { 3860 if (ctxt->sax->characters != NULL) 3861 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3862 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 3863 (*ctxt->space == -1)) 3864 *ctxt->space = -2; 3865 } 3866 } 3867 } 3868 if ((cur != 0) && (!IS_CHAR(cur))) { 3869 /* Generate the error and skip the offending character */ 3870 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 3871 "PCDATA invalid Char value %d\n", 3872 cur); 3873 NEXTL(l); 3874 } 3875} 3876 3877/** 3878 * xmlParseExternalID: 3879 * @ctxt: an XML parser context 3880 * @publicID: a xmlChar** receiving PubidLiteral 3881 * @strict: indicate whether we should restrict parsing to only 3882 * production [75], see NOTE below 3883 * 3884 * Parse an External ID or a Public ID 3885 * 3886 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3887 * 'PUBLIC' S PubidLiteral S SystemLiteral 3888 * 3889 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3890 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3891 * 3892 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3893 * 3894 * Returns the function returns SystemLiteral and in the second 3895 * case publicID receives PubidLiteral, is strict is off 3896 * it is possible to return NULL and have publicID set. 
3897 */ 3898 3899xmlChar * 3900xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3901 xmlChar *URI = NULL; 3902 3903 SHRINK; 3904 3905 *publicID = NULL; 3906 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3907 SKIP(6); 3908 if (!IS_BLANK_CH(CUR)) { 3909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3910 "Space required after 'SYSTEM'\n"); 3911 } 3912 SKIP_BLANKS; 3913 URI = xmlParseSystemLiteral(ctxt); 3914 if (URI == NULL) { 3915 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3916 } 3917 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3918 SKIP(6); 3919 if (!IS_BLANK_CH(CUR)) { 3920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3921 "Space required after 'PUBLIC'\n"); 3922 } 3923 SKIP_BLANKS; 3924 *publicID = xmlParsePubidLiteral(ctxt); 3925 if (*publicID == NULL) { 3926 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3927 } 3928 if (strict) { 3929 /* 3930 * We don't handle [83] so "S SystemLiteral" is required. 3931 */ 3932 if (!IS_BLANK_CH(CUR)) { 3933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3934 "Space required after the Public Identifier\n"); 3935 } 3936 } else { 3937 /* 3938 * We handle [83] so we return immediately, if 3939 * "S SystemLiteral" is not detected. From a purely parsing 3940 * point of view that's a nice mess. 3941 */ 3942 const xmlChar *ptr; 3943 GROW; 3944 3945 ptr = CUR_PTR; 3946 if (!IS_BLANK_CH(*ptr)) return(NULL); 3947 3948 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3949 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3950 } 3951 SKIP_BLANKS; 3952 URI = xmlParseSystemLiteral(ctxt); 3953 if (URI == NULL) { 3954 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3955 } 3956 } 3957 return(URI); 3958} 3959 3960/** 3961 * xmlParseCommentComplex: 3962 * @ctxt: an XML parser context 3963 * @buf: the already parsed part of the buffer 3964 * @len: number of bytes filles in the buffer 3965 * @size: allocated size of the buffer 3966 * 3967 * Skip an XML (SGML) comment <!-- .... 
--> 3968 * The spec says that "For compatibility, the string "--" (double-hyphen) 3969 * must not occur within comments. " 3970 * This is the slow routine in case the accelerator for ascii didn't work 3971 * 3972 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3973 */ 3974static void 3975xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 3976 int q, ql; 3977 int r, rl; 3978 int cur, l; 3979 xmlParserInputPtr input = ctxt->input; 3980 int count = 0; 3981 3982 if (buf == NULL) { 3983 len = 0; 3984 size = XML_PARSER_BUFFER_SIZE; 3985 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3986 if (buf == NULL) { 3987 xmlErrMemory(ctxt, NULL); 3988 return; 3989 } 3990 } 3991 GROW; /* Assure there's enough input data */ 3992 q = CUR_CHAR(ql); 3993 if (q == 0) 3994 goto not_terminated; 3995 if (!IS_CHAR(q)) { 3996 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 3997 "xmlParseComment: invalid xmlChar value %d\n", 3998 q); 3999 xmlFree (buf); 4000 return; 4001 } 4002 NEXTL(ql); 4003 r = CUR_CHAR(rl); 4004 if (r == 0) 4005 goto not_terminated; 4006 if (!IS_CHAR(r)) { 4007 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4008 "xmlParseComment: invalid xmlChar value %d\n", 4009 q); 4010 xmlFree (buf); 4011 return; 4012 } 4013 NEXTL(rl); 4014 cur = CUR_CHAR(l); 4015 if (cur == 0) 4016 goto not_terminated; 4017 while (IS_CHAR(cur) && /* checked */ 4018 ((cur != '>') || 4019 (r != '-') || (q != '-'))) { 4020 if ((r == '-') && (q == '-')) { 4021 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4022 } 4023 if (len + 5 >= size) { 4024 xmlChar *new_buf; 4025 size *= 2; 4026 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4027 if (new_buf == NULL) { 4028 xmlFree (buf); 4029 xmlErrMemory(ctxt, NULL); 4030 return; 4031 } 4032 buf = new_buf; 4033 } 4034 COPY_BUF(ql,buf,len,q); 4035 q = r; 4036 ql = rl; 4037 r = cur; 4038 rl = l; 4039 4040 count++; 4041 if (count > 50) { 4042 GROW; 4043 count = 0; 4044 } 4045 NEXTL(l); 
4046 cur = CUR_CHAR(l); 4047 if (cur == 0) { 4048 SHRINK; 4049 GROW; 4050 cur = CUR_CHAR(l); 4051 } 4052 } 4053 buf[len] = 0; 4054 if (cur == 0) { 4055 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4056 "Comment not terminated \n<!--%.50s\n", buf); 4057 } else if (!IS_CHAR(cur)) { 4058 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4059 "xmlParseComment: invalid xmlChar value %d\n", 4060 cur); 4061 } else { 4062 if (input != ctxt->input) { 4063 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4064 "Comment doesn't start and stop in the same entity\n"); 4065 } 4066 NEXT; 4067 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4068 (!ctxt->disableSAX)) 4069 ctxt->sax->comment(ctxt->userData, buf); 4070 } 4071 xmlFree(buf); 4072 return; 4073not_terminated: 4074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4075 "Comment not terminated\n", NULL); 4076 xmlFree(buf); 4077 return; 4078} 4079 4080/** 4081 * xmlParseComment: 4082 * @ctxt: an XML parser context 4083 * 4084 * Skip an XML (SGML) comment <!-- .... --> 4085 * The spec says that "For compatibility, the string "--" (double-hyphen) 4086 * must not occur within comments. " 4087 * 4088 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4089 */ 4090void 4091xmlParseComment(xmlParserCtxtPtr ctxt) { 4092 xmlChar *buf = NULL; 4093 int size = XML_PARSER_BUFFER_SIZE; 4094 int len = 0; 4095 xmlParserInputState state; 4096 const xmlChar *in; 4097 int nbchar = 0, ccol; 4098 4099 /* 4100 * Check that there is a comment right here. 4101 */ 4102 if ((RAW != '<') || (NXT(1) != '!') || 4103 (NXT(2) != '-') || (NXT(3) != '-')) return; 4104 4105 state = ctxt->instate; 4106 ctxt->instate = XML_PARSER_COMMENT; 4107 SKIP(4); 4108 SHRINK; 4109 GROW; 4110 4111 /* 4112 * Accelerated common case where input don't need to be 4113 * modified before passing it to the handler. 
4114 */ 4115 in = ctxt->input->cur; 4116 do { 4117 if (*in == 0xA) { 4118 do { 4119 ctxt->input->line++; ctxt->input->col = 1; 4120 in++; 4121 } while (*in == 0xA); 4122 } 4123get_more: 4124 ccol = ctxt->input->col; 4125 while (((*in > '-') && (*in <= 0x7F)) || 4126 ((*in >= 0x20) && (*in < '-')) || 4127 (*in == 0x09)) { 4128 in++; 4129 ccol++; 4130 } 4131 ctxt->input->col = ccol; 4132 if (*in == 0xA) { 4133 do { 4134 ctxt->input->line++; ctxt->input->col = 1; 4135 in++; 4136 } while (*in == 0xA); 4137 goto get_more; 4138 } 4139 nbchar = in - ctxt->input->cur; 4140 /* 4141 * save current set of data 4142 */ 4143 if (nbchar > 0) { 4144 if ((ctxt->sax != NULL) && 4145 (ctxt->sax->comment != NULL)) { 4146 if (buf == NULL) { 4147 if ((*in == '-') && (in[1] == '-')) 4148 size = nbchar + 1; 4149 else 4150 size = XML_PARSER_BUFFER_SIZE + nbchar; 4151 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4152 if (buf == NULL) { 4153 xmlErrMemory(ctxt, NULL); 4154 ctxt->instate = state; 4155 return; 4156 } 4157 len = 0; 4158 } else if (len + nbchar + 1 >= size) { 4159 xmlChar *new_buf; 4160 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4161 new_buf = (xmlChar *) xmlRealloc(buf, 4162 size * sizeof(xmlChar)); 4163 if (new_buf == NULL) { 4164 xmlFree (buf); 4165 xmlErrMemory(ctxt, NULL); 4166 ctxt->instate = state; 4167 return; 4168 } 4169 buf = new_buf; 4170 } 4171 memcpy(&buf[len], ctxt->input->cur, nbchar); 4172 len += nbchar; 4173 buf[len] = 0; 4174 } 4175 } 4176 ctxt->input->cur = in; 4177 if (*in == 0xA) { 4178 in++; 4179 ctxt->input->line++; ctxt->input->col = 1; 4180 } 4181 if (*in == 0xD) { 4182 in++; 4183 if (*in == 0xA) { 4184 ctxt->input->cur = in; 4185 in++; 4186 ctxt->input->line++; ctxt->input->col = 1; 4187 continue; /* while */ 4188 } 4189 in--; 4190 } 4191 SHRINK; 4192 GROW; 4193 in = ctxt->input->cur; 4194 if (*in == '-') { 4195 if (in[1] == '-') { 4196 if (in[2] == '>') { 4197 SKIP(3); 4198 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 
4199 (!ctxt->disableSAX)) { 4200 if (buf != NULL) 4201 ctxt->sax->comment(ctxt->userData, buf); 4202 else 4203 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4204 } 4205 if (buf != NULL) 4206 xmlFree(buf); 4207 ctxt->instate = state; 4208 return; 4209 } 4210 if (buf != NULL) 4211 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4212 "Comment not terminated \n<!--%.50s\n", 4213 buf); 4214 else 4215 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4216 "Comment not terminated \n", NULL); 4217 in++; 4218 ctxt->input->col++; 4219 } 4220 in++; 4221 ctxt->input->col++; 4222 goto get_more; 4223 } 4224 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4225 xmlParseCommentComplex(ctxt, buf, len, size); 4226 ctxt->instate = state; 4227 return; 4228} 4229 4230 4231/** 4232 * xmlParsePITarget: 4233 * @ctxt: an XML parser context 4234 * 4235 * parse the name of a PI 4236 * 4237 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4238 * 4239 * Returns the PITarget name or NULL 4240 */ 4241 4242const xmlChar * 4243xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4244 const xmlChar *name; 4245 4246 name = xmlParseName(ctxt); 4247 if ((name != NULL) && 4248 ((name[0] == 'x') || (name[0] == 'X')) && 4249 ((name[1] == 'm') || (name[1] == 'M')) && 4250 ((name[2] == 'l') || (name[2] == 'L'))) { 4251 int i; 4252 if ((name[0] == 'x') && (name[1] == 'm') && 4253 (name[2] == 'l') && (name[3] == 0)) { 4254 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4255 "XML declaration allowed only at the start of the document\n"); 4256 return(name); 4257 } else if (name[3] == 0) { 4258 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4259 return(name); 4260 } 4261 for (i = 0;;i++) { 4262 if (xmlW3CPIs[i] == NULL) break; 4263 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4264 return(name); 4265 } 4266 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4267 "xmlParsePITarget: invalid name prefix 'xml'\n", 4268 NULL, NULL); 4269 } 4270 return(name); 4271} 4272 
4273#ifdef LIBXML_CATALOG_ENABLED 4274/** 4275 * xmlParseCatalogPI: 4276 * @ctxt: an XML parser context 4277 * @catalog: the PI value string 4278 * 4279 * parse an XML Catalog Processing Instruction. 4280 * 4281 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4282 * 4283 * Occurs only if allowed by the user and if happening in the Misc 4284 * part of the document before any doctype informations 4285 * This will add the given catalog to the parsing context in order 4286 * to be used if there is a resolution need further down in the document 4287 */ 4288 4289static void 4290xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4291 xmlChar *URL = NULL; 4292 const xmlChar *tmp, *base; 4293 xmlChar marker; 4294 4295 tmp = catalog; 4296 while (IS_BLANK_CH(*tmp)) tmp++; 4297 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4298 goto error; 4299 tmp += 7; 4300 while (IS_BLANK_CH(*tmp)) tmp++; 4301 if (*tmp != '=') { 4302 return; 4303 } 4304 tmp++; 4305 while (IS_BLANK_CH(*tmp)) tmp++; 4306 marker = *tmp; 4307 if ((marker != '\'') && (marker != '"')) 4308 goto error; 4309 tmp++; 4310 base = tmp; 4311 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4312 if (*tmp == 0) 4313 goto error; 4314 URL = xmlStrndup(base, tmp - base); 4315 tmp++; 4316 while (IS_BLANK_CH(*tmp)) tmp++; 4317 if (*tmp != 0) 4318 goto error; 4319 4320 if (URL != NULL) { 4321 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4322 xmlFree(URL); 4323 } 4324 return; 4325 4326error: 4327 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4328 "Catalog PI syntax error: %s\n", 4329 catalog, NULL); 4330 if (URL != NULL) 4331 xmlFree(URL); 4332} 4333#endif 4334 4335/** 4336 * xmlParsePI: 4337 * @ctxt: an XML parser context 4338 * 4339 * parse an XML Processing Instruction. 4340 * 4341 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4342 * 4343 * The processing is transfered to SAX once parsed. 
4344 */ 4345 4346void 4347xmlParsePI(xmlParserCtxtPtr ctxt) { 4348 xmlChar *buf = NULL; 4349 int len = 0; 4350 int size = XML_PARSER_BUFFER_SIZE; 4351 int cur, l; 4352 const xmlChar *target; 4353 xmlParserInputState state; 4354 int count = 0; 4355 4356 if ((RAW == '<') && (NXT(1) == '?')) { 4357 xmlParserInputPtr input = ctxt->input; 4358 state = ctxt->instate; 4359 ctxt->instate = XML_PARSER_PI; 4360 /* 4361 * this is a Processing Instruction. 4362 */ 4363 SKIP(2); 4364 SHRINK; 4365 4366 /* 4367 * Parse the target name and check for special support like 4368 * namespace. 4369 */ 4370 target = xmlParsePITarget(ctxt); 4371 if (target != NULL) { 4372 if ((RAW == '?') && (NXT(1) == '>')) { 4373 if (input != ctxt->input) { 4374 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4375 "PI declaration doesn't start and stop in the same entity\n"); 4376 } 4377 SKIP(2); 4378 4379 /* 4380 * SAX: PI detected. 4381 */ 4382 if ((ctxt->sax) && (!ctxt->disableSAX) && 4383 (ctxt->sax->processingInstruction != NULL)) 4384 ctxt->sax->processingInstruction(ctxt->userData, 4385 target, NULL); 4386 ctxt->instate = state; 4387 return; 4388 } 4389 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4390 if (buf == NULL) { 4391 xmlErrMemory(ctxt, NULL); 4392 ctxt->instate = state; 4393 return; 4394 } 4395 cur = CUR; 4396 if (!IS_BLANK(cur)) { 4397 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4398 "ParsePI: PI %s space expected\n", target); 4399 } 4400 SKIP_BLANKS; 4401 cur = CUR_CHAR(l); 4402 while (IS_CHAR(cur) && /* checked */ 4403 ((cur != '?') || (NXT(1) != '>'))) { 4404 if (len + 5 >= size) { 4405 xmlChar *tmp; 4406 4407 size *= 2; 4408 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4409 if (tmp == NULL) { 4410 xmlErrMemory(ctxt, NULL); 4411 xmlFree(buf); 4412 ctxt->instate = state; 4413 return; 4414 } 4415 buf = tmp; 4416 } 4417 count++; 4418 if (count > 50) { 4419 GROW; 4420 count = 0; 4421 } 4422 COPY_BUF(l,buf,len,cur); 4423 NEXTL(l); 4424 cur = CUR_CHAR(l); 4425 
if (cur == 0) { 4426 SHRINK; 4427 GROW; 4428 cur = CUR_CHAR(l); 4429 } 4430 } 4431 buf[len] = 0; 4432 if (cur != '?') { 4433 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4434 "ParsePI: PI %s never end ...\n", target); 4435 } else { 4436 if (input != ctxt->input) { 4437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4438 "PI declaration doesn't start and stop in the same entity\n"); 4439 } 4440 SKIP(2); 4441 4442#ifdef LIBXML_CATALOG_ENABLED 4443 if (((state == XML_PARSER_MISC) || 4444 (state == XML_PARSER_START)) && 4445 (xmlStrEqual(target, XML_CATALOG_PI))) { 4446 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4447 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4448 (allow == XML_CATA_ALLOW_ALL)) 4449 xmlParseCatalogPI(ctxt, buf); 4450 } 4451#endif 4452 4453 4454 /* 4455 * SAX: PI detected. 4456 */ 4457 if ((ctxt->sax) && (!ctxt->disableSAX) && 4458 (ctxt->sax->processingInstruction != NULL)) 4459 ctxt->sax->processingInstruction(ctxt->userData, 4460 target, buf); 4461 } 4462 xmlFree(buf); 4463 } else { 4464 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4465 } 4466 ctxt->instate = state; 4467 } 4468} 4469 4470/** 4471 * xmlParseNotationDecl: 4472 * @ctxt: an XML parser context 4473 * 4474 * parse a notation declaration 4475 * 4476 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4477 * 4478 * Hence there is actually 3 choices: 4479 * 'PUBLIC' S PubidLiteral 4480 * 'PUBLIC' S PubidLiteral S SystemLiteral 4481 * and 'SYSTEM' S SystemLiteral 4482 * 4483 * See the NOTE on xmlParseExternalID(). 
4484 */ 4485 4486void 4487xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4488 const xmlChar *name; 4489 xmlChar *Pubid; 4490 xmlChar *Systemid; 4491 4492 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4493 xmlParserInputPtr input = ctxt->input; 4494 SHRINK; 4495 SKIP(10); 4496 if (!IS_BLANK_CH(CUR)) { 4497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4498 "Space required after '<!NOTATION'\n"); 4499 return; 4500 } 4501 SKIP_BLANKS; 4502 4503 name = xmlParseName(ctxt); 4504 if (name == NULL) { 4505 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4506 return; 4507 } 4508 if (!IS_BLANK_CH(CUR)) { 4509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4510 "Space required after the NOTATION name'\n"); 4511 return; 4512 } 4513 SKIP_BLANKS; 4514 4515 /* 4516 * Parse the IDs. 4517 */ 4518 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4519 SKIP_BLANKS; 4520 4521 if (RAW == '>') { 4522 if (input != ctxt->input) { 4523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4524 "Notation declaration doesn't start and stop in the same entity\n"); 4525 } 4526 NEXT; 4527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4528 (ctxt->sax->notationDecl != NULL)) 4529 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4530 } else { 4531 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4532 } 4533 if (Systemid != NULL) xmlFree(Systemid); 4534 if (Pubid != NULL) xmlFree(Pubid); 4535 } 4536} 4537 4538/** 4539 * xmlParseEntityDecl: 4540 * @ctxt: an XML parser context 4541 * 4542 * parse <!ENTITY declarations 4543 * 4544 * [70] EntityDecl ::= GEDecl | PEDecl 4545 * 4546 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4547 * 4548 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4549 * 4550 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
4551 * 4552 * [74] PEDef ::= EntityValue | ExternalID 4553 * 4554 * [76] NDataDecl ::= S 'NDATA' S Name 4555 * 4556 * [ VC: Notation Declared ] 4557 * The Name must match the declared name of a notation. 4558 */ 4559 4560void 4561xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4562 const xmlChar *name = NULL; 4563 xmlChar *value = NULL; 4564 xmlChar *URI = NULL, *literal = NULL; 4565 const xmlChar *ndata = NULL; 4566 int isParameter = 0; 4567 xmlChar *orig = NULL; 4568 int skipped; 4569 4570 /* GROW; done in the caller */ 4571 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 4572 xmlParserInputPtr input = ctxt->input; 4573 SHRINK; 4574 SKIP(8); 4575 skipped = SKIP_BLANKS; 4576 if (skipped == 0) { 4577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4578 "Space required after '<!ENTITY'\n"); 4579 } 4580 4581 if (RAW == '%') { 4582 NEXT; 4583 skipped = SKIP_BLANKS; 4584 if (skipped == 0) { 4585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4586 "Space required after '%'\n"); 4587 } 4588 isParameter = 1; 4589 } 4590 4591 name = xmlParseName(ctxt); 4592 if (name == NULL) { 4593 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4594 "xmlParseEntityDecl: no name\n"); 4595 return; 4596 } 4597 skipped = SKIP_BLANKS; 4598 if (skipped == 0) { 4599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4600 "Space required after the entity name\n"); 4601 } 4602 4603 ctxt->instate = XML_PARSER_ENTITY_DECL; 4604 /* 4605 * handle the various case of definitions... 
4606 */ 4607 if (isParameter) { 4608 if ((RAW == '"') || (RAW == '\'')) { 4609 value = xmlParseEntityValue(ctxt, &orig); 4610 if (value) { 4611 if ((ctxt->sax != NULL) && 4612 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4613 ctxt->sax->entityDecl(ctxt->userData, name, 4614 XML_INTERNAL_PARAMETER_ENTITY, 4615 NULL, NULL, value); 4616 } 4617 } else { 4618 URI = xmlParseExternalID(ctxt, &literal, 1); 4619 if ((URI == NULL) && (literal == NULL)) { 4620 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4621 } 4622 if (URI) { 4623 xmlURIPtr uri; 4624 4625 uri = xmlParseURI((const char *) URI); 4626 if (uri == NULL) { 4627 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4628 "Invalid URI: %s\n", URI); 4629 /* 4630 * This really ought to be a well formedness error 4631 * but the XML Core WG decided otherwise c.f. issue 4632 * E26 of the XML erratas. 4633 */ 4634 } else { 4635 if (uri->fragment != NULL) { 4636 /* 4637 * Okay this is foolish to block those but not 4638 * invalid URIs. 4639 */ 4640 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4641 } else { 4642 if ((ctxt->sax != NULL) && 4643 (!ctxt->disableSAX) && 4644 (ctxt->sax->entityDecl != NULL)) 4645 ctxt->sax->entityDecl(ctxt->userData, name, 4646 XML_EXTERNAL_PARAMETER_ENTITY, 4647 literal, URI, NULL); 4648 } 4649 xmlFreeURI(uri); 4650 } 4651 } 4652 } 4653 } else { 4654 if ((RAW == '"') || (RAW == '\'')) { 4655 value = xmlParseEntityValue(ctxt, &orig); 4656 if ((ctxt->sax != NULL) && 4657 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4658 ctxt->sax->entityDecl(ctxt->userData, name, 4659 XML_INTERNAL_GENERAL_ENTITY, 4660 NULL, NULL, value); 4661 /* 4662 * For expat compatibility in SAX mode. 
4663 */ 4664 if ((ctxt->myDoc == NULL) || 4665 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4666 if (ctxt->myDoc == NULL) { 4667 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4668 if (ctxt->myDoc == NULL) { 4669 xmlErrMemory(ctxt, "New Doc failed"); 4670 return; 4671 } 4672 } 4673 if (ctxt->myDoc->intSubset == NULL) 4674 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4675 BAD_CAST "fake", NULL, NULL); 4676 4677 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4678 NULL, NULL, value); 4679 } 4680 } else { 4681 URI = xmlParseExternalID(ctxt, &literal, 1); 4682 if ((URI == NULL) && (literal == NULL)) { 4683 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4684 } 4685 if (URI) { 4686 xmlURIPtr uri; 4687 4688 uri = xmlParseURI((const char *)URI); 4689 if (uri == NULL) { 4690 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4691 "Invalid URI: %s\n", URI); 4692 /* 4693 * This really ought to be a well formedness error 4694 * but the XML Core WG decided otherwise c.f. issue 4695 * E26 of the XML erratas. 4696 */ 4697 } else { 4698 if (uri->fragment != NULL) { 4699 /* 4700 * Okay this is foolish to block those but not 4701 * invalid URIs. 
4702 */ 4703 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4704 } 4705 xmlFreeURI(uri); 4706 } 4707 } 4708 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 4709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4710 "Space required before 'NDATA'\n"); 4711 } 4712 SKIP_BLANKS; 4713 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 4714 SKIP(5); 4715 if (!IS_BLANK_CH(CUR)) { 4716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4717 "Space required after 'NDATA'\n"); 4718 } 4719 SKIP_BLANKS; 4720 ndata = xmlParseName(ctxt); 4721 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4722 (ctxt->sax->unparsedEntityDecl != NULL)) 4723 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4724 literal, URI, ndata); 4725 } else { 4726 if ((ctxt->sax != NULL) && 4727 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4728 ctxt->sax->entityDecl(ctxt->userData, name, 4729 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4730 literal, URI, NULL); 4731 /* 4732 * For expat compatibility in SAX mode. 4733 * assuming the entity repalcement was asked for 4734 */ 4735 if ((ctxt->replaceEntities != 0) && 4736 ((ctxt->myDoc == NULL) || 4737 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4738 if (ctxt->myDoc == NULL) { 4739 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4740 if (ctxt->myDoc == NULL) { 4741 xmlErrMemory(ctxt, "New Doc failed"); 4742 return; 4743 } 4744 } 4745 4746 if (ctxt->myDoc->intSubset == NULL) 4747 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4748 BAD_CAST "fake", NULL, NULL); 4749 xmlSAX2EntityDecl(ctxt, name, 4750 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4751 literal, URI, NULL); 4752 } 4753 } 4754 } 4755 } 4756 SKIP_BLANKS; 4757 if (RAW != '>') { 4758 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4759 "xmlParseEntityDecl: entity %s not terminated\n", name); 4760 } else { 4761 if (input != ctxt->input) { 4762 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4763 "Entity declaration doesn't start and stop in the same entity\n"); 4764 } 4765 NEXT; 4766 } 4767 if (orig != NULL) { 4768 /* 
4769 * Ugly mechanism to save the raw entity value. 4770 */ 4771 xmlEntityPtr cur = NULL; 4772 4773 if (isParameter) { 4774 if ((ctxt->sax != NULL) && 4775 (ctxt->sax->getParameterEntity != NULL)) 4776 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4777 } else { 4778 if ((ctxt->sax != NULL) && 4779 (ctxt->sax->getEntity != NULL)) 4780 cur = ctxt->sax->getEntity(ctxt->userData, name); 4781 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4782 cur = xmlSAX2GetEntity(ctxt, name); 4783 } 4784 } 4785 if (cur != NULL) { 4786 if (cur->orig != NULL) 4787 xmlFree(orig); 4788 else 4789 cur->orig = orig; 4790 } else 4791 xmlFree(orig); 4792 } 4793 if (value != NULL) xmlFree(value); 4794 if (URI != NULL) xmlFree(URI); 4795 if (literal != NULL) xmlFree(literal); 4796 } 4797} 4798 4799/** 4800 * xmlParseDefaultDecl: 4801 * @ctxt: an XML parser context 4802 * @value: Receive a possible fixed default value for the attribute 4803 * 4804 * Parse an attribute default declaration 4805 * 4806 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4807 * 4808 * [ VC: Required Attribute ] 4809 * if the default declaration is the keyword #REQUIRED, then the 4810 * attribute must be specified for all elements of the type in the 4811 * attribute-list declaration. 4812 * 4813 * [ VC: Attribute Default Legal ] 4814 * The declared default value must meet the lexical constraints of 4815 * the declared attribute type c.f. xmlValidateAttributeDecl() 4816 * 4817 * [ VC: Fixed Attribute Default ] 4818 * if an attribute has a default value declared with the #FIXED 4819 * keyword, instances of that attribute must match the default value. 4820 * 4821 * [ WFC: No < in Attribute Values ] 4822 * handled in xmlParseAttValue() 4823 * 4824 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4825 * or XML_ATTRIBUTE_FIXED. 
4826 */ 4827 4828int 4829xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4830 int val; 4831 xmlChar *ret; 4832 4833 *value = NULL; 4834 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4835 SKIP(9); 4836 return(XML_ATTRIBUTE_REQUIRED); 4837 } 4838 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4839 SKIP(8); 4840 return(XML_ATTRIBUTE_IMPLIED); 4841 } 4842 val = XML_ATTRIBUTE_NONE; 4843 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4844 SKIP(6); 4845 val = XML_ATTRIBUTE_FIXED; 4846 if (!IS_BLANK_CH(CUR)) { 4847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4848 "Space required after '#FIXED'\n"); 4849 } 4850 SKIP_BLANKS; 4851 } 4852 ret = xmlParseAttValue(ctxt); 4853 ctxt->instate = XML_PARSER_DTD; 4854 if (ret == NULL) { 4855 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4856 "Attribute default value declaration error\n"); 4857 } else 4858 *value = ret; 4859 return(val); 4860} 4861 4862/** 4863 * xmlParseNotationType: 4864 * @ctxt: an XML parser context 4865 * 4866 * parse an Notation attribute type. 4867 * 4868 * Note: the leading 'NOTATION' S part has already being parsed... 4869 * 4870 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4871 * 4872 * [ VC: Notation Attributes ] 4873 * Values of this type must match one of the notation names included 4874 * in the declaration; all notation names in the declaration must be declared. 
4875 * 4876 * Returns: the notation attribute tree built while parsing 4877 */ 4878 4879xmlEnumerationPtr 4880xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4881 const xmlChar *name; 4882 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4883 4884 if (RAW != '(') { 4885 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4886 return(NULL); 4887 } 4888 SHRINK; 4889 do { 4890 NEXT; 4891 SKIP_BLANKS; 4892 name = xmlParseName(ctxt); 4893 if (name == NULL) { 4894 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4895 "Name expected in NOTATION declaration\n"); 4896 return(ret); 4897 } 4898 cur = xmlCreateEnumeration(name); 4899 if (cur == NULL) return(ret); 4900 if (last == NULL) ret = last = cur; 4901 else { 4902 last->next = cur; 4903 last = cur; 4904 } 4905 SKIP_BLANKS; 4906 } while (RAW == '|'); 4907 if (RAW != ')') { 4908 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4909 if ((last != NULL) && (last != ret)) 4910 xmlFreeEnumeration(last); 4911 return(ret); 4912 } 4913 NEXT; 4914 return(ret); 4915} 4916 4917/** 4918 * xmlParseEnumerationType: 4919 * @ctxt: an XML parser context 4920 * 4921 * parse an Enumeration attribute type. 4922 * 4923 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 4924 * 4925 * [ VC: Enumeration ] 4926 * Values of this type must match one of the Nmtoken tokens in 4927 * the declaration 4928 * 4929 * Returns: the enumeration attribute tree built while parsing 4930 */ 4931 4932xmlEnumerationPtr 4933xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 4934 xmlChar *name; 4935 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4936 4937 if (RAW != '(') { 4938 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 4939 return(NULL); 4940 } 4941 SHRINK; 4942 do { 4943 NEXT; 4944 SKIP_BLANKS; 4945 name = xmlParseNmtoken(ctxt); 4946 if (name == NULL) { 4947 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 4948 return(ret); 4949 } 4950 cur = xmlCreateEnumeration(name); 4951 xmlFree(name); 4952 if (cur == NULL) return(ret); 4953 if (last == NULL) ret = last = cur; 4954 else { 4955 last->next = cur; 4956 last = cur; 4957 } 4958 SKIP_BLANKS; 4959 } while (RAW == '|'); 4960 if (RAW != ')') { 4961 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 4962 return(ret); 4963 } 4964 NEXT; 4965 return(ret); 4966} 4967 4968/** 4969 * xmlParseEnumeratedType: 4970 * @ctxt: an XML parser context 4971 * @tree: the enumeration tree built while parsing 4972 * 4973 * parse an Enumerated attribute type. 4974 * 4975 * [57] EnumeratedType ::= NotationType | Enumeration 4976 * 4977 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 4978 * 4979 * 4980 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4981 */ 4982 4983int 4984xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4985 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4986 SKIP(8); 4987 if (!IS_BLANK_CH(CUR)) { 4988 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4989 "Space required after 'NOTATION'\n"); 4990 return(0); 4991 } 4992 SKIP_BLANKS; 4993 *tree = xmlParseNotationType(ctxt); 4994 if (*tree == NULL) return(0); 4995 return(XML_ATTRIBUTE_NOTATION); 4996 } 4997 *tree = xmlParseEnumerationType(ctxt); 4998 if (*tree == NULL) return(0); 4999 return(XML_ATTRIBUTE_ENUMERATION); 5000} 5001 5002/** 5003 * xmlParseAttributeType: 5004 * @ctxt: an XML parser context 5005 * @tree: the enumeration tree built while parsing 5006 * 5007 * parse the Attribute list def for an element 5008 * 5009 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5010 * 5011 * [55] StringType ::= 'CDATA' 5012 * 5013 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5014 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5015 * 5016 * Validity constraints for attribute values syntax are checked in 5017 * xmlValidateAttributeValue() 5018 * 5019 * [ VC: ID ] 5020 * Values of type ID must match the Name production. A name must not 5021 * appear more than once in an XML document as a value of this type; 5022 * i.e., ID values must uniquely identify the elements which bear them. 5023 * 5024 * [ VC: One ID per Element Type ] 5025 * No element type may have more than one ID attribute specified. 5026 * 5027 * [ VC: ID Attribute Default ] 5028 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5029 * 5030 * [ VC: IDREF ] 5031 * Values of type IDREF must match the Name production, and values 5032 * of type IDREFS must match Names; each IDREF Name must match the value 5033 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5034 * values must match the value of some ID attribute. 5035 * 5036 * [ VC: Entity Name ] 5037 * Values of type ENTITY must match the Name production, values 5038 * of type ENTITIES must match Names; each Entity Name must match the 5039 * name of an unparsed entity declared in the DTD. 5040 * 5041 * [ VC: Name Token ] 5042 * Values of type NMTOKEN must match the Nmtoken production; values 5043 * of type NMTOKENS must match Nmtokens. 5044 * 5045 * Returns the attribute type 5046 */ 5047int 5048xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5049 SHRINK; 5050 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5051 SKIP(5); 5052 return(XML_ATTRIBUTE_CDATA); 5053 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5054 SKIP(6); 5055 return(XML_ATTRIBUTE_IDREFS); 5056 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5057 SKIP(5); 5058 return(XML_ATTRIBUTE_IDREF); 5059 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5060 SKIP(2); 5061 return(XML_ATTRIBUTE_ID); 5062 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5063 SKIP(6); 5064 return(XML_ATTRIBUTE_ENTITY); 5065 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5066 SKIP(8); 5067 return(XML_ATTRIBUTE_ENTITIES); 5068 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5069 SKIP(8); 5070 return(XML_ATTRIBUTE_NMTOKENS); 5071 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5072 SKIP(7); 5073 return(XML_ATTRIBUTE_NMTOKEN); 5074 } 5075 return(xmlParseEnumeratedType(ctxt, tree)); 5076} 5077 5078/** 5079 * xmlParseAttributeListDecl: 5080 * @ctxt: an XML parser context 5081 * 5082 * : parse the Attribute list def for an element 5083 * 5084 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5085 * 5086 * [53] AttDef ::= S Name S AttType S DefaultDecl 5087 * 5088 */ 5089void 5090xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5091 const xmlChar *elemName; 5092 const xmlChar *attrName; 5093 xmlEnumerationPtr tree; 5094 5095 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5096 xmlParserInputPtr input = ctxt->input; 5097 5098 SKIP(9); 5099 if (!IS_BLANK_CH(CUR)) { 5100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5101 "Space required after '<!ATTLIST'\n"); 5102 } 5103 SKIP_BLANKS; 5104 elemName = xmlParseName(ctxt); 5105 if (elemName == NULL) { 5106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5107 "ATTLIST: no name for Element\n"); 5108 return; 5109 } 5110 SKIP_BLANKS; 5111 GROW; 5112 while (RAW != '>') { 5113 const xmlChar *check = CUR_PTR; 5114 int type; 5115 int def; 5116 xmlChar *defaultValue = NULL; 5117 5118 GROW; 5119 tree = NULL; 5120 attrName = xmlParseName(ctxt); 5121 if (attrName == NULL) { 5122 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5123 "ATTLIST: no name for Attribute\n"); 5124 break; 5125 } 5126 GROW; 5127 if (!IS_BLANK_CH(CUR)) { 5128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5129 "Space required after the attribute name\n"); 5130 break; 5131 } 5132 SKIP_BLANKS; 5133 5134 type = xmlParseAttributeType(ctxt, &tree); 5135 if (type <= 0) { 5136 break; 5137 } 5138 5139 GROW; 5140 if (!IS_BLANK_CH(CUR)) { 5141 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5142 "Space required after the attribute type\n"); 5143 if (tree != NULL) 5144 xmlFreeEnumeration(tree); 5145 break; 5146 } 5147 SKIP_BLANKS; 5148 5149 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5150 if (def <= 0) { 5151 if (defaultValue != NULL) 5152 xmlFree(defaultValue); 5153 if (tree != NULL) 5154 xmlFreeEnumeration(tree); 5155 break; 5156 } 5157 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5158 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5159 5160 GROW; 5161 if (RAW != '>') { 5162 if (!IS_BLANK_CH(CUR)) { 5163 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, 5164 "Space required after the attribute default value\n"); 5165 if (defaultValue != NULL) 5166 xmlFree(defaultValue); 5167 if (tree != NULL) 5168 xmlFreeEnumeration(tree); 5169 break; 5170 } 5171 SKIP_BLANKS; 5172 } 5173 if (check == CUR_PTR) { 5174 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 5175 "in xmlParseAttributeListDecl\n"); 5176 if (defaultValue != NULL) 5177 xmlFree(defaultValue); 5178 if (tree != NULL) 5179 xmlFreeEnumeration(tree); 5180 break; 5181 } 5182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5183 (ctxt->sax->attributeDecl != NULL)) 5184 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5185 type, def, defaultValue, tree); 5186 else if (tree != NULL) 5187 xmlFreeEnumeration(tree); 5188 5189 if ((ctxt->sax2) && (defaultValue != NULL) && 5190 (def != XML_ATTRIBUTE_IMPLIED) && 5191 (def != XML_ATTRIBUTE_REQUIRED)) { 5192 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5193 } 5194 if (ctxt->sax2) { 5195 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5196 } 5197 if (defaultValue != NULL) 5198 xmlFree(defaultValue); 5199 GROW; 5200 } 5201 if (RAW == '>') { 5202 if (input != ctxt->input) { 5203 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5204 "Attribute list declaration doesn't start and stop in the same entity\n"); 5205 } 5206 NEXT; 5207 } 5208 } 5209} 5210 5211/** 5212 * xmlParseElementMixedContentDecl: 5213 * @ctxt: an XML parser context 5214 * @inputchk: the input used for the current entity, needed for boundary checks 5215 * 5216 * parse the declaration for a Mixed Element content 5217 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5218 * 5219 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5220 * '(' S? '#PCDATA' S? ')' 5221 * 5222 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 5223 * 5224 * [ VC: No Duplicate Types ] 5225 * The same name must not appear more than once in a single 5226 * mixed-content declaration. 
5227 * 5228 * returns: the list of the xmlElementContentPtr describing the element choices 5229 */ 5230xmlElementContentPtr 5231xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5232 xmlElementContentPtr ret = NULL, cur = NULL, n; 5233 const xmlChar *elem = NULL; 5234 5235 GROW; 5236 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5237 SKIP(7); 5238 SKIP_BLANKS; 5239 SHRINK; 5240 if (RAW == ')') { 5241 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5242 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5243"Element content declaration doesn't start and stop in the same entity\n", 5244 NULL); 5245 } 5246 NEXT; 5247 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5248 if (ret == NULL) 5249 return(NULL); 5250 if (RAW == '*') { 5251 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5252 NEXT; 5253 } 5254 return(ret); 5255 } 5256 if ((RAW == '(') || (RAW == '|')) { 5257 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5258 if (ret == NULL) return(NULL); 5259 } 5260 while (RAW == '|') { 5261 NEXT; 5262 if (elem == NULL) { 5263 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5264 if (ret == NULL) return(NULL); 5265 ret->c1 = cur; 5266 if (cur != NULL) 5267 cur->parent = ret; 5268 cur = ret; 5269 } else { 5270 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5271 if (n == NULL) return(NULL); 5272 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5273 if (n->c1 != NULL) 5274 n->c1->parent = n; 5275 cur->c2 = n; 5276 if (n != NULL) 5277 n->parent = cur; 5278 cur = n; 5279 } 5280 SKIP_BLANKS; 5281 elem = xmlParseName(ctxt); 5282 if (elem == NULL) { 5283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5284 "xmlParseElementMixedContentDecl : Name expected\n"); 5285 xmlFreeDocElementContent(ctxt->myDoc, cur); 5286 return(NULL); 5287 } 5288 SKIP_BLANKS; 5289 GROW; 5290 } 5291 if ((RAW == ')') && (NXT(1) == '*')) { 
5292 if (elem != NULL) { 5293 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 5294 XML_ELEMENT_CONTENT_ELEMENT); 5295 if (cur->c2 != NULL) 5296 cur->c2->parent = cur; 5297 } 5298 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5299 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5300 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5301"Element content declaration doesn't start and stop in the same entity\n", 5302 NULL); 5303 } 5304 SKIP(2); 5305 } else { 5306 xmlFreeDocElementContent(ctxt->myDoc, ret); 5307 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5308 return(NULL); 5309 } 5310 5311 } else { 5312 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5313 } 5314 return(ret); 5315} 5316 5317/** 5318 * xmlParseElementChildrenContentDecl: 5319 * @ctxt: an XML parser context 5320 * @inputchk: the input used for the current entity, needed for boundary checks 5321 * 5322 * parse the declaration for a Mixed Element content 5323 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5324 * 5325 * 5326 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5327 * 5328 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5329 * 5330 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5331 * 5332 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5333 * 5334 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5335 * TODO Parameter-entity replacement text must be properly nested 5336 * with parenthesized groups. That is to say, if either of the 5337 * opening or closing parentheses in a choice, seq, or Mixed 5338 * construct is contained in the replacement text for a parameter 5339 * entity, both must be contained in the same replacement text. For 5340 * interoperability, if a parameter-entity reference appears in a 5341 * choice, seq, or Mixed construct, its replacement text should not 5342 * be empty, and neither the first nor last non-blank character of 5343 * the replacement text should be a connector (| or ,). 
5344 * 5345 * Returns the tree of xmlElementContentPtr describing the element 5346 * hierarchy. 5347 */ 5348xmlElementContentPtr 5349xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 5350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5351 const xmlChar *elem; 5352 xmlChar type = 0; 5353 5354 SKIP_BLANKS; 5355 GROW; 5356 if (RAW == '(') { 5357 int inputid = ctxt->input->id; 5358 5359 /* Recurse on first child */ 5360 NEXT; 5361 SKIP_BLANKS; 5362 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 5363 SKIP_BLANKS; 5364 GROW; 5365 } else { 5366 elem = xmlParseName(ctxt); 5367 if (elem == NULL) { 5368 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5369 return(NULL); 5370 } 5371 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5372 if (cur == NULL) { 5373 xmlErrMemory(ctxt, NULL); 5374 return(NULL); 5375 } 5376 GROW; 5377 if (RAW == '?') { 5378 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5379 NEXT; 5380 } else if (RAW == '*') { 5381 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5382 NEXT; 5383 } else if (RAW == '+') { 5384 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5385 NEXT; 5386 } else { 5387 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5388 } 5389 GROW; 5390 } 5391 SKIP_BLANKS; 5392 SHRINK; 5393 while (RAW != ')') { 5394 /* 5395 * Each loop we parse one separator and one element. 
5396 */ 5397 if (RAW == ',') { 5398 if (type == 0) type = CUR; 5399 5400 /* 5401 * Detect "Name | Name , Name" error 5402 */ 5403 else if (type != CUR) { 5404 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5405 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5406 type); 5407 if ((last != NULL) && (last != ret)) 5408 xmlFreeDocElementContent(ctxt->myDoc, last); 5409 if (ret != NULL) 5410 xmlFreeDocElementContent(ctxt->myDoc, ret); 5411 return(NULL); 5412 } 5413 NEXT; 5414 5415 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 5416 if (op == NULL) { 5417 if ((last != NULL) && (last != ret)) 5418 xmlFreeDocElementContent(ctxt->myDoc, last); 5419 xmlFreeDocElementContent(ctxt->myDoc, ret); 5420 return(NULL); 5421 } 5422 if (last == NULL) { 5423 op->c1 = ret; 5424 if (ret != NULL) 5425 ret->parent = op; 5426 ret = cur = op; 5427 } else { 5428 cur->c2 = op; 5429 if (op != NULL) 5430 op->parent = cur; 5431 op->c1 = last; 5432 if (last != NULL) 5433 last->parent = op; 5434 cur =op; 5435 last = NULL; 5436 } 5437 } else if (RAW == '|') { 5438 if (type == 0) type = CUR; 5439 5440 /* 5441 * Detect "Name , Name | Name" error 5442 */ 5443 else if (type != CUR) { 5444 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5445 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5446 type); 5447 if ((last != NULL) && (last != ret)) 5448 xmlFreeDocElementContent(ctxt->myDoc, last); 5449 if (ret != NULL) 5450 xmlFreeDocElementContent(ctxt->myDoc, ret); 5451 return(NULL); 5452 } 5453 NEXT; 5454 5455 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5456 if (op == NULL) { 5457 if ((last != NULL) && (last != ret)) 5458 xmlFreeDocElementContent(ctxt->myDoc, last); 5459 if (ret != NULL) 5460 xmlFreeDocElementContent(ctxt->myDoc, ret); 5461 return(NULL); 5462 } 5463 if (last == NULL) { 5464 op->c1 = ret; 5465 if (ret != NULL) 5466 ret->parent = op; 5467 ret = cur = op; 5468 } else { 5469 cur->c2 = op; 5470 if (op != NULL) 5471 
op->parent = cur; 5472 op->c1 = last; 5473 if (last != NULL) 5474 last->parent = op; 5475 cur =op; 5476 last = NULL; 5477 } 5478 } else { 5479 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5480 if ((last != NULL) && (last != ret)) 5481 xmlFreeDocElementContent(ctxt->myDoc, last); 5482 if (ret != NULL) 5483 xmlFreeDocElementContent(ctxt->myDoc, ret); 5484 return(NULL); 5485 } 5486 GROW; 5487 SKIP_BLANKS; 5488 GROW; 5489 if (RAW == '(') { 5490 int inputid = ctxt->input->id; 5491 /* Recurse on second child */ 5492 NEXT; 5493 SKIP_BLANKS; 5494 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5495 SKIP_BLANKS; 5496 } else { 5497 elem = xmlParseName(ctxt); 5498 if (elem == NULL) { 5499 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5500 if (ret != NULL) 5501 xmlFreeDocElementContent(ctxt->myDoc, ret); 5502 return(NULL); 5503 } 5504 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5505 if (last == NULL) { 5506 if (ret != NULL) 5507 xmlFreeDocElementContent(ctxt->myDoc, ret); 5508 return(NULL); 5509 } 5510 if (RAW == '?') { 5511 last->ocur = XML_ELEMENT_CONTENT_OPT; 5512 NEXT; 5513 } else if (RAW == '*') { 5514 last->ocur = XML_ELEMENT_CONTENT_MULT; 5515 NEXT; 5516 } else if (RAW == '+') { 5517 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5518 NEXT; 5519 } else { 5520 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5521 } 5522 } 5523 SKIP_BLANKS; 5524 GROW; 5525 } 5526 if ((cur != NULL) && (last != NULL)) { 5527 cur->c2 = last; 5528 if (last != NULL) 5529 last->parent = cur; 5530 } 5531 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5532 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5533"Element content declaration doesn't start and stop in the same entity\n", 5534 NULL); 5535 } 5536 NEXT; 5537 if (RAW == '?') { 5538 if (ret != NULL) { 5539 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 5540 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5541 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5542 else 5543 ret->ocur = 
XML_ELEMENT_CONTENT_OPT; 5544 } 5545 NEXT; 5546 } else if (RAW == '*') { 5547 if (ret != NULL) { 5548 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5549 cur = ret; 5550 /* 5551 * Some normalization: 5552 * (a | b* | c?)* == (a | b | c)* 5553 */ 5554 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5555 if ((cur->c1 != NULL) && 5556 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5557 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5558 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5559 if ((cur->c2 != NULL) && 5560 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5561 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 5562 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5563 cur = cur->c2; 5564 } 5565 } 5566 NEXT; 5567 } else if (RAW == '+') { 5568 if (ret != NULL) { 5569 int found = 0; 5570 5571 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 5572 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5573 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5574 else 5575 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 5576 /* 5577 * Some normalization: 5578 * (a | b*)+ == (a | b)* 5579 * (a | b?)+ == (a | b)* 5580 */ 5581 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5582 if ((cur->c1 != NULL) && 5583 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5584 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 5585 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5586 found = 1; 5587 } 5588 if ((cur->c2 != NULL) && 5589 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5590 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 5591 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5592 found = 1; 5593 } 5594 cur = cur->c2; 5595 } 5596 if (found) 5597 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5598 } 5599 NEXT; 5600 } 5601 return(ret); 5602} 5603 5604/** 5605 * xmlParseElementContentDecl: 5606 * @ctxt: an XML parser context 5607 * @name: the name of the element being defined. 
5608 * @result: the Element Content pointer will be stored here if any 5609 * 5610 * parse the declaration for an Element content either Mixed or Children, 5611 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 5612 * 5613 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 5614 * 5615 * returns: the type of element content XML_ELEMENT_TYPE_xxx 5616 */ 5617 5618int 5619xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 5620 xmlElementContentPtr *result) { 5621 5622 xmlElementContentPtr tree = NULL; 5623 int inputid = ctxt->input->id; 5624 int res; 5625 5626 *result = NULL; 5627 5628 if (RAW != '(') { 5629 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5630 "xmlParseElementContentDecl : %s '(' expected\n", name); 5631 return(-1); 5632 } 5633 NEXT; 5634 GROW; 5635 SKIP_BLANKS; 5636 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5637 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 5638 res = XML_ELEMENT_TYPE_MIXED; 5639 } else { 5640 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 5641 res = XML_ELEMENT_TYPE_ELEMENT; 5642 } 5643 SKIP_BLANKS; 5644 *result = tree; 5645 return(res); 5646} 5647 5648/** 5649 * xmlParseElementDecl: 5650 * @ctxt: an XML parser context 5651 * 5652 * parse an Element declaration. 5653 * 5654 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 5655 * 5656 * [ VC: Unique Element Type Declaration ] 5657 * No element type may be declared more than once 5658 * 5659 * Returns the type of the element, or -1 in case of error 5660 */ 5661int 5662xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 5663 const xmlChar *name; 5664 int ret = -1; 5665 xmlElementContentPtr content = NULL; 5666 5667 /* GROW; done in the caller */ 5668 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 5669 xmlParserInputPtr input = ctxt->input; 5670 5671 SKIP(9); 5672 if (!IS_BLANK_CH(CUR)) { 5673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5674 "Space required after 'ELEMENT'\n"); 5675 } 5676 SKIP_BLANKS; 5677 name = xmlParseName(ctxt); 5678 if (name == NULL) { 5679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5680 "xmlParseElementDecl: no name for Element\n"); 5681 return(-1); 5682 } 5683 while ((RAW == 0) && (ctxt->inputNr > 1)) 5684 xmlPopInput(ctxt); 5685 if (!IS_BLANK_CH(CUR)) { 5686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5687 "Space required after the element name\n"); 5688 } 5689 SKIP_BLANKS; 5690 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 5691 SKIP(5); 5692 /* 5693 * Element must always be empty. 5694 */ 5695 ret = XML_ELEMENT_TYPE_EMPTY; 5696 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5697 (NXT(2) == 'Y')) { 5698 SKIP(3); 5699 /* 5700 * Element is a generic container. 5701 */ 5702 ret = XML_ELEMENT_TYPE_ANY; 5703 } else if (RAW == '(') { 5704 ret = xmlParseElementContentDecl(ctxt, name, &content); 5705 } else { 5706 /* 5707 * [ WFC: PEs in Internal Subset ] error handling. 
5708 */ 5709 if ((RAW == '%') && (ctxt->external == 0) && 5710 (ctxt->inputNr == 1)) { 5711 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 5712 "PEReference: forbidden within markup decl in internal subset\n"); 5713 } else { 5714 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5715 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5716 } 5717 return(-1); 5718 } 5719 5720 SKIP_BLANKS; 5721 /* 5722 * Pop-up of finished entities. 5723 */ 5724 while ((RAW == 0) && (ctxt->inputNr > 1)) 5725 xmlPopInput(ctxt); 5726 SKIP_BLANKS; 5727 5728 if (RAW != '>') { 5729 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5730 if (content != NULL) { 5731 xmlFreeDocElementContent(ctxt->myDoc, content); 5732 } 5733 } else { 5734 if (input != ctxt->input) { 5735 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5736 "Element declaration doesn't start and stop in the same entity\n"); 5737 } 5738 5739 NEXT; 5740 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5741 (ctxt->sax->elementDecl != NULL)) { 5742 if (content != NULL) 5743 content->parent = NULL; 5744 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5745 content); 5746 if ((content != NULL) && (content->parent == NULL)) { 5747 /* 5748 * this is a trick: if xmlAddElementDecl is called, 5749 * instead of copying the full tree it is plugged directly 5750 * if called from the parser. Avoid duplicating the 5751 * interfaces or change the API/ABI 5752 */ 5753 xmlFreeDocElementContent(ctxt->myDoc, content); 5754 } 5755 } else if (content != NULL) { 5756 xmlFreeDocElementContent(ctxt->myDoc, content); 5757 } 5758 } 5759 } 5760 return(ret); 5761} 5762 5763/** 5764 * xmlParseConditionalSections 5765 * @ctxt: an XML parser context 5766 * 5767 * [61] conditionalSect ::= includeSect | ignoreSect 5768 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5769 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 5770 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5771 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5772 */ 5773 5774static void 5775xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5776 SKIP(3); 5777 SKIP_BLANKS; 5778 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 5779 SKIP(7); 5780 SKIP_BLANKS; 5781 if (RAW != '[') { 5782 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5783 } else { 5784 NEXT; 5785 } 5786 if (xmlParserDebugEntities) { 5787 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5788 xmlGenericError(xmlGenericErrorContext, 5789 "%s(%d): ", ctxt->input->filename, 5790 ctxt->input->line); 5791 xmlGenericError(xmlGenericErrorContext, 5792 "Entering INCLUDE Conditional Section\n"); 5793 } 5794 5795 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5796 (NXT(2) != '>'))) { 5797 const xmlChar *check = CUR_PTR; 5798 unsigned int cons = ctxt->input->consumed; 5799 5800 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5801 xmlParseConditionalSections(ctxt); 5802 } else if (IS_BLANK_CH(CUR)) { 5803 NEXT; 5804 } else if (RAW == '%') { 5805 xmlParsePEReference(ctxt); 5806 } else 5807 xmlParseMarkupDecl(ctxt); 5808 5809 /* 5810 * Pop-up of finished entities. 
5811 */ 5812 while ((RAW == 0) && (ctxt->inputNr > 1)) 5813 xmlPopInput(ctxt); 5814 5815 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5816 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5817 break; 5818 } 5819 } 5820 if (xmlParserDebugEntities) { 5821 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5822 xmlGenericError(xmlGenericErrorContext, 5823 "%s(%d): ", ctxt->input->filename, 5824 ctxt->input->line); 5825 xmlGenericError(xmlGenericErrorContext, 5826 "Leaving INCLUDE Conditional Section\n"); 5827 } 5828 5829 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5830 int state; 5831 xmlParserInputState instate; 5832 int depth = 0; 5833 5834 SKIP(6); 5835 SKIP_BLANKS; 5836 if (RAW != '[') { 5837 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5838 } else { 5839 NEXT; 5840 } 5841 if (xmlParserDebugEntities) { 5842 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5843 xmlGenericError(xmlGenericErrorContext, 5844 "%s(%d): ", ctxt->input->filename, 5845 ctxt->input->line); 5846 xmlGenericError(xmlGenericErrorContext, 5847 "Entering IGNORE Conditional Section\n"); 5848 } 5849 5850 /* 5851 * Parse up to the end of the conditional section 5852 * But disable SAX event generating DTD building in the meantime 5853 */ 5854 state = ctxt->disableSAX; 5855 instate = ctxt->instate; 5856 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5857 ctxt->instate = XML_PARSER_IGNORE; 5858 5859 while ((depth >= 0) && (RAW != 0)) { 5860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5861 depth++; 5862 SKIP(3); 5863 continue; 5864 } 5865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5866 if (--depth >= 0) SKIP(3); 5867 continue; 5868 } 5869 NEXT; 5870 continue; 5871 } 5872 5873 ctxt->disableSAX = state; 5874 ctxt->instate = instate; 5875 5876 if (xmlParserDebugEntities) { 5877 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5878 xmlGenericError(xmlGenericErrorContext, 5879 "%s(%d): ", ctxt->input->filename, 5880 
ctxt->input->line); 5881 xmlGenericError(xmlGenericErrorContext, 5882 "Leaving IGNORE Conditional Section\n"); 5883 } 5884 5885 } else { 5886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5887 } 5888 5889 if (RAW == 0) 5890 SHRINK; 5891 5892 if (RAW == 0) { 5893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5894 } else { 5895 SKIP(3); 5896 } 5897} 5898 5899/** 5900 * xmlParseMarkupDecl: 5901 * @ctxt: an XML parser context 5902 * 5903 * parse Markup declarations 5904 * 5905 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5906 * NotationDecl | PI | Comment 5907 * 5908 * [ VC: Proper Declaration/PE Nesting ] 5909 * Parameter-entity replacement text must be properly nested with 5910 * markup declarations. That is to say, if either the first character 5911 * or the last character of a markup declaration (markupdecl above) is 5912 * contained in the replacement text for a parameter-entity reference, 5913 * both must be contained in the same replacement text. 5914 * 5915 * [ WFC: PEs in Internal Subset ] 5916 * In the internal DTD subset, parameter-entity references can occur 5917 * only where markup declarations can occur, not within markup declarations. 5918 * (This does not apply to references that occur in external parameter 5919 * entities or to the external subset.) 
5920 */ 5921void 5922xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5923 GROW; 5924 if (CUR == '<') { 5925 if (NXT(1) == '!') { 5926 switch (NXT(2)) { 5927 case 'E': 5928 if (NXT(3) == 'L') 5929 xmlParseElementDecl(ctxt); 5930 else if (NXT(3) == 'N') 5931 xmlParseEntityDecl(ctxt); 5932 break; 5933 case 'A': 5934 xmlParseAttributeListDecl(ctxt); 5935 break; 5936 case 'N': 5937 xmlParseNotationDecl(ctxt); 5938 break; 5939 case '-': 5940 xmlParseComment(ctxt); 5941 break; 5942 default: 5943 /* there is an error but it will be detected later */ 5944 break; 5945 } 5946 } else if (NXT(1) == '?') { 5947 xmlParsePI(ctxt); 5948 } 5949 } 5950 /* 5951 * This is only for internal subset. On external entities, 5952 * the replacement is done before parsing stage 5953 */ 5954 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5955 xmlParsePEReference(ctxt); 5956 5957 /* 5958 * Conditional sections are allowed from entities included 5959 * by PE References in the internal subset. 5960 */ 5961 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5962 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5963 xmlParseConditionalSections(ctxt); 5964 } 5965 } 5966 5967 ctxt->instate = XML_PARSER_DTD; 5968} 5969 5970/** 5971 * xmlParseTextDecl: 5972 * @ctxt: an XML parser context 5973 * 5974 * parse an XML declaration header for external entities 5975 * 5976 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5977 * 5978 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5979 */ 5980 5981void 5982xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5983 xmlChar *version; 5984 const xmlChar *encoding; 5985 5986 /* 5987 * We know that '<?xml' is here. 
5988 */ 5989 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5990 SKIP(5); 5991 } else { 5992 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5993 return; 5994 } 5995 5996 if (!IS_BLANK_CH(CUR)) { 5997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5998 "Space needed after '<?xml'\n"); 5999 } 6000 SKIP_BLANKS; 6001 6002 /* 6003 * We may have the VersionInfo here. 6004 */ 6005 version = xmlParseVersionInfo(ctxt); 6006 if (version == NULL) 6007 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6008 else { 6009 if (!IS_BLANK_CH(CUR)) { 6010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6011 "Space needed here\n"); 6012 } 6013 } 6014 ctxt->input->version = version; 6015 6016 /* 6017 * We must have the encoding declaration 6018 */ 6019 encoding = xmlParseEncodingDecl(ctxt); 6020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6021 /* 6022 * The XML REC instructs us to stop parsing right here 6023 */ 6024 return; 6025 } 6026 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6027 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6028 "Missing encoding in text declaration\n"); 6029 } 6030 6031 SKIP_BLANKS; 6032 if ((RAW == '?') && (NXT(1) == '>')) { 6033 SKIP(2); 6034 } else if (RAW == '>') { 6035 /* Deprecated old WD ... */ 6036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6037 NEXT; 6038 } else { 6039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6040 MOVETO_ENDTAG(CUR_PTR); 6041 NEXT; 6042 } 6043} 6044 6045/** 6046 * xmlParseExternalSubset: 6047 * @ctxt: an XML parser context 6048 * @ExternalID: the external identifier 6049 * @SystemID: the system identifier (or URL) 6050 * 6051 * parse Markup declarations from an external subset 6052 * 6053 * [30] extSubset ::= textDecl? 
extSubsetDecl 6054 * 6055 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6056 */ 6057void 6058xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6059 const xmlChar *SystemID) { 6060 xmlDetectSAX2(ctxt); 6061 GROW; 6062 6063 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 6064 (ctxt->input->end - ctxt->input->cur >= 4)) { 6065 xmlChar start[4]; 6066 xmlCharEncoding enc; 6067 6068 start[0] = RAW; 6069 start[1] = NXT(1); 6070 start[2] = NXT(2); 6071 start[3] = NXT(3); 6072 enc = xmlDetectCharEncoding(start, 4); 6073 if (enc != XML_CHAR_ENCODING_NONE) 6074 xmlSwitchEncoding(ctxt, enc); 6075 } 6076 6077 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6078 xmlParseTextDecl(ctxt); 6079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6080 /* 6081 * The XML REC instructs us to stop parsing right here 6082 */ 6083 ctxt->instate = XML_PARSER_EOF; 6084 return; 6085 } 6086 } 6087 if (ctxt->myDoc == NULL) { 6088 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6089 } 6090 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6091 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6092 6093 ctxt->instate = XML_PARSER_DTD; 6094 ctxt->external = 1; 6095 while (((RAW == '<') && (NXT(1) == '?')) || 6096 ((RAW == '<') && (NXT(1) == '!')) || 6097 (RAW == '%') || IS_BLANK_CH(CUR)) { 6098 const xmlChar *check = CUR_PTR; 6099 unsigned int cons = ctxt->input->consumed; 6100 6101 GROW; 6102 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6103 xmlParseConditionalSections(ctxt); 6104 } else if (IS_BLANK_CH(CUR)) { 6105 NEXT; 6106 } else if (RAW == '%') { 6107 xmlParsePEReference(ctxt); 6108 } else 6109 xmlParseMarkupDecl(ctxt); 6110 6111 /* 6112 * Pop-up of finished entities. 
6113 */ 6114 while ((RAW == 0) && (ctxt->inputNr > 1)) 6115 xmlPopInput(ctxt); 6116 6117 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6118 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6119 break; 6120 } 6121 } 6122 6123 if (RAW != 0) { 6124 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6125 } 6126 6127} 6128 6129/** 6130 * xmlParseReference: 6131 * @ctxt: an XML parser context 6132 * 6133 * parse and handle entity references in content, depending on the SAX 6134 * interface, this may end-up in a call to character() if this is a 6135 * CharRef, a predefined entity, if there is no reference() callback. 6136 * or if the parser was asked to switch to that mode. 6137 * 6138 * [67] Reference ::= EntityRef | CharRef 6139 */ 6140void 6141xmlParseReference(xmlParserCtxtPtr ctxt) { 6142 xmlEntityPtr ent; 6143 xmlChar *val; 6144 if (RAW != '&') return; 6145 6146 if (NXT(1) == '#') { 6147 int i = 0; 6148 xmlChar out[10]; 6149 int hex = NXT(2); 6150 int value = xmlParseCharRef(ctxt); 6151 6152 if (value == 0) 6153 return; 6154 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6155 /* 6156 * So we are using non-UTF-8 buffers 6157 * Check that the char fit on 8bits, if not 6158 * generate a CharRef. 
6159 */ 6160 if (value <= 0xFF) { 6161 out[0] = value; 6162 out[1] = 0; 6163 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6164 (!ctxt->disableSAX)) 6165 ctxt->sax->characters(ctxt->userData, out, 1); 6166 } else { 6167 if ((hex == 'x') || (hex == 'X')) 6168 snprintf((char *)out, sizeof(out), "#x%X", value); 6169 else 6170 snprintf((char *)out, sizeof(out), "#%d", value); 6171 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6172 (!ctxt->disableSAX)) 6173 ctxt->sax->reference(ctxt->userData, out); 6174 } 6175 } else { 6176 /* 6177 * Just encode the value in UTF-8 6178 */ 6179 COPY_BUF(0 ,out, i, value); 6180 out[i] = 0; 6181 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6182 (!ctxt->disableSAX)) 6183 ctxt->sax->characters(ctxt->userData, out, i); 6184 } 6185 } else { 6186 int was_checked; 6187 6188 ent = xmlParseEntityRef(ctxt); 6189 if (ent == NULL) return; 6190 if (!ctxt->wellFormed) 6191 return; 6192 was_checked = ent->checked; 6193 if ((ent->name != NULL) && 6194 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 6195 xmlNodePtr list = NULL; 6196 xmlParserErrors ret = XML_ERR_OK; 6197 6198 6199 /* 6200 * The first reference to the entity trigger a parsing phase 6201 * where the ent->children is filled with the result from 6202 * the parsing. 6203 */ 6204 if (ent->checked == 0) { 6205 xmlChar *value; 6206 6207 value = ent->content; 6208 6209 /* 6210 * Check that this entity is well formed 6211 */ 6212 if ((value != NULL) && (value[0] != 0) && 6213 (value[1] == 0) && (value[0] == '<') && 6214 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 6215 /* 6216 * DONE: get definite answer on this !!! 6217 * Lots of entity decls are used to declare a single 6218 * char 6219 * <!ENTITY lt "<"> 6220 * Which seems to be valid since 6221 * 2.4: The ampersand character (&) and the left angle 6222 * bracket (<) may appear in their literal form only 6223 * when used ... 
They are also legal within the literal 6224 * entity value of an internal entity declaration;i 6225 * see "4.3.2 Well-Formed Parsed Entities". 6226 * IMHO 2.4 and 4.3.2 are directly in contradiction. 6227 * Looking at the OASIS test suite and James Clark 6228 * tests, this is broken. However the XML REC uses 6229 * it. Is the XML REC not well-formed ???? 6230 * This is a hack to avoid this problem 6231 * 6232 * ANSWER: since lt gt amp .. are already defined, 6233 * this is a redefinition and hence the fact that the 6234 * content is not well balanced is not a Wf error, this 6235 * is lousy but acceptable. 6236 */ 6237 list = xmlNewDocText(ctxt->myDoc, value); 6238 if (list != NULL) { 6239 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 6240 (ent->children == NULL)) { 6241 ent->children = list; 6242 ent->last = list; 6243 ent->owner = 1; 6244 list->parent = (xmlNodePtr) ent; 6245 } else { 6246 xmlFreeNodeList(list); 6247 } 6248 } else if (list != NULL) { 6249 xmlFreeNodeList(list); 6250 } 6251 } else { 6252 /* 6253 * 4.3.2: An internal general parsed entity is well-formed 6254 * if its replacement text matches the production labeled 6255 * content. 6256 */ 6257 6258 void *user_data; 6259 /* 6260 * This is a bit hackish but this seems the best 6261 * way to make sure both SAX and DOM entity support 6262 * behaves okay. 
6263 */ 6264 if (ctxt->userData == ctxt) 6265 user_data = NULL; 6266 else 6267 user_data = ctxt->userData; 6268 6269 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6270 ctxt->depth++; 6271 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6272 value, user_data, &list); 6273 ctxt->depth--; 6274 } else if (ent->etype == 6275 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6276 ctxt->depth++; 6277 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6278 ctxt->sax, user_data, ctxt->depth, 6279 ent->URI, ent->ExternalID, &list); 6280 ctxt->depth--; 6281 } else { 6282 ret = XML_ERR_ENTITY_PE_INTERNAL; 6283 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6284 "invalid entity type found\n", NULL); 6285 } 6286 if (ret == XML_ERR_ENTITY_LOOP) { 6287 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6288 return; 6289 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 6290 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6291 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6292 (ent->children == NULL)) { 6293 ent->children = list; 6294 if (ctxt->replaceEntities) { 6295 /* 6296 * Prune it directly in the generated document 6297 * except for single text nodes. 
6298 */ 6299 if (((list->type == XML_TEXT_NODE) && 6300 (list->next == NULL)) || 6301 (ctxt->parseMode == XML_PARSE_READER)) { 6302 list->parent = (xmlNodePtr) ent; 6303 list = NULL; 6304 ent->owner = 1; 6305 } else { 6306 ent->owner = 0; 6307 while (list != NULL) { 6308 list->parent = (xmlNodePtr) ctxt->node; 6309 list->doc = ctxt->myDoc; 6310 if (list->next == NULL) 6311 ent->last = list; 6312 list = list->next; 6313 } 6314 list = ent->children; 6315#ifdef LIBXML_LEGACY_ENABLED 6316 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6317 xmlAddEntityReference(ent, list, NULL); 6318#endif /* LIBXML_LEGACY_ENABLED */ 6319 } 6320 } else { 6321 ent->owner = 1; 6322 while (list != NULL) { 6323 list->parent = (xmlNodePtr) ent; 6324 if (list->next == NULL) 6325 ent->last = list; 6326 list = list->next; 6327 } 6328 } 6329 } else { 6330 xmlFreeNodeList(list); 6331 list = NULL; 6332 } 6333 } else if ((ret != XML_ERR_OK) && 6334 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6335 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6336 "Entity '%s' failed to parse\n", ent->name); 6337 } else if (list != NULL) { 6338 xmlFreeNodeList(list); 6339 list = NULL; 6340 } 6341 } 6342 ent->checked = 1; 6343 } 6344 6345 if (ent->children == NULL) { 6346 /* 6347 * Probably running in SAX mode and the callbacks don't 6348 * build the entity content. So unless we already went 6349 * though parsing for first checking go though the entity 6350 * content to generate callbacks associated to the entity 6351 */ 6352 if (was_checked == 1) { 6353 void *user_data; 6354 /* 6355 * This is a bit hackish but this seems the best 6356 * way to make sure both SAX and DOM entity support 6357 * behaves okay. 
6358 */ 6359 if (ctxt->userData == ctxt) 6360 user_data = NULL; 6361 else 6362 user_data = ctxt->userData; 6363 6364 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6365 ctxt->depth++; 6366 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6367 ent->content, user_data, NULL); 6368 ctxt->depth--; 6369 } else if (ent->etype == 6370 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6371 ctxt->depth++; 6372 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6373 ctxt->sax, user_data, ctxt->depth, 6374 ent->URI, ent->ExternalID, NULL); 6375 ctxt->depth--; 6376 } else { 6377 ret = XML_ERR_ENTITY_PE_INTERNAL; 6378 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6379 "invalid entity type found\n", NULL); 6380 } 6381 if (ret == XML_ERR_ENTITY_LOOP) { 6382 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6383 return; 6384 } 6385 } 6386 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6387 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6388 /* 6389 * Entity reference callback comes second, it's somewhat 6390 * superfluous but a compatibility to historical behaviour 6391 */ 6392 ctxt->sax->reference(ctxt->userData, ent->name); 6393 } 6394 return; 6395 } 6396 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6397 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6398 /* 6399 * Create a node. 6400 */ 6401 ctxt->sax->reference(ctxt->userData, ent->name); 6402 return; 6403 } 6404 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 6405 /* 6406 * There is a problem on the handling of _private for entities 6407 * (bug 155816): Should we copy the content of the field from 6408 * the entity (possibly overwriting some value set by the user 6409 * when a copy is created), should we leave it alone, or should 6410 * we try to take care of different situations? The problem 6411 * is exacerbated by the usage of this field by the xmlReader. 6412 * To fix this bug, we look at _private on the created node 6413 * and, if it's NULL, we copy in whatever was in the entity. 
6414 * If it's not NULL we leave it alone. This is somewhat of a 6415 * hack - maybe we should have further tests to determine 6416 * what to do. 6417 */ 6418 if ((ctxt->node != NULL) && (ent->children != NULL)) { 6419 /* 6420 * Seems we are generating the DOM content, do 6421 * a simple tree copy for all references except the first 6422 * In the first occurrence list contains the replacement. 6423 * progressive == 2 means we are operating on the Reader 6424 * and since nodes are discarded we must copy all the time. 6425 */ 6426 if (((list == NULL) && (ent->owner == 0)) || 6427 (ctxt->parseMode == XML_PARSE_READER)) { 6428 xmlNodePtr nw = NULL, cur, firstChild = NULL; 6429 6430 /* 6431 * when operating on a reader, the entities definitions 6432 * are always owning the entities subtree. 6433 if (ctxt->parseMode == XML_PARSE_READER) 6434 ent->owner = 1; 6435 */ 6436 6437 cur = ent->children; 6438 while (cur != NULL) { 6439 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6440 if (nw != NULL) { 6441 if (nw->_private == NULL) 6442 nw->_private = cur->_private; 6443 if (firstChild == NULL){ 6444 firstChild = nw; 6445 } 6446 nw = xmlAddChild(ctxt->node, nw); 6447 } 6448 if (cur == ent->last) { 6449 /* 6450 * needed to detect some strange empty 6451 * node cases in the reader tests 6452 */ 6453 if ((ctxt->parseMode == XML_PARSE_READER) && 6454 (nw != NULL) && 6455 (nw->type == XML_ELEMENT_NODE) && 6456 (nw->children == NULL)) 6457 nw->extra = 1; 6458 6459 break; 6460 } 6461 cur = cur->next; 6462 } 6463#ifdef LIBXML_LEGACY_ENABLED 6464 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6465 xmlAddEntityReference(ent, firstChild, nw); 6466#endif /* LIBXML_LEGACY_ENABLED */ 6467 } else if (list == NULL) { 6468 xmlNodePtr nw = NULL, cur, next, last, 6469 firstChild = NULL; 6470 /* 6471 * Copy the entity child list and make it the new 6472 * entity child list. 
The goal is to make sure any 6473 * ID or REF referenced will be the one from the 6474 * document content and not the entity copy. 6475 */ 6476 cur = ent->children; 6477 ent->children = NULL; 6478 last = ent->last; 6479 ent->last = NULL; 6480 while (cur != NULL) { 6481 next = cur->next; 6482 cur->next = NULL; 6483 cur->parent = NULL; 6484 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6485 if (nw != NULL) { 6486 if (nw->_private == NULL) 6487 nw->_private = cur->_private; 6488 if (firstChild == NULL){ 6489 firstChild = cur; 6490 } 6491 xmlAddChild((xmlNodePtr) ent, nw); 6492 xmlAddChild(ctxt->node, cur); 6493 } 6494 if (cur == last) 6495 break; 6496 cur = next; 6497 } 6498 ent->owner = 1; 6499#ifdef LIBXML_LEGACY_ENABLED 6500 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6501 xmlAddEntityReference(ent, firstChild, nw); 6502#endif /* LIBXML_LEGACY_ENABLED */ 6503 } else { 6504 const xmlChar *nbktext; 6505 6506 /* 6507 * the name change is to avoid coalescing of the 6508 * node with a possible previous text one which 6509 * would make ent->children a dangling pointer 6510 */ 6511 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 6512 -1); 6513 if (ent->children->type == XML_TEXT_NODE) 6514 ent->children->name = nbktext; 6515 if ((ent->last != ent->children) && 6516 (ent->last->type == XML_TEXT_NODE)) 6517 ent->last->name = nbktext; 6518 xmlAddChildList(ctxt->node, ent->children); 6519 } 6520 6521 /* 6522 * This is to avoid a nasty side effect, see 6523 * characters() in SAX.c 6524 */ 6525 ctxt->nodemem = 0; 6526 ctxt->nodelen = 0; 6527 return; 6528 } 6529 } 6530 } else { 6531 val = ent->content; 6532 if (val == NULL) return; 6533 /* 6534 * inline the entity. 
6535 */ 6536 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6537 (!ctxt->disableSAX)) 6538 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6539 } 6540 } 6541} 6542 6543/** 6544 * xmlParseEntityRef: 6545 * @ctxt: an XML parser context 6546 * 6547 * parse ENTITY references declarations 6548 * 6549 * [68] EntityRef ::= '&' Name ';' 6550 * 6551 * [ WFC: Entity Declared ] 6552 * In a document without any DTD, a document with only an internal DTD 6553 * subset which contains no parameter entity references, or a document 6554 * with "standalone='yes'", the Name given in the entity reference 6555 * must match that in an entity declaration, except that well-formed 6556 * documents need not declare any of the following entities: amp, lt, 6557 * gt, apos, quot. The declaration of a parameter entity must precede 6558 * any reference to it. Similarly, the declaration of a general entity 6559 * must precede any reference to it which appears in a default value in an 6560 * attribute-list declaration. Note that if entities are declared in the 6561 * external subset or in external parameter entities, a non-validating 6562 * processor is not obligated to read and process their declarations; 6563 * for such documents, the rule that an entity must be declared is a 6564 * well-formedness constraint only if standalone='yes'. 6565 * 6566 * [ WFC: Parsed Entity ] 6567 * An entity reference must not contain the name of an unparsed entity 6568 * 6569 * Returns the xmlEntityPtr if found, or NULL otherwise. 6570 */ 6571xmlEntityPtr 6572xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 6573 const xmlChar *name; 6574 xmlEntityPtr ent = NULL; 6575 6576 GROW; 6577 6578 if (RAW == '&') { 6579 NEXT; 6580 name = xmlParseName(ctxt); 6581 if (name == NULL) { 6582 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6583 "xmlParseEntityRef: no name\n"); 6584 } else { 6585 if (RAW == ';') { 6586 NEXT; 6587 /* 6588 * Ask first SAX for entity resolution, otherwise try the 6589 * predefined set. 
6590 */ 6591 if (ctxt->sax != NULL) { 6592 if (ctxt->sax->getEntity != NULL) 6593 ent = ctxt->sax->getEntity(ctxt->userData, name); 6594 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 6595 ent = xmlGetPredefinedEntity(name); 6596 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 6597 (ctxt->userData==ctxt)) { 6598 ent = xmlSAX2GetEntity(ctxt, name); 6599 } 6600 } 6601 /* 6602 * [ WFC: Entity Declared ] 6603 * In a document without any DTD, a document with only an 6604 * internal DTD subset which contains no parameter entity 6605 * references, or a document with "standalone='yes'", the 6606 * Name given in the entity reference must match that in an 6607 * entity declaration, except that well-formed documents 6608 * need not declare any of the following entities: amp, lt, 6609 * gt, apos, quot. 6610 * The declaration of a parameter entity must precede any 6611 * reference to it. 6612 * Similarly, the declaration of a general entity must 6613 * precede any reference to it which appears in a default 6614 * value in an attribute-list declaration. Note that if 6615 * entities are declared in the external subset or in 6616 * external parameter entities, a non-validating processor 6617 * is not obligated to read and process their declarations; 6618 * for such documents, the rule that an entity must be 6619 * declared is a well-formedness constraint only if 6620 * standalone='yes'. 
6621 */ 6622 if (ent == NULL) { 6623 if ((ctxt->standalone == 1) || 6624 ((ctxt->hasExternalSubset == 0) && 6625 (ctxt->hasPErefs == 0))) { 6626 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6627 "Entity '%s' not defined\n", name); 6628 } else { 6629 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6630 "Entity '%s' not defined\n", name); 6631 if ((ctxt->inSubset == 0) && 6632 (ctxt->sax != NULL) && 6633 (ctxt->sax->reference != NULL)) { 6634 ctxt->sax->reference(ctxt->userData, name); 6635 } 6636 } 6637 ctxt->valid = 0; 6638 } 6639 6640 /* 6641 * [ WFC: Parsed Entity ] 6642 * An entity reference must not contain the name of an 6643 * unparsed entity 6644 */ 6645 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6646 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6647 "Entity reference to unparsed entity %s\n", name); 6648 } 6649 6650 /* 6651 * [ WFC: No External Entity References ] 6652 * Attribute values cannot contain direct or indirect 6653 * entity references to external entities. 6654 */ 6655 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6656 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6657 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6658 "Attribute references external entity '%s'\n", name); 6659 } 6660 /* 6661 * [ WFC: No < in Attribute Values ] 6662 * The replacement text of any entity referred to directly or 6663 * indirectly in an attribute value (other than "<") must 6664 * not contain a <. 6665 */ 6666 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6667 (ent != NULL) && 6668 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6669 (ent->content != NULL) && 6670 (xmlStrchr(ent->content, '<'))) { 6671 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6672 "'<' in entity '%s' is not allowed in attributes values\n", name); 6673 } 6674 6675 /* 6676 * Internal check, no parameter entities here ... 
6677 */ 6678 else { 6679 switch (ent->etype) { 6680 case XML_INTERNAL_PARAMETER_ENTITY: 6681 case XML_EXTERNAL_PARAMETER_ENTITY: 6682 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6683 "Attempt to reference the parameter entity '%s'\n", 6684 name); 6685 break; 6686 default: 6687 break; 6688 } 6689 } 6690 6691 /* 6692 * [ WFC: No Recursion ] 6693 * A parsed entity must not contain a recursive reference 6694 * to itself, either directly or indirectly. 6695 * Done somewhere else 6696 */ 6697 6698 } else { 6699 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6700 } 6701 } 6702 } 6703 return(ent); 6704} 6705 6706/** 6707 * xmlParseStringEntityRef: 6708 * @ctxt: an XML parser context 6709 * @str: a pointer to an index in the string 6710 * 6711 * parse ENTITY references declarations, but this version parses it from 6712 * a string value. 6713 * 6714 * [68] EntityRef ::= '&' Name ';' 6715 * 6716 * [ WFC: Entity Declared ] 6717 * In a document without any DTD, a document with only an internal DTD 6718 * subset which contains no parameter entity references, or a document 6719 * with "standalone='yes'", the Name given in the entity reference 6720 * must match that in an entity declaration, except that well-formed 6721 * documents need not declare any of the following entities: amp, lt, 6722 * gt, apos, quot. The declaration of a parameter entity must precede 6723 * any reference to it. Similarly, the declaration of a general entity 6724 * must precede any reference to it which appears in a default value in an 6725 * attribute-list declaration. Note that if entities are declared in the 6726 * external subset or in external parameter entities, a non-validating 6727 * processor is not obligated to read and process their declarations; 6728 * for such documents, the rule that an entity must be declared is a 6729 * well-formedness constraint only if standalone='yes'. 
6730 * 6731 * [ WFC: Parsed Entity ] 6732 * An entity reference must not contain the name of an unparsed entity 6733 * 6734 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 6735 * is updated to the current location in the string. 6736 */ 6737xmlEntityPtr 6738xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 6739 xmlChar *name; 6740 const xmlChar *ptr; 6741 xmlChar cur; 6742 xmlEntityPtr ent = NULL; 6743 6744 if ((str == NULL) || (*str == NULL)) 6745 return(NULL); 6746 ptr = *str; 6747 cur = *ptr; 6748 if (cur == '&') { 6749 ptr++; 6750 cur = *ptr; 6751 name = xmlParseStringName(ctxt, &ptr); 6752 if (name == NULL) { 6753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6754 "xmlParseStringEntityRef: no name\n"); 6755 } else { 6756 if (*ptr == ';') { 6757 ptr++; 6758 /* 6759 * Ask first SAX for entity resolution, otherwise try the 6760 * predefined set. 6761 */ 6762 if (ctxt->sax != NULL) { 6763 if (ctxt->sax->getEntity != NULL) 6764 ent = ctxt->sax->getEntity(ctxt->userData, name); 6765 if (ent == NULL) 6766 ent = xmlGetPredefinedEntity(name); 6767 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6768 ent = xmlSAX2GetEntity(ctxt, name); 6769 } 6770 } 6771 /* 6772 * [ WFC: Entity Declared ] 6773 * In a document without any DTD, a document with only an 6774 * internal DTD subset which contains no parameter entity 6775 * references, or a document with "standalone='yes'", the 6776 * Name given in the entity reference must match that in an 6777 * entity declaration, except that well-formed documents 6778 * need not declare any of the following entities: amp, lt, 6779 * gt, apos, quot. 6780 * The declaration of a parameter entity must precede any 6781 * reference to it. 6782 * Similarly, the declaration of a general entity must 6783 * precede any reference to it which appears in a default 6784 * value in an attribute-list declaration. 
Note that if 6785 * entities are declared in the external subset or in 6786 * external parameter entities, a non-validating processor 6787 * is not obligated to read and process their declarations; 6788 * for such documents, the rule that an entity must be 6789 * declared is a well-formedness constraint only if 6790 * standalone='yes'. 6791 */ 6792 if (ent == NULL) { 6793 if ((ctxt->standalone == 1) || 6794 ((ctxt->hasExternalSubset == 0) && 6795 (ctxt->hasPErefs == 0))) { 6796 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6797 "Entity '%s' not defined\n", name); 6798 } else { 6799 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6800 "Entity '%s' not defined\n", 6801 name); 6802 } 6803 /* TODO ? check regressions ctxt->valid = 0; */ 6804 } 6805 6806 /* 6807 * [ WFC: Parsed Entity ] 6808 * An entity reference must not contain the name of an 6809 * unparsed entity 6810 */ 6811 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6812 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6813 "Entity reference to unparsed entity %s\n", name); 6814 } 6815 6816 /* 6817 * [ WFC: No External Entity References ] 6818 * Attribute values cannot contain direct or indirect 6819 * entity references to external entities. 6820 */ 6821 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6822 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6823 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6824 "Attribute references external entity '%s'\n", name); 6825 } 6826 /* 6827 * [ WFC: No < in Attribute Values ] 6828 * The replacement text of any entity referred to directly or 6829 * indirectly in an attribute value (other than "<") must 6830 * not contain a <. 
6831 */ 6832 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6833 (ent != NULL) && 6834 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6835 (ent->content != NULL) && 6836 (xmlStrchr(ent->content, '<'))) { 6837 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6838 "'<' in entity '%s' is not allowed in attributes values\n", 6839 name); 6840 } 6841 6842 /* 6843 * Internal check, no parameter entities here ... 6844 */ 6845 else { 6846 switch (ent->etype) { 6847 case XML_INTERNAL_PARAMETER_ENTITY: 6848 case XML_EXTERNAL_PARAMETER_ENTITY: 6849 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6850 "Attempt to reference the parameter entity '%s'\n", 6851 name); 6852 break; 6853 default: 6854 break; 6855 } 6856 } 6857 6858 /* 6859 * [ WFC: No Recursion ] 6860 * A parsed entity must not contain a recursive reference 6861 * to itself, either directly or indirectly. 6862 * Done somewhere else 6863 */ 6864 6865 } else { 6866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6867 } 6868 xmlFree(name); 6869 } 6870 } 6871 *str = ptr; 6872 return(ent); 6873} 6874 6875/** 6876 * xmlParsePEReference: 6877 * @ctxt: an XML parser context 6878 * 6879 * parse PEReference declarations 6880 * The entity content is handled directly by pushing it's content as 6881 * a new input stream. 6882 * 6883 * [69] PEReference ::= '%' Name ';' 6884 * 6885 * [ WFC: No Recursion ] 6886 * A parsed entity must not contain a recursive 6887 * reference to itself, either directly or indirectly. 6888 * 6889 * [ WFC: Entity Declared ] 6890 * In a document without any DTD, a document with only an internal DTD 6891 * subset which contains no parameter entity references, or a document 6892 * with "standalone='yes'", ... ... The declaration of a parameter 6893 * entity must precede any reference to it... 6894 * 6895 * [ VC: Entity Declared ] 6896 * In a document with an external subset or external parameter entities 6897 * with "standalone='no'", ... ... 
The declaration of a parameter entity 6898 * must precede any reference to it... 6899 * 6900 * [ WFC: In DTD ] 6901 * Parameter-entity references may only appear in the DTD. 6902 * NOTE: misleading but this is handled. 6903 */ 6904void 6905xmlParsePEReference(xmlParserCtxtPtr ctxt) 6906{ 6907 const xmlChar *name; 6908 xmlEntityPtr entity = NULL; 6909 xmlParserInputPtr input; 6910 6911 if (RAW == '%') { 6912 NEXT; 6913 name = xmlParseName(ctxt); 6914 if (name == NULL) { 6915 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6916 "xmlParsePEReference: no name\n"); 6917 } else { 6918 if (RAW == ';') { 6919 NEXT; 6920 if ((ctxt->sax != NULL) && 6921 (ctxt->sax->getParameterEntity != NULL)) 6922 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6923 name); 6924 if (entity == NULL) { 6925 /* 6926 * [ WFC: Entity Declared ] 6927 * In a document without any DTD, a document with only an 6928 * internal DTD subset which contains no parameter entity 6929 * references, or a document with "standalone='yes'", ... 6930 * ... The declaration of a parameter entity must precede 6931 * any reference to it... 6932 */ 6933 if ((ctxt->standalone == 1) || 6934 ((ctxt->hasExternalSubset == 0) && 6935 (ctxt->hasPErefs == 0))) { 6936 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6937 "PEReference: %%%s; not found\n", 6938 name); 6939 } else { 6940 /* 6941 * [ VC: Entity Declared ] 6942 * In a document with an external subset or external 6943 * parameter entities with "standalone='no'", ... 6944 * ... The declaration of a parameter entity must 6945 * precede any reference to it... 
6946 */ 6947 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6948 "PEReference: %%%s; not found\n", 6949 name, NULL); 6950 ctxt->valid = 0; 6951 } 6952 } else { 6953 /* 6954 * Internal checking in case the entity quest barfed 6955 */ 6956 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6957 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6958 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6959 "Internal: %%%s; is not a parameter entity\n", 6960 name, NULL); 6961 } else if (ctxt->input->free != deallocblankswrapper) { 6962 input = 6963 xmlNewBlanksWrapperInputStream(ctxt, entity); 6964 xmlPushInput(ctxt, input); 6965 } else { 6966 /* 6967 * TODO !!! 6968 * handle the extra spaces added before and after 6969 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6970 */ 6971 input = xmlNewEntityInputStream(ctxt, entity); 6972 xmlPushInput(ctxt, input); 6973 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6974 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6975 (IS_BLANK_CH(NXT(5)))) { 6976 xmlParseTextDecl(ctxt); 6977 if (ctxt->errNo == 6978 XML_ERR_UNSUPPORTED_ENCODING) { 6979 /* 6980 * The XML REC instructs us to stop parsing 6981 * right here 6982 */ 6983 ctxt->instate = XML_PARSER_EOF; 6984 return; 6985 } 6986 } 6987 } 6988 } 6989 ctxt->hasPErefs = 1; 6990 } else { 6991 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6992 } 6993 } 6994 } 6995} 6996 6997/** 6998 * xmlLoadEntityContent: 6999 * @ctxt: an XML parser context 7000 * @entity: an unloaded system entity 7001 * 7002 * Load the original content of the given system entity from the 7003 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7004 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7005 * 7006 * Returns 0 in case of success and -1 in case of failure 7007 */ 7008static int 7009xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7010 xmlParserInputPtr input; 7011 xmlBufferPtr buf; 7012 int l, c; 7013 int count = 0; 7014 7015 if ((ctxt == NULL) || (entity == NULL) || 7016 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7017 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7018 (entity->content != NULL)) { 7019 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7020 "xmlLoadEntityContent parameter error"); 7021 return(-1); 7022 } 7023 7024 if (xmlParserDebugEntities) 7025 xmlGenericError(xmlGenericErrorContext, 7026 "Reading %s entity content input\n", entity->name); 7027 7028 buf = xmlBufferCreate(); 7029 if (buf == NULL) { 7030 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7031 "xmlLoadEntityContent parameter error"); 7032 return(-1); 7033 } 7034 7035 input = xmlNewEntityInputStream(ctxt, entity); 7036 if (input == NULL) { 7037 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7038 "xmlLoadEntityContent input error"); 7039 xmlBufferFree(buf); 7040 return(-1); 7041 } 7042 7043 /* 7044 * Push the entity as the current input, read char by char 7045 * saving to the buffer until the end of the entity or an error 7046 */ 7047 xmlPushInput(ctxt, input); 7048 GROW; 7049 c = CUR_CHAR(l); 7050 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7051 (IS_CHAR(c))) { 7052 xmlBufferAdd(buf, ctxt->input->cur, l); 7053 if (count++ > 100) { 7054 count = 0; 7055 GROW; 7056 } 7057 NEXTL(l); 7058 c = CUR_CHAR(l); 7059 } 7060 7061 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7062 xmlPopInput(ctxt); 7063 } else if (!IS_CHAR(c)) { 7064 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7065 "xmlLoadEntityContent: invalid char value %d\n", 7066 c); 7067 xmlBufferFree(buf); 7068 return(-1); 7069 
} 7070 entity->content = buf->content; 7071 buf->content = NULL; 7072 xmlBufferFree(buf); 7073 7074 return(0); 7075} 7076 7077/** 7078 * xmlParseStringPEReference: 7079 * @ctxt: an XML parser context 7080 * @str: a pointer to an index in the string 7081 * 7082 * parse PEReference declarations 7083 * 7084 * [69] PEReference ::= '%' Name ';' 7085 * 7086 * [ WFC: No Recursion ] 7087 * A parsed entity must not contain a recursive 7088 * reference to itself, either directly or indirectly. 7089 * 7090 * [ WFC: Entity Declared ] 7091 * In a document without any DTD, a document with only an internal DTD 7092 * subset which contains no parameter entity references, or a document 7093 * with "standalone='yes'", ... ... The declaration of a parameter 7094 * entity must precede any reference to it... 7095 * 7096 * [ VC: Entity Declared ] 7097 * In a document with an external subset or external parameter entities 7098 * with "standalone='no'", ... ... The declaration of a parameter entity 7099 * must precede any reference to it... 7100 * 7101 * [ WFC: In DTD ] 7102 * Parameter-entity references may only appear in the DTD. 7103 * NOTE: misleading but this is handled. 7104 * 7105 * Returns the string of the entity content. 
7106 * str is updated to the current value of the index 7107 */ 7108xmlEntityPtr 7109xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7110 const xmlChar *ptr; 7111 xmlChar cur; 7112 xmlChar *name; 7113 xmlEntityPtr entity = NULL; 7114 7115 if ((str == NULL) || (*str == NULL)) return(NULL); 7116 ptr = *str; 7117 cur = *ptr; 7118 if (cur == '%') { 7119 ptr++; 7120 cur = *ptr; 7121 name = xmlParseStringName(ctxt, &ptr); 7122 if (name == NULL) { 7123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7124 "xmlParseStringPEReference: no name\n"); 7125 } else { 7126 cur = *ptr; 7127 if (cur == ';') { 7128 ptr++; 7129 cur = *ptr; 7130 if ((ctxt->sax != NULL) && 7131 (ctxt->sax->getParameterEntity != NULL)) 7132 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7133 name); 7134 if (entity == NULL) { 7135 /* 7136 * [ WFC: Entity Declared ] 7137 * In a document without any DTD, a document with only an 7138 * internal DTD subset which contains no parameter entity 7139 * references, or a document with "standalone='yes'", ... 7140 * ... The declaration of a parameter entity must precede 7141 * any reference to it... 7142 */ 7143 if ((ctxt->standalone == 1) || 7144 ((ctxt->hasExternalSubset == 0) && 7145 (ctxt->hasPErefs == 0))) { 7146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7147 "PEReference: %%%s; not found\n", name); 7148 } else { 7149 /* 7150 * [ VC: Entity Declared ] 7151 * In a document with an external subset or external 7152 * parameter entities with "standalone='no'", ... 7153 * ... The declaration of a parameter entity must 7154 * precede any reference to it... 
7155 */ 7156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7157 "PEReference: %%%s; not found\n", 7158 name, NULL); 7159 ctxt->valid = 0; 7160 } 7161 } else { 7162 /* 7163 * Internal checking in case the entity quest barfed 7164 */ 7165 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7166 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7167 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7168 "%%%s; is not a parameter entity\n", 7169 name, NULL); 7170 } 7171 } 7172 ctxt->hasPErefs = 1; 7173 } else { 7174 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7175 } 7176 xmlFree(name); 7177 } 7178 } 7179 *str = ptr; 7180 return(entity); 7181} 7182 7183/** 7184 * xmlParseDocTypeDecl: 7185 * @ctxt: an XML parser context 7186 * 7187 * parse a DOCTYPE declaration 7188 * 7189 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7190 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7191 * 7192 * [ VC: Root Element Type ] 7193 * The Name in the document type declaration must match the element 7194 * type of the root element. 7195 */ 7196 7197void 7198xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7199 const xmlChar *name = NULL; 7200 xmlChar *ExternalID = NULL; 7201 xmlChar *URI = NULL; 7202 7203 /* 7204 * We know that '<!DOCTYPE' has been detected. 7205 */ 7206 SKIP(9); 7207 7208 SKIP_BLANKS; 7209 7210 /* 7211 * Parse the DOCTYPE name. 7212 */ 7213 name = xmlParseName(ctxt); 7214 if (name == NULL) { 7215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7216 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7217 } 7218 ctxt->intSubName = name; 7219 7220 SKIP_BLANKS; 7221 7222 /* 7223 * Check for SystemID and ExternalID 7224 */ 7225 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7226 7227 if ((URI != NULL) || (ExternalID != NULL)) { 7228 ctxt->hasExternalSubset = 1; 7229 } 7230 ctxt->extSubURI = URI; 7231 ctxt->extSubSystem = ExternalID; 7232 7233 SKIP_BLANKS; 7234 7235 /* 7236 * Create and update the internal subset. 
7237 */ 7238 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7239 (!ctxt->disableSAX)) 7240 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7241 7242 /* 7243 * Is there any internal subset declarations ? 7244 * they are handled separately in xmlParseInternalSubset() 7245 */ 7246 if (RAW == '[') 7247 return; 7248 7249 /* 7250 * We should be at the end of the DOCTYPE declaration. 7251 */ 7252 if (RAW != '>') { 7253 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7254 } 7255 NEXT; 7256} 7257 7258/** 7259 * xmlParseInternalSubset: 7260 * @ctxt: an XML parser context 7261 * 7262 * parse the internal subset declaration 7263 * 7264 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7265 */ 7266 7267static void 7268xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 7269 /* 7270 * Is there any DTD definition ? 7271 */ 7272 if (RAW == '[') { 7273 ctxt->instate = XML_PARSER_DTD; 7274 NEXT; 7275 /* 7276 * Parse the succession of Markup declarations and 7277 * PEReferences. 7278 * Subsequence (markupdecl | PEReference | S)* 7279 */ 7280 while (RAW != ']') { 7281 const xmlChar *check = CUR_PTR; 7282 unsigned int cons = ctxt->input->consumed; 7283 7284 SKIP_BLANKS; 7285 xmlParseMarkupDecl(ctxt); 7286 xmlParsePEReference(ctxt); 7287 7288 /* 7289 * Pop-up of finished entities. 7290 */ 7291 while ((RAW == 0) && (ctxt->inputNr > 1)) 7292 xmlPopInput(ctxt); 7293 7294 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7295 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7296 "xmlParseInternalSubset: error detected in Markup declaration\n"); 7297 break; 7298 } 7299 } 7300 if (RAW == ']') { 7301 NEXT; 7302 SKIP_BLANKS; 7303 } 7304 } 7305 7306 /* 7307 * We should be at the end of the DOCTYPE declaration. 
7308 */ 7309 if (RAW != '>') { 7310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7311 } 7312 NEXT; 7313} 7314 7315#ifdef LIBXML_SAX1_ENABLED 7316/** 7317 * xmlParseAttribute: 7318 * @ctxt: an XML parser context 7319 * @value: a xmlChar ** used to store the value of the attribute 7320 * 7321 * parse an attribute 7322 * 7323 * [41] Attribute ::= Name Eq AttValue 7324 * 7325 * [ WFC: No External Entity References ] 7326 * Attribute values cannot contain direct or indirect entity references 7327 * to external entities. 7328 * 7329 * [ WFC: No < in Attribute Values ] 7330 * The replacement text of any entity referred to directly or indirectly in 7331 * an attribute value (other than "<") must not contain a <. 7332 * 7333 * [ VC: Attribute Value Type ] 7334 * The attribute must have been declared; the value must be of the type 7335 * declared for it. 7336 * 7337 * [25] Eq ::= S? '=' S? 7338 * 7339 * With namespace: 7340 * 7341 * [NS 11] Attribute ::= QName Eq AttValue 7342 * 7343 * Also the case QName == xmlns:??? is handled independently as a namespace 7344 * definition. 7345 * 7346 * Returns the attribute name, and the value in *value. 
7347 */ 7348 7349const xmlChar * 7350xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 7351 const xmlChar *name; 7352 xmlChar *val; 7353 7354 *value = NULL; 7355 GROW; 7356 name = xmlParseName(ctxt); 7357 if (name == NULL) { 7358 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7359 "error parsing attribute name\n"); 7360 return(NULL); 7361 } 7362 7363 /* 7364 * read the value 7365 */ 7366 SKIP_BLANKS; 7367 if (RAW == '=') { 7368 NEXT; 7369 SKIP_BLANKS; 7370 val = xmlParseAttValue(ctxt); 7371 ctxt->instate = XML_PARSER_CONTENT; 7372 } else { 7373 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7374 "Specification mandate value for attribute %s\n", name); 7375 return(NULL); 7376 } 7377 7378 /* 7379 * Check that xml:lang conforms to the specification 7380 * No more registered as an error, just generate a warning now 7381 * since this was deprecated in XML second edition 7382 */ 7383 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7384 if (!xmlCheckLanguageID(val)) { 7385 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7386 "Malformed value for xml:lang : %s\n", 7387 val, NULL); 7388 } 7389 } 7390 7391 /* 7392 * Check that xml:space conforms to the specification 7393 */ 7394 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7395 if (xmlStrEqual(val, BAD_CAST "default")) 7396 *(ctxt->space) = 0; 7397 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7398 *(ctxt->space) = 1; 7399 else { 7400 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 7401"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7402 val, NULL); 7403 } 7404 } 7405 7406 *value = val; 7407 return(name); 7408} 7409 7410/** 7411 * xmlParseStartTag: 7412 * @ctxt: an XML parser context 7413 * 7414 * parse a start of tag either for rule element or 7415 * EmptyElement. In both case we don't parse the tag closing chars. 7416 * 7417 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 7418 * 7419 * [ WFC: Unique Att Spec ] 7420 * No attribute name may appear more than once in the same start-tag or 7421 * empty-element tag. 7422 * 7423 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7424 * 7425 * [ WFC: Unique Att Spec ] 7426 * No attribute name may appear more than once in the same start-tag or 7427 * empty-element tag. 7428 * 7429 * With namespace: 7430 * 7431 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7432 * 7433 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7434 * 7435 * Returns the element name parsed 7436 */ 7437 7438const xmlChar * 7439xmlParseStartTag(xmlParserCtxtPtr ctxt) { 7440 const xmlChar *name; 7441 const xmlChar *attname; 7442 xmlChar *attvalue; 7443 const xmlChar **atts = ctxt->atts; 7444 int nbatts = 0; 7445 int maxatts = ctxt->maxatts; 7446 int i; 7447 7448 if (RAW != '<') return(NULL); 7449 NEXT1; 7450 7451 name = xmlParseName(ctxt); 7452 if (name == NULL) { 7453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7454 "xmlParseStartTag: invalid element name\n"); 7455 return(NULL); 7456 } 7457 7458 /* 7459 * Now parse the attributes, it ends up with the ending 7460 * 7461 * (S Attribute)* S? 7462 */ 7463 SKIP_BLANKS; 7464 GROW; 7465 7466 while ((RAW != '>') && 7467 ((RAW != '/') || (NXT(1) != '>')) && 7468 (IS_BYTE_CHAR(RAW))) { 7469 const xmlChar *q = CUR_PTR; 7470 unsigned int cons = ctxt->input->consumed; 7471 7472 attname = xmlParseAttribute(ctxt, &attvalue); 7473 if ((attname != NULL) && (attvalue != NULL)) { 7474 /* 7475 * [ WFC: Unique Att Spec ] 7476 * No attribute name may appear more than once in the same 7477 * start-tag or empty-element tag. 
7478 */ 7479 for (i = 0; i < nbatts;i += 2) { 7480 if (xmlStrEqual(atts[i], attname)) { 7481 xmlErrAttributeDup(ctxt, NULL, attname); 7482 xmlFree(attvalue); 7483 goto failed; 7484 } 7485 } 7486 /* 7487 * Add the pair to atts 7488 */ 7489 if (atts == NULL) { 7490 maxatts = 22; /* allow for 10 attrs by default */ 7491 atts = (const xmlChar **) 7492 xmlMalloc(maxatts * sizeof(xmlChar *)); 7493 if (atts == NULL) { 7494 xmlErrMemory(ctxt, NULL); 7495 if (attvalue != NULL) 7496 xmlFree(attvalue); 7497 goto failed; 7498 } 7499 ctxt->atts = atts; 7500 ctxt->maxatts = maxatts; 7501 } else if (nbatts + 4 > maxatts) { 7502 const xmlChar **n; 7503 7504 maxatts *= 2; 7505 n = (const xmlChar **) xmlRealloc((void *) atts, 7506 maxatts * sizeof(const xmlChar *)); 7507 if (n == NULL) { 7508 xmlErrMemory(ctxt, NULL); 7509 if (attvalue != NULL) 7510 xmlFree(attvalue); 7511 goto failed; 7512 } 7513 atts = n; 7514 ctxt->atts = atts; 7515 ctxt->maxatts = maxatts; 7516 } 7517 atts[nbatts++] = attname; 7518 atts[nbatts++] = attvalue; 7519 atts[nbatts] = NULL; 7520 atts[nbatts + 1] = NULL; 7521 } else { 7522 if (attvalue != NULL) 7523 xmlFree(attvalue); 7524 } 7525 7526failed: 7527 7528 GROW 7529 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7530 break; 7531 if (!IS_BLANK_CH(RAW)) { 7532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7533 "attributes construct error\n"); 7534 } 7535 SKIP_BLANKS; 7536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7537 (attname == NULL) && (attvalue == NULL)) { 7538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 7539 "xmlParseStartTag: problem parsing attributes\n"); 7540 break; 7541 } 7542 SHRINK; 7543 GROW; 7544 } 7545 7546 /* 7547 * SAX: Start of Element ! 
7548 */ 7549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 7550 (!ctxt->disableSAX)) { 7551 if (nbatts > 0) 7552 ctxt->sax->startElement(ctxt->userData, name, atts); 7553 else 7554 ctxt->sax->startElement(ctxt->userData, name, NULL); 7555 } 7556 7557 if (atts != NULL) { 7558 /* Free only the content strings */ 7559 for (i = 1;i < nbatts;i+=2) 7560 if (atts[i] != NULL) 7561 xmlFree((xmlChar *) atts[i]); 7562 } 7563 return(name); 7564} 7565 7566/** 7567 * xmlParseEndTag1: 7568 * @ctxt: an XML parser context 7569 * @line: line of the start tag 7570 * @nsNr: number of namespaces on the start tag 7571 * 7572 * parse an end of tag 7573 * 7574 * [42] ETag ::= '</' Name S? '>' 7575 * 7576 * With namespace 7577 * 7578 * [NS 9] ETag ::= '</' QName S? '>' 7579 */ 7580 7581static void 7582xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 7583 const xmlChar *name; 7584 7585 GROW; 7586 if ((RAW != '<') || (NXT(1) != '/')) { 7587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 7588 "xmlParseEndTag: '</' not found\n"); 7589 return; 7590 } 7591 SKIP(2); 7592 7593 name = xmlParseNameAndCompare(ctxt,ctxt->name); 7594 7595 /* 7596 * We should definitely be at the ending "S? '>'" part 7597 */ 7598 GROW; 7599 SKIP_BLANKS; 7600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7602 } else 7603 NEXT1; 7604 7605 /* 7606 * [ WFC: Element Type Match ] 7607 * The Name in an element's end-tag must match the element type in the 7608 * start-tag. 
7609 * 7610 */ 7611 if (name != (xmlChar*)1) { 7612 if (name == NULL) name = BAD_CAST "unparseable"; 7613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7614 "Opening and ending tag mismatch: %s line %d and %s\n", 7615 ctxt->name, line, name); 7616 } 7617 7618 /* 7619 * SAX: End of Tag 7620 */ 7621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7622 (!ctxt->disableSAX)) 7623 ctxt->sax->endElement(ctxt->userData, ctxt->name); 7624 7625 namePop(ctxt); 7626 spacePop(ctxt); 7627 return; 7628} 7629 7630/** 7631 * xmlParseEndTag: 7632 * @ctxt: an XML parser context 7633 * 7634 * parse an end of tag 7635 * 7636 * [42] ETag ::= '</' Name S? '>' 7637 * 7638 * With namespace 7639 * 7640 * [NS 9] ETag ::= '</' QName S? '>' 7641 */ 7642 7643void 7644xmlParseEndTag(xmlParserCtxtPtr ctxt) { 7645 xmlParseEndTag1(ctxt, 0); 7646} 7647#endif /* LIBXML_SAX1_ENABLED */ 7648 7649/************************************************************************ 7650 * * 7651 * SAX 2 specific operations * 7652 * * 7653 ************************************************************************/ 7654 7655static const xmlChar * 7656xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 7657 int len = 0, l; 7658 int c; 7659 int count = 0; 7660 7661 /* 7662 * Handler for more complex cases 7663 */ 7664 GROW; 7665 c = CUR_CHAR(l); 7666 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 7667 (!IS_LETTER(c) && (c != '_'))) { 7668 return(NULL); 7669 } 7670 7671 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 7672 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 7673 (c == '.') || (c == '-') || (c == '_') || 7674 (IS_COMBINING(c)) || 7675 (IS_EXTENDER(c)))) { 7676 if (count++ > 100) { 7677 count = 0; 7678 GROW; 7679 } 7680 len += l; 7681 NEXTL(l); 7682 c = CUR_CHAR(l); 7683 } 7684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 7685} 7686 7687/* 7688 * xmlGetNamespace: 7689 * @ctxt: an XML parser context 7690 * @prefix: the prefix to lookup 7691 * 
7692 * Lookup the namespace name for the @prefix (which ca be NULL) 7693 * The prefix must come from the @ctxt->dict dictionnary 7694 * 7695 * Returns the namespace name or NULL if not bound 7696 */ 7697static const xmlChar * 7698xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 7699 int i; 7700 7701 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 7702 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 7703 if (ctxt->nsTab[i] == prefix) { 7704 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 7705 return(NULL); 7706 return(ctxt->nsTab[i + 1]); 7707 } 7708 return(NULL); 7709} 7710 7711/** 7712 * xmlParseNCName: 7713 * @ctxt: an XML parser context 7714 * @len: lenght of the string parsed 7715 * 7716 * parse an XML name. 7717 * 7718 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 7719 * CombiningChar | Extender 7720 * 7721 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 7722 * 7723 * Returns the Name parsed or NULL 7724 */ 7725 7726static const xmlChar * 7727xmlParseNCName(xmlParserCtxtPtr ctxt) { 7728 const xmlChar *in; 7729 const xmlChar *ret; 7730 int count = 0; 7731 7732 /* 7733 * Accelerator for simple ASCII names 7734 */ 7735 in = ctxt->input->cur; 7736 if (((*in >= 0x61) && (*in <= 0x7A)) || 7737 ((*in >= 0x41) && (*in <= 0x5A)) || 7738 (*in == '_')) { 7739 in++; 7740 while (((*in >= 0x61) && (*in <= 0x7A)) || 7741 ((*in >= 0x41) && (*in <= 0x5A)) || 7742 ((*in >= 0x30) && (*in <= 0x39)) || 7743 (*in == '_') || (*in == '-') || 7744 (*in == '.')) 7745 in++; 7746 if ((*in > 0) && (*in < 0x80)) { 7747 count = in - ctxt->input->cur; 7748 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 7749 ctxt->input->cur = in; 7750 ctxt->nbChars += count; 7751 ctxt->input->col += count; 7752 if (ret == NULL) { 7753 xmlErrMemory(ctxt, NULL); 7754 } 7755 return(ret); 7756 } 7757 } 7758 return(xmlParseNCNameComplex(ctxt)); 7759} 7760 7761/** 7762 * xmlParseQName: 7763 * @ctxt: an XML parser context 7764 * @prefix: pointer to store the prefix part 7765 * 
7766 * parse an XML Namespace QName 7767 * 7768 * [6] QName ::= (Prefix ':')? LocalPart 7769 * [7] Prefix ::= NCName 7770 * [8] LocalPart ::= NCName 7771 * 7772 * Returns the Name parsed or NULL 7773 */ 7774 7775static const xmlChar * 7776xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 7777 const xmlChar *l, *p; 7778 7779 GROW; 7780 7781 l = xmlParseNCName(ctxt); 7782 if (l == NULL) { 7783 if (CUR == ':') { 7784 l = xmlParseName(ctxt); 7785 if (l != NULL) { 7786 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7787 "Failed to parse QName '%s'\n", l, NULL, NULL); 7788 *prefix = NULL; 7789 return(l); 7790 } 7791 } 7792 return(NULL); 7793 } 7794 if (CUR == ':') { 7795 NEXT; 7796 p = l; 7797 l = xmlParseNCName(ctxt); 7798 if (l == NULL) { 7799 xmlChar *tmp; 7800 7801 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7802 "Failed to parse QName '%s:'\n", p, NULL, NULL); 7803 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 7804 p = xmlDictLookup(ctxt->dict, tmp, -1); 7805 if (tmp != NULL) xmlFree(tmp); 7806 *prefix = NULL; 7807 return(p); 7808 } 7809 if (CUR == ':') { 7810 xmlChar *tmp; 7811 7812 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7813 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 7814 NEXT; 7815 tmp = (xmlChar *) xmlParseName(ctxt); 7816 if (tmp != NULL) { 7817 tmp = xmlBuildQName(tmp, l, NULL, 0); 7818 l = xmlDictLookup(ctxt->dict, tmp, -1); 7819 if (tmp != NULL) xmlFree(tmp); 7820 *prefix = p; 7821 return(l); 7822 } 7823 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 7824 l = xmlDictLookup(ctxt->dict, tmp, -1); 7825 if (tmp != NULL) xmlFree(tmp); 7826 *prefix = p; 7827 return(l); 7828 } 7829 *prefix = p; 7830 } else 7831 *prefix = NULL; 7832 return(l); 7833} 7834 7835/** 7836 * xmlParseQNameAndCompare: 7837 * @ctxt: an XML parser context 7838 * @name: the localname 7839 * @prefix: the prefix, if any. 
7840 * 7841 * parse an XML name and compares for match 7842 * (specialized for endtag parsing) 7843 * 7844 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7845 * and the name for mismatch 7846 */ 7847 7848static const xmlChar * 7849xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7850 xmlChar const *prefix) { 7851 const xmlChar *cmp = name; 7852 const xmlChar *in; 7853 const xmlChar *ret; 7854 const xmlChar *prefix2; 7855 7856 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7857 7858 GROW; 7859 in = ctxt->input->cur; 7860 7861 cmp = prefix; 7862 while (*in != 0 && *in == *cmp) { 7863 ++in; 7864 ++cmp; 7865 } 7866 if ((*cmp == 0) && (*in == ':')) { 7867 in++; 7868 cmp = name; 7869 while (*in != 0 && *in == *cmp) { 7870 ++in; 7871 ++cmp; 7872 } 7873 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 7874 /* success */ 7875 ctxt->input->cur = in; 7876 return((const xmlChar*) 1); 7877 } 7878 } 7879 /* 7880 * all strings coms from the dictionary, equality can be done directly 7881 */ 7882 ret = xmlParseQName (ctxt, &prefix2); 7883 if ((ret == name) && (prefix == prefix2)) 7884 return((const xmlChar*) 1); 7885 return ret; 7886} 7887 7888/** 7889 * xmlParseAttValueInternal: 7890 * @ctxt: an XML parser context 7891 * @len: attribute len result 7892 * @alloc: whether the attribute was reallocated as a new string 7893 * @normalize: if 1 then further non-CDATA normalization must be done 7894 * 7895 * parse a value for an attribute. 7896 * NOTE: if no normalization is needed, the routine will return pointers 7897 * directly from the data buffer. 
7898 * 7899 * 3.3.3 Attribute-Value Normalization: 7900 * Before the value of an attribute is passed to the application or 7901 * checked for validity, the XML processor must normalize it as follows: 7902 * - a character reference is processed by appending the referenced 7903 * character to the attribute value 7904 * - an entity reference is processed by recursively processing the 7905 * replacement text of the entity 7906 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 7907 * appending #x20 to the normalized value, except that only a single 7908 * #x20 is appended for a "#xD#xA" sequence that is part of an external 7909 * parsed entity or the literal entity value of an internal parsed entity 7910 * - other characters are processed by appending them to the normalized value 7911 * If the declared value is not CDATA, then the XML processor must further 7912 * process the normalized attribute value by discarding any leading and 7913 * trailing space (#x20) characters, and by replacing sequences of space 7914 * (#x20) characters by a single space (#x20) character. 7915 * All attributes for which no declaration has been read should be treated 7916 * by a non-validating parser as if declared CDATA. 7917 * 7918 * Returns the AttValue parsed or NULL. The value has to be freed by the 7919 * caller if it was copied, this can be detected by val[*len] == 0. 
7920 */ 7921 7922static xmlChar * 7923xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7924 int normalize) 7925{ 7926 xmlChar limit = 0; 7927 const xmlChar *in = NULL, *start, *end, *last; 7928 xmlChar *ret = NULL; 7929 7930 GROW; 7931 in = (xmlChar *) CUR_PTR; 7932 if (*in != '"' && *in != '\'') { 7933 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7934 return (NULL); 7935 } 7936 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7937 7938 /* 7939 * try to handle in this routine the most common case where no 7940 * allocation of a new string is required and where content is 7941 * pure ASCII. 7942 */ 7943 limit = *in++; 7944 end = ctxt->input->end; 7945 start = in; 7946 if (in >= end) { 7947 const xmlChar *oldbase = ctxt->input->base; 7948 GROW; 7949 if (oldbase != ctxt->input->base) { 7950 long delta = ctxt->input->base - oldbase; 7951 start = start + delta; 7952 in = in + delta; 7953 } 7954 end = ctxt->input->end; 7955 } 7956 if (normalize) { 7957 /* 7958 * Skip any leading spaces 7959 */ 7960 while ((in < end) && (*in != limit) && 7961 ((*in == 0x20) || (*in == 0x9) || 7962 (*in == 0xA) || (*in == 0xD))) { 7963 in++; 7964 start = in; 7965 if (in >= end) { 7966 const xmlChar *oldbase = ctxt->input->base; 7967 GROW; 7968 if (oldbase != ctxt->input->base) { 7969 long delta = ctxt->input->base - oldbase; 7970 start = start + delta; 7971 in = in + delta; 7972 } 7973 end = ctxt->input->end; 7974 } 7975 } 7976 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7977 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7978 if ((*in++ == 0x20) && (*in == 0x20)) break; 7979 if (in >= end) { 7980 const xmlChar *oldbase = ctxt->input->base; 7981 GROW; 7982 if (oldbase != ctxt->input->base) { 7983 long delta = ctxt->input->base - oldbase; 7984 start = start + delta; 7985 in = in + delta; 7986 } 7987 end = ctxt->input->end; 7988 } 7989 } 7990 last = in; 7991 /* 7992 * skip the trailing blanks 7993 */ 7994 while ((last[-1] == 0x20) && (last > start)) 
last--; 7995 while ((in < end) && (*in != limit) && 7996 ((*in == 0x20) || (*in == 0x9) || 7997 (*in == 0xA) || (*in == 0xD))) { 7998 in++; 7999 if (in >= end) { 8000 const xmlChar *oldbase = ctxt->input->base; 8001 GROW; 8002 if (oldbase != ctxt->input->base) { 8003 long delta = ctxt->input->base - oldbase; 8004 start = start + delta; 8005 in = in + delta; 8006 last = last + delta; 8007 } 8008 end = ctxt->input->end; 8009 } 8010 } 8011 if (*in != limit) goto need_complex; 8012 } else { 8013 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8014 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8015 in++; 8016 if (in >= end) { 8017 const xmlChar *oldbase = ctxt->input->base; 8018 GROW; 8019 if (oldbase != ctxt->input->base) { 8020 long delta = ctxt->input->base - oldbase; 8021 start = start + delta; 8022 in = in + delta; 8023 } 8024 end = ctxt->input->end; 8025 } 8026 } 8027 last = in; 8028 if (*in != limit) goto need_complex; 8029 } 8030 in++; 8031 if (len != NULL) { 8032 *len = last - start; 8033 ret = (xmlChar *) start; 8034 } else { 8035 if (alloc) *alloc = 1; 8036 ret = xmlStrndup(start, last - start); 8037 } 8038 CUR_PTR = in; 8039 if (alloc) *alloc = 0; 8040 return ret; 8041need_complex: 8042 if (alloc) *alloc = 1; 8043 return xmlParseAttValueComplex(ctxt, len, normalize); 8044} 8045 8046/** 8047 * xmlParseAttribute2: 8048 * @ctxt: an XML parser context 8049 * @pref: the element prefix 8050 * @elem: the element name 8051 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8052 * @value: a xmlChar ** used to store the value of the attribute 8053 * @len: an int * to save the length of the attribute 8054 * @alloc: an int * to indicate if the attribute was allocated 8055 * 8056 * parse an attribute in the new SAX2 framework. 8057 * 8058 * Returns the attribute name, and the value in *value, . 
8059 */ 8060 8061static const xmlChar * 8062xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8063 const xmlChar * pref, const xmlChar * elem, 8064 const xmlChar ** prefix, xmlChar ** value, 8065 int *len, int *alloc) 8066{ 8067 const xmlChar *name; 8068 xmlChar *val, *internal_val = NULL; 8069 int normalize = 0; 8070 8071 *value = NULL; 8072 GROW; 8073 name = xmlParseQName(ctxt, prefix); 8074 if (name == NULL) { 8075 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8076 "error parsing attribute name\n"); 8077 return (NULL); 8078 } 8079 8080 /* 8081 * get the type if needed 8082 */ 8083 if (ctxt->attsSpecial != NULL) { 8084 int type; 8085 8086 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8087 pref, elem, *prefix, name); 8088 if (type != 0) 8089 normalize = 1; 8090 } 8091 8092 /* 8093 * read the value 8094 */ 8095 SKIP_BLANKS; 8096 if (RAW == '=') { 8097 NEXT; 8098 SKIP_BLANKS; 8099 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8100 if (normalize) { 8101 /* 8102 * Sometimes a second normalisation pass for spaces is needed 8103 * but that only happens if charrefs or entities refernces 8104 * have been used in the attribute value, i.e. the attribute 8105 * value have been extracted in an allocated string already. 
8106 */ 8107 if (*alloc) { 8108 const xmlChar *val2; 8109 8110 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8111 if (val2 != NULL) { 8112 xmlFree(val); 8113 val = (xmlChar *) val2; 8114 } 8115 } 8116 } 8117 ctxt->instate = XML_PARSER_CONTENT; 8118 } else { 8119 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8120 "Specification mandate value for attribute %s\n", 8121 name); 8122 return (NULL); 8123 } 8124 8125 if (*prefix == ctxt->str_xml) { 8126 /* 8127 * Check that xml:lang conforms to the specification 8128 * No more registered as an error, just generate a warning now 8129 * since this was deprecated in XML second edition 8130 */ 8131 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8132 internal_val = xmlStrndup(val, *len); 8133 if (!xmlCheckLanguageID(internal_val)) { 8134 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8135 "Malformed value for xml:lang : %s\n", 8136 internal_val, NULL); 8137 } 8138 } 8139 8140 /* 8141 * Check that xml:space conforms to the specification 8142 */ 8143 if (xmlStrEqual(name, BAD_CAST "space")) { 8144 internal_val = xmlStrndup(val, *len); 8145 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8146 *(ctxt->space) = 0; 8147 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8148 *(ctxt->space) = 1; 8149 else { 8150 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8151 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8152 internal_val, NULL); 8153 } 8154 } 8155 if (internal_val) { 8156 xmlFree(internal_val); 8157 } 8158 } 8159 8160 *value = val; 8161 return (name); 8162} 8163/** 8164 * xmlParseStartTag2: 8165 * @ctxt: an XML parser context 8166 * 8167 * parse a start of tag either for rule element or 8168 * EmptyElement. In both case we don't parse the tag closing chars. 8169 * This routine is called when running SAX2 parsing 8170 * 8171 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8172 * 8173 * [ WFC: Unique Att Spec ] 8174 * No attribute name may appear more than once in the same start-tag or 8175 * empty-element tag. 8176 * 8177 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8178 * 8179 * [ WFC: Unique Att Spec ] 8180 * No attribute name may appear more than once in the same start-tag or 8181 * empty-element tag. 8182 * 8183 * With namespace: 8184 * 8185 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8186 * 8187 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8188 * 8189 * Returns the element name parsed 8190 */ 8191 8192static const xmlChar * 8193xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8194 const xmlChar **URI, int *tlen) { 8195 const xmlChar *localname; 8196 const xmlChar *prefix; 8197 const xmlChar *attname; 8198 const xmlChar *aprefix; 8199 const xmlChar *nsname; 8200 xmlChar *attvalue; 8201 const xmlChar **atts = ctxt->atts; 8202 int maxatts = ctxt->maxatts; 8203 int nratts, nbatts, nbdef; 8204 int i, j, nbNs, attval, oldline, oldcol; 8205 const xmlChar *base; 8206 unsigned long cur; 8207 int nsNr = ctxt->nsNr; 8208 8209 if (RAW != '<') return(NULL); 8210 NEXT1; 8211 8212 /* 8213 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8214 * point since the attribute values may be stored as pointers to 8215 * the buffer and calling SHRINK would destroy them ! 8216 * The Shrinking is only possible once the full set of attribute 8217 * callbacks have been done. 8218 */ 8219reparse: 8220 SHRINK; 8221 base = ctxt->input->base; 8222 cur = ctxt->input->cur - ctxt->input->base; 8223 oldline = ctxt->input->line; 8224 oldcol = ctxt->input->col; 8225 nbatts = 0; 8226 nratts = 0; 8227 nbdef = 0; 8228 nbNs = 0; 8229 attval = 0; 8230 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8231 ctxt->nsNr = nsNr; 8232 8233 localname = xmlParseQName(ctxt, &prefix); 8234 if (localname == NULL) { 8235 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8236 "StartTag: invalid element name\n"); 8237 return(NULL); 8238 } 8239 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8240 8241 /* 8242 * Now parse the attributes, it ends up with the ending 8243 * 8244 * (S Attribute)* S? 8245 */ 8246 SKIP_BLANKS; 8247 GROW; 8248 if (ctxt->input->base != base) goto base_changed; 8249 8250 while ((RAW != '>') && 8251 ((RAW != '/') || (NXT(1) != '>')) && 8252 (IS_BYTE_CHAR(RAW))) { 8253 const xmlChar *q = CUR_PTR; 8254 unsigned int cons = ctxt->input->consumed; 8255 int len = -1, alloc = 0; 8256 8257 attname = xmlParseAttribute2(ctxt, prefix, localname, 8258 &aprefix, &attvalue, &len, &alloc); 8259 if (ctxt->input->base != base) { 8260 if ((attvalue != NULL) && (alloc != 0)) 8261 xmlFree(attvalue); 8262 attvalue = NULL; 8263 goto base_changed; 8264 } 8265 if ((attname != NULL) && (attvalue != NULL)) { 8266 if (len < 0) len = xmlStrlen(attvalue); 8267 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8268 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8269 xmlURIPtr uri; 8270 8271 if (*URL != 0) { 8272 uri = xmlParseURI((const char *) URL); 8273 if (uri == NULL) { 8274 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 8275 "xmlns: %s not a valid URI\n", 8276 URL, NULL); 8277 } else { 8278 if (uri->scheme == NULL) { 8279 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 8280 "xmlns: URI %s is not absolute\n", 8281 URL, NULL); 8282 } 8283 xmlFreeURI(uri); 8284 } 8285 } 8286 /* 8287 * check that it's not a defined namespace 8288 */ 8289 for (j = 1;j <= nbNs;j++) 8290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8291 break; 8292 if (j <= nbNs) 8293 xmlErrAttributeDup(ctxt, NULL, attname); 8294 else 8295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8296 if (alloc != 0) xmlFree(attvalue); 8297 SKIP_BLANKS; 8298 continue; 8299 } 8300 if (aprefix == ctxt->str_xmlns) { 8301 const 
xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8302 xmlURIPtr uri; 8303 8304 if (attname == ctxt->str_xml) { 8305 if (URL != ctxt->str_xml_ns) { 8306 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8307 "xml namespace prefix mapped to wrong URI\n", 8308 NULL, NULL, NULL); 8309 } 8310 /* 8311 * Do not keep a namespace definition node 8312 */ 8313 if (alloc != 0) xmlFree(attvalue); 8314 SKIP_BLANKS; 8315 continue; 8316 } 8317 uri = xmlParseURI((const char *) URL); 8318 if (uri == NULL) { 8319 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 8320 "xmlns:%s: '%s' is not a valid URI\n", 8321 attname, URL); 8322 } else { 8323 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 8324 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 8325 "xmlns:%s: URI %s is not absolute\n", 8326 attname, URL); 8327 } 8328 xmlFreeURI(uri); 8329 } 8330 8331 /* 8332 * check that it's not a defined namespace 8333 */ 8334 for (j = 1;j <= nbNs;j++) 8335 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8336 break; 8337 if (j <= nbNs) 8338 xmlErrAttributeDup(ctxt, aprefix, attname); 8339 else 8340 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 8341 if (alloc != 0) xmlFree(attvalue); 8342 SKIP_BLANKS; 8343 if (ctxt->input->base != base) goto base_changed; 8344 continue; 8345 } 8346 8347 /* 8348 * Add the pair to atts 8349 */ 8350 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8351 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8352 if (attvalue[len] == 0) 8353 xmlFree(attvalue); 8354 goto failed; 8355 } 8356 maxatts = ctxt->maxatts; 8357 atts = ctxt->atts; 8358 } 8359 ctxt->attallocs[nratts++] = alloc; 8360 atts[nbatts++] = attname; 8361 atts[nbatts++] = aprefix; 8362 atts[nbatts++] = NULL; /* the URI will be fetched later */ 8363 atts[nbatts++] = attvalue; 8364 attvalue += len; 8365 atts[nbatts++] = attvalue; 8366 /* 8367 * tag if some deallocation is needed 8368 */ 8369 if (alloc != 0) attval = 1; 8370 } else { 8371 if ((attvalue != NULL) && (attvalue[len] == 0)) 8372 xmlFree(attvalue); 8373 } 8374 8375failed: 8376 
8377 GROW 8378 if (ctxt->input->base != base) goto base_changed; 8379 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8380 break; 8381 if (!IS_BLANK_CH(RAW)) { 8382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8383 "attributes construct error\n"); 8384 break; 8385 } 8386 SKIP_BLANKS; 8387 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8388 (attname == NULL) && (attvalue == NULL)) { 8389 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8390 "xmlParseStartTag: problem parsing attributes\n"); 8391 break; 8392 } 8393 GROW; 8394 if (ctxt->input->base != base) goto base_changed; 8395 } 8396 8397 /* 8398 * The attributes defaulting 8399 */ 8400 if (ctxt->attsDefault != NULL) { 8401 xmlDefAttrsPtr defaults; 8402 8403 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 8404 if (defaults != NULL) { 8405 for (i = 0;i < defaults->nbAttrs;i++) { 8406 attname = defaults->values[4 * i]; 8407 aprefix = defaults->values[4 * i + 1]; 8408 8409 /* 8410 * special work for namespaces defaulted defs 8411 */ 8412 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8413 /* 8414 * check that it's not a defined namespace 8415 */ 8416 for (j = 1;j <= nbNs;j++) 8417 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8418 break; 8419 if (j <= nbNs) continue; 8420 8421 nsname = xmlGetNamespace(ctxt, NULL); 8422 if (nsname != defaults->values[4 * i + 2]) { 8423 if (nsPush(ctxt, NULL, 8424 defaults->values[4 * i + 2]) > 0) 8425 nbNs++; 8426 } 8427 } else if (aprefix == ctxt->str_xmlns) { 8428 /* 8429 * check that it's not a defined namespace 8430 */ 8431 for (j = 1;j <= nbNs;j++) 8432 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8433 break; 8434 if (j <= nbNs) continue; 8435 8436 nsname = xmlGetNamespace(ctxt, attname); 8437 if (nsname != defaults->values[2]) { 8438 if (nsPush(ctxt, attname, 8439 defaults->values[4 * i + 2]) > 0) 8440 nbNs++; 8441 } 8442 } else { 8443 /* 8444 * check that it's not a defined attribute 8445 */ 8446 for (j = 0;j < nbatts;j+=5) { 8447 if 
((attname == atts[j]) && (aprefix == atts[j+1])) 8448 break; 8449 } 8450 if (j < nbatts) continue; 8451 8452 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8453 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8454 return(NULL); 8455 } 8456 maxatts = ctxt->maxatts; 8457 atts = ctxt->atts; 8458 } 8459 atts[nbatts++] = attname; 8460 atts[nbatts++] = aprefix; 8461 if (aprefix == NULL) 8462 atts[nbatts++] = NULL; 8463 else 8464 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 8465 atts[nbatts++] = defaults->values[4 * i + 2]; 8466 atts[nbatts++] = defaults->values[4 * i + 3]; 8467 nbdef++; 8468 } 8469 } 8470 } 8471 } 8472 8473 /* 8474 * The attributes checkings 8475 */ 8476 for (i = 0; i < nbatts;i += 5) { 8477 /* 8478 * The default namespace does not apply to attribute names. 8479 */ 8480 if (atts[i + 1] != NULL) { 8481 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 8482 if (nsname == NULL) { 8483 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8484 "Namespace prefix %s for %s on %s is not defined\n", 8485 atts[i + 1], atts[i], localname); 8486 } 8487 atts[i + 2] = nsname; 8488 } else 8489 nsname = NULL; 8490 /* 8491 * [ WFC: Unique Att Spec ] 8492 * No attribute name may appear more than once in the same 8493 * start-tag or empty-element tag. 8494 * As extended by the Namespace in XML REC. 
8495 */ 8496 for (j = 0; j < i;j += 5) { 8497 if (atts[i] == atts[j]) { 8498 if (atts[i+1] == atts[j+1]) { 8499 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 8500 break; 8501 } 8502 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 8503 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 8504 "Namespaced Attribute %s in '%s' redefined\n", 8505 atts[i], nsname, NULL); 8506 break; 8507 } 8508 } 8509 } 8510 } 8511 8512 nsname = xmlGetNamespace(ctxt, prefix); 8513 if ((prefix != NULL) && (nsname == NULL)) { 8514 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8515 "Namespace prefix %s on %s is not defined\n", 8516 prefix, localname, NULL); 8517 } 8518 *pref = prefix; 8519 *URI = nsname; 8520 8521 /* 8522 * SAX: Start of Element ! 8523 */ 8524 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 8525 (!ctxt->disableSAX)) { 8526 if (nbNs > 0) 8527 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8528 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 8529 nbatts / 5, nbdef, atts); 8530 else 8531 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8532 nsname, 0, NULL, nbatts / 5, nbdef, atts); 8533 } 8534 8535 /* 8536 * Free up attribute allocated strings if needed 8537 */ 8538 if (attval != 0) { 8539 for (i = 3,j = 0; j < nratts;i += 5,j++) 8540 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8541 xmlFree((xmlChar *) atts[i]); 8542 } 8543 8544 return(localname); 8545 8546base_changed: 8547 /* 8548 * the attribute strings are valid iif the base didn't changed 8549 */ 8550 if (attval != 0) { 8551 for (i = 3,j = 0; j < nratts;i += 5,j++) 8552 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8553 xmlFree((xmlChar *) atts[i]); 8554 } 8555 ctxt->input->cur = ctxt->input->base + cur; 8556 ctxt->input->line = oldline; 8557 ctxt->input->col = oldcol; 8558 if (ctxt->wellFormed == 1) { 8559 goto reparse; 8560 } 8561 return(NULL); 8562} 8563 8564/** 8565 * xmlParseEndTag2: 8566 * @ctxt: an XML parser context 8567 * @line: line of the start tag 
8568 * @nsNr: number of namespaces on the start tag 8569 * 8570 * parse an end of tag 8571 * 8572 * [42] ETag ::= '</' Name S? '>' 8573 * 8574 * With namespace 8575 * 8576 * [NS 9] ETag ::= '</' QName S? '>' 8577 */ 8578 8579static void 8580xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 8581 const xmlChar *URI, int line, int nsNr, int tlen) { 8582 const xmlChar *name; 8583 8584 GROW; 8585 if ((RAW != '<') || (NXT(1) != '/')) { 8586 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 8587 return; 8588 } 8589 SKIP(2); 8590 8591 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 8592 if (ctxt->input->cur[tlen] == '>') { 8593 ctxt->input->cur += tlen + 1; 8594 goto done; 8595 } 8596 ctxt->input->cur += tlen; 8597 name = (xmlChar*)1; 8598 } else { 8599 if (prefix == NULL) 8600 name = xmlParseNameAndCompare(ctxt, ctxt->name); 8601 else 8602 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 8603 } 8604 8605 /* 8606 * We should definitely be at the ending "S? '>'" part 8607 */ 8608 GROW; 8609 SKIP_BLANKS; 8610 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8611 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8612 } else 8613 NEXT1; 8614 8615 /* 8616 * [ WFC: Element Type Match ] 8617 * The Name in an element's end-tag must match the element type in the 8618 * start-tag. 
8619 * 8620 */ 8621 if (name != (xmlChar*)1) { 8622 if (name == NULL) name = BAD_CAST "unparseable"; 8623 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8624 "Opening and ending tag mismatch: %s line %d and %s\n", 8625 ctxt->name, line, name); 8626 } 8627 8628 /* 8629 * SAX: End of Tag 8630 */ 8631done: 8632 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8633 (!ctxt->disableSAX)) 8634 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 8635 8636 spacePop(ctxt); 8637 if (nsNr != 0) 8638 nsPop(ctxt, nsNr); 8639 return; 8640} 8641 8642/** 8643 * xmlParseCDSect: 8644 * @ctxt: an XML parser context 8645 * 8646 * Parse escaped pure raw content. 8647 * 8648 * [18] CDSect ::= CDStart CData CDEnd 8649 * 8650 * [19] CDStart ::= '<![CDATA[' 8651 * 8652 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 8653 * 8654 * [21] CDEnd ::= ']]>' 8655 */ 8656void 8657xmlParseCDSect(xmlParserCtxtPtr ctxt) { 8658 xmlChar *buf = NULL; 8659 int len = 0; 8660 int size = XML_PARSER_BUFFER_SIZE; 8661 int r, rl; 8662 int s, sl; 8663 int cur, l; 8664 int count = 0; 8665 8666 /* Check 2.6.0 was NXT(0) not RAW */ 8667 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8668 SKIP(9); 8669 } else 8670 return; 8671 8672 ctxt->instate = XML_PARSER_CDATA_SECTION; 8673 r = CUR_CHAR(rl); 8674 if (!IS_CHAR(r)) { 8675 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8676 ctxt->instate = XML_PARSER_CONTENT; 8677 return; 8678 } 8679 NEXTL(rl); 8680 s = CUR_CHAR(sl); 8681 if (!IS_CHAR(s)) { 8682 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8683 ctxt->instate = XML_PARSER_CONTENT; 8684 return; 8685 } 8686 NEXTL(sl); 8687 cur = CUR_CHAR(l); 8688 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8689 if (buf == NULL) { 8690 xmlErrMemory(ctxt, NULL); 8691 return; 8692 } 8693 while (IS_CHAR(cur) && 8694 ((r != ']') || (s != ']') || (cur != '>'))) { 8695 if (len + 5 >= size) { 8696 xmlChar *tmp; 8697 8698 size *= 2; 8699 tmp = (xmlChar *) 
xmlRealloc(buf, size * sizeof(xmlChar)); 8700 if (tmp == NULL) { 8701 xmlFree(buf); 8702 xmlErrMemory(ctxt, NULL); 8703 return; 8704 } 8705 buf = tmp; 8706 } 8707 COPY_BUF(rl,buf,len,r); 8708 r = s; 8709 rl = sl; 8710 s = cur; 8711 sl = l; 8712 count++; 8713 if (count > 50) { 8714 GROW; 8715 count = 0; 8716 } 8717 NEXTL(l); 8718 cur = CUR_CHAR(l); 8719 } 8720 buf[len] = 0; 8721 ctxt->instate = XML_PARSER_CONTENT; 8722 if (cur != '>') { 8723 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 8724 "CData section not finished\n%.50s\n", buf); 8725 xmlFree(buf); 8726 return; 8727 } 8728 NEXTL(l); 8729 8730 /* 8731 * OK the buffer is to be consumed as cdata. 8732 */ 8733 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8734 if (ctxt->sax->cdataBlock != NULL) 8735 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 8736 else if (ctxt->sax->characters != NULL) 8737 ctxt->sax->characters(ctxt->userData, buf, len); 8738 } 8739 xmlFree(buf); 8740} 8741 8742/** 8743 * xmlParseContent: 8744 * @ctxt: an XML parser context 8745 * 8746 * Parse a content: 8747 * 8748 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8749 */ 8750 8751void 8752xmlParseContent(xmlParserCtxtPtr ctxt) { 8753 GROW; 8754 while ((RAW != 0) && 8755 ((RAW != '<') || (NXT(1) != '/')) && 8756 (ctxt->instate != XML_PARSER_EOF)) { 8757 const xmlChar *test = CUR_PTR; 8758 unsigned int cons = ctxt->input->consumed; 8759 const xmlChar *cur = ctxt->input->cur; 8760 8761 /* 8762 * First case : a Processing Instruction. 
8763 */ 8764 if ((*cur == '<') && (cur[1] == '?')) { 8765 xmlParsePI(ctxt); 8766 } 8767 8768 /* 8769 * Second case : a CDSection 8770 */ 8771 /* 2.6.0 test was *cur not RAW */ 8772 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8773 xmlParseCDSect(ctxt); 8774 } 8775 8776 /* 8777 * Third case : a comment 8778 */ 8779 else if ((*cur == '<') && (NXT(1) == '!') && 8780 (NXT(2) == '-') && (NXT(3) == '-')) { 8781 xmlParseComment(ctxt); 8782 ctxt->instate = XML_PARSER_CONTENT; 8783 } 8784 8785 /* 8786 * Fourth case : a sub-element. 8787 */ 8788 else if (*cur == '<') { 8789 xmlParseElement(ctxt); 8790 } 8791 8792 /* 8793 * Fifth case : a reference. If if has not been resolved, 8794 * parsing returns it's Name, create the node 8795 */ 8796 8797 else if (*cur == '&') { 8798 xmlParseReference(ctxt); 8799 } 8800 8801 /* 8802 * Last case, text. Note that References are handled directly. 8803 */ 8804 else { 8805 xmlParseCharData(ctxt, 0); 8806 } 8807 8808 GROW; 8809 /* 8810 * Pop-up of finished entities. 8811 */ 8812 while ((RAW == 0) && (ctxt->inputNr > 1)) 8813 xmlPopInput(ctxt); 8814 SHRINK; 8815 8816 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8817 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8818 "detected an error in element content\n"); 8819 ctxt->instate = XML_PARSER_EOF; 8820 break; 8821 } 8822 } 8823} 8824 8825/** 8826 * xmlParseElement: 8827 * @ctxt: an XML parser context 8828 * 8829 * parse an XML element, this is highly recursive 8830 * 8831 * [39] element ::= EmptyElemTag | STag content ETag 8832 * 8833 * [ WFC: Element Type Match ] 8834 * The Name in an element's end-tag must match the element type in the 8835 * start-tag. 
8836 * 8837 */ 8838 8839void 8840xmlParseElement(xmlParserCtxtPtr ctxt) { 8841 const xmlChar *name; 8842 const xmlChar *prefix; 8843 const xmlChar *URI; 8844 xmlParserNodeInfo node_info; 8845 int line, tlen; 8846 xmlNodePtr ret; 8847 int nsNr = ctxt->nsNr; 8848 8849 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { 8850 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 8851 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 8852 xmlParserMaxDepth); 8853 ctxt->instate = XML_PARSER_EOF; 8854 return; 8855 } 8856 8857 /* Capture start position */ 8858 if (ctxt->record_info) { 8859 node_info.begin_pos = ctxt->input->consumed + 8860 (CUR_PTR - ctxt->input->base); 8861 node_info.begin_line = ctxt->input->line; 8862 } 8863 8864 if (ctxt->spaceNr == 0) 8865 spacePush(ctxt, -1); 8866 else if (*ctxt->space == -2) 8867 spacePush(ctxt, -1); 8868 else 8869 spacePush(ctxt, *ctxt->space); 8870 8871 line = ctxt->input->line; 8872#ifdef LIBXML_SAX1_ENABLED 8873 if (ctxt->sax2) 8874#endif /* LIBXML_SAX1_ENABLED */ 8875 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 8876#ifdef LIBXML_SAX1_ENABLED 8877 else 8878 name = xmlParseStartTag(ctxt); 8879#endif /* LIBXML_SAX1_ENABLED */ 8880 if (name == NULL) { 8881 spacePop(ctxt); 8882 return; 8883 } 8884 namePush(ctxt, name); 8885 ret = ctxt->node; 8886 8887#ifdef LIBXML_VALID_ENABLED 8888 /* 8889 * [ VC: Root Element Type ] 8890 * The Name in the document type declaration must match the element 8891 * type of the root element. 8892 */ 8893 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8894 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8895 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8896#endif /* LIBXML_VALID_ENABLED */ 8897 8898 /* 8899 * Check for an Empty Element. 
8900 */ 8901 if ((RAW == '/') && (NXT(1) == '>')) { 8902 SKIP(2); 8903 if (ctxt->sax2) { 8904 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8905 (!ctxt->disableSAX)) 8906 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8907#ifdef LIBXML_SAX1_ENABLED 8908 } else { 8909 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8910 (!ctxt->disableSAX)) 8911 ctxt->sax->endElement(ctxt->userData, name); 8912#endif /* LIBXML_SAX1_ENABLED */ 8913 } 8914 namePop(ctxt); 8915 spacePop(ctxt); 8916 if (nsNr != ctxt->nsNr) 8917 nsPop(ctxt, ctxt->nsNr - nsNr); 8918 if ( ret != NULL && ctxt->record_info ) { 8919 node_info.end_pos = ctxt->input->consumed + 8920 (CUR_PTR - ctxt->input->base); 8921 node_info.end_line = ctxt->input->line; 8922 node_info.node = ret; 8923 xmlParserAddNodeInfo(ctxt, &node_info); 8924 } 8925 return; 8926 } 8927 if (RAW == '>') { 8928 NEXT1; 8929 } else { 8930 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 8931 "Couldn't find end of Start Tag %s line %d\n", 8932 name, line, NULL); 8933 8934 /* 8935 * end of parsing of this node. 8936 */ 8937 nodePop(ctxt); 8938 namePop(ctxt); 8939 spacePop(ctxt); 8940 if (nsNr != ctxt->nsNr) 8941 nsPop(ctxt, ctxt->nsNr - nsNr); 8942 8943 /* 8944 * Capture end position and add node 8945 */ 8946 if ( ret != NULL && ctxt->record_info ) { 8947 node_info.end_pos = ctxt->input->consumed + 8948 (CUR_PTR - ctxt->input->base); 8949 node_info.end_line = ctxt->input->line; 8950 node_info.node = ret; 8951 xmlParserAddNodeInfo(ctxt, &node_info); 8952 } 8953 return; 8954 } 8955 8956 /* 8957 * Parse the content of the element: 8958 */ 8959 xmlParseContent(ctxt); 8960 if (!IS_BYTE_CHAR(RAW)) { 8961 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 8962 "Premature end of data in tag %s line %d\n", 8963 name, line, NULL); 8964 8965 /* 8966 * end of parsing of this node. 
8967 */ 8968 nodePop(ctxt); 8969 namePop(ctxt); 8970 spacePop(ctxt); 8971 if (nsNr != ctxt->nsNr) 8972 nsPop(ctxt, ctxt->nsNr - nsNr); 8973 return; 8974 } 8975 8976 /* 8977 * parse the end of tag: '</' should be here. 8978 */ 8979 if (ctxt->sax2) { 8980 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 8981 namePop(ctxt); 8982 } 8983#ifdef LIBXML_SAX1_ENABLED 8984 else 8985 xmlParseEndTag1(ctxt, line); 8986#endif /* LIBXML_SAX1_ENABLED */ 8987 8988 /* 8989 * Capture end position and add node 8990 */ 8991 if ( ret != NULL && ctxt->record_info ) { 8992 node_info.end_pos = ctxt->input->consumed + 8993 (CUR_PTR - ctxt->input->base); 8994 node_info.end_line = ctxt->input->line; 8995 node_info.node = ret; 8996 xmlParserAddNodeInfo(ctxt, &node_info); 8997 } 8998} 8999 9000/** 9001 * xmlParseVersionNum: 9002 * @ctxt: an XML parser context 9003 * 9004 * parse the XML version value. 9005 * 9006 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 9007 * 9008 * Returns the string giving the XML version number, or NULL 9009 */ 9010xmlChar * 9011xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9012 xmlChar *buf = NULL; 9013 int len = 0; 9014 int size = 10; 9015 xmlChar cur; 9016 9017 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9018 if (buf == NULL) { 9019 xmlErrMemory(ctxt, NULL); 9020 return(NULL); 9021 } 9022 cur = CUR; 9023 while (((cur >= 'a') && (cur <= 'z')) || 9024 ((cur >= 'A') && (cur <= 'Z')) || 9025 ((cur >= '0') && (cur <= '9')) || 9026 (cur == '_') || (cur == '.') || 9027 (cur == ':') || (cur == '-')) { 9028 if (len + 1 >= size) { 9029 xmlChar *tmp; 9030 9031 size *= 2; 9032 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9033 if (tmp == NULL) { 9034 xmlFree(buf); 9035 xmlErrMemory(ctxt, NULL); 9036 return(NULL); 9037 } 9038 buf = tmp; 9039 } 9040 buf[len++] = cur; 9041 NEXT; 9042 cur=CUR; 9043 } 9044 buf[len] = 0; 9045 return(buf); 9046} 9047 9048/** 9049 * xmlParseVersionInfo: 9050 * @ctxt: an XML parser context 9051 * 9052 * parse 
the XML version. 9053 * 9054 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9055 * 9056 * [25] Eq ::= S? '=' S? 9057 * 9058 * Returns the version string, e.g. "1.0" 9059 */ 9060 9061xmlChar * 9062xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9063 xmlChar *version = NULL; 9064 9065 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9066 SKIP(7); 9067 SKIP_BLANKS; 9068 if (RAW != '=') { 9069 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9070 return(NULL); 9071 } 9072 NEXT; 9073 SKIP_BLANKS; 9074 if (RAW == '"') { 9075 NEXT; 9076 version = xmlParseVersionNum(ctxt); 9077 if (RAW != '"') { 9078 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9079 } else 9080 NEXT; 9081 } else if (RAW == '\''){ 9082 NEXT; 9083 version = xmlParseVersionNum(ctxt); 9084 if (RAW != '\'') { 9085 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9086 } else 9087 NEXT; 9088 } else { 9089 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9090 } 9091 } 9092 return(version); 9093} 9094 9095/** 9096 * xmlParseEncName: 9097 * @ctxt: an XML parser context 9098 * 9099 * parse the XML encoding name 9100 * 9101 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9102 * 9103 * Returns the encoding name value or NULL 9104 */ 9105xmlChar * 9106xmlParseEncName(xmlParserCtxtPtr ctxt) { 9107 xmlChar *buf = NULL; 9108 int len = 0; 9109 int size = 10; 9110 xmlChar cur; 9111 9112 cur = CUR; 9113 if (((cur >= 'a') && (cur <= 'z')) || 9114 ((cur >= 'A') && (cur <= 'Z'))) { 9115 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9116 if (buf == NULL) { 9117 xmlErrMemory(ctxt, NULL); 9118 return(NULL); 9119 } 9120 9121 buf[len++] = cur; 9122 NEXT; 9123 cur = CUR; 9124 while (((cur >= 'a') && (cur <= 'z')) || 9125 ((cur >= 'A') && (cur <= 'Z')) || 9126 ((cur >= '0') && (cur <= '9')) || 9127 (cur == '.') || (cur == '_') || 9128 (cur == '-')) { 9129 if (len + 1 >= size) { 9130 xmlChar *tmp; 9131 9132 size *= 2; 9133 tmp = (xmlChar *) xmlRealloc(buf, size * 
sizeof(xmlChar)); 9134 if (tmp == NULL) { 9135 xmlErrMemory(ctxt, NULL); 9136 xmlFree(buf); 9137 return(NULL); 9138 } 9139 buf = tmp; 9140 } 9141 buf[len++] = cur; 9142 NEXT; 9143 cur = CUR; 9144 if (cur == 0) { 9145 SHRINK; 9146 GROW; 9147 cur = CUR; 9148 } 9149 } 9150 buf[len] = 0; 9151 } else { 9152 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9153 } 9154 return(buf); 9155} 9156 9157/** 9158 * xmlParseEncodingDecl: 9159 * @ctxt: an XML parser context 9160 * 9161 * parse the XML encoding declaration 9162 * 9163 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9164 * 9165 * this setups the conversion filters. 9166 * 9167 * Returns the encoding value or NULL 9168 */ 9169 9170const xmlChar * 9171xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9172 xmlChar *encoding = NULL; 9173 9174 SKIP_BLANKS; 9175 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9176 SKIP(8); 9177 SKIP_BLANKS; 9178 if (RAW != '=') { 9179 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9180 return(NULL); 9181 } 9182 NEXT; 9183 SKIP_BLANKS; 9184 if (RAW == '"') { 9185 NEXT; 9186 encoding = xmlParseEncName(ctxt); 9187 if (RAW != '"') { 9188 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9189 } else 9190 NEXT; 9191 } else if (RAW == '\''){ 9192 NEXT; 9193 encoding = xmlParseEncName(ctxt); 9194 if (RAW != '\'') { 9195 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9196 } else 9197 NEXT; 9198 } else { 9199 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9200 } 9201 /* 9202 * UTF-16 encoding stwich has already taken place at this stage, 9203 * more over the little-endian/big-endian selection is already done 9204 */ 9205 if ((encoding != NULL) && 9206 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9207 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9208 if (ctxt->encoding != NULL) 9209 xmlFree((xmlChar *) ctxt->encoding); 9210 ctxt->encoding = encoding; 9211 } 9212 /* 9213 * UTF-8 encoding is handled natively 9214 */ 9215 else if ((encoding != 
NULL) && 9216 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9217 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9218 if (ctxt->encoding != NULL) 9219 xmlFree((xmlChar *) ctxt->encoding); 9220 ctxt->encoding = encoding; 9221 } 9222 else if (encoding != NULL) { 9223 xmlCharEncodingHandlerPtr handler; 9224 9225 if (ctxt->input->encoding != NULL) 9226 xmlFree((xmlChar *) ctxt->input->encoding); 9227 ctxt->input->encoding = encoding; 9228 9229 handler = xmlFindCharEncodingHandler((const char *) encoding); 9230 if (handler != NULL) { 9231 xmlSwitchToEncoding(ctxt, handler); 9232 } else { 9233 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9234 "Unsupported encoding %s\n", encoding); 9235 return(NULL); 9236 } 9237 } 9238 } 9239 return(encoding); 9240} 9241 9242/** 9243 * xmlParseSDDecl: 9244 * @ctxt: an XML parser context 9245 * 9246 * parse the XML standalone declaration 9247 * 9248 * [32] SDDecl ::= S 'standalone' Eq 9249 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 9250 * 9251 * [ VC: Standalone Document Declaration ] 9252 * TODO The standalone document declaration must have the value "no" 9253 * if any external markup declarations contain declarations of: 9254 * - attributes with default values, if elements to which these 9255 * attributes apply appear in the document without specifications 9256 * of values for these attributes, or 9257 * - entities (other than amp, lt, gt, apos, quot), if references 9258 * to those entities appear in the document, or 9259 * - attributes with values subject to normalization, where the 9260 * attribute appears in the document with a value which will change 9261 * as a result of normalization, or 9262 * - element types with element content, if white space occurs directly 9263 * within any instance of those types. 
9264 * 9265 * Returns: 9266 * 1 if standalone="yes" 9267 * 0 if standalone="no" 9268 * -2 if standalone attribute is missing or invalid 9269 * (A standalone value of -2 means that the XML declaration was found, 9270 * but no value was specified for the standalone attribute). 9271 */ 9272 9273int 9274xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 9275 int standalone = -2; 9276 9277 SKIP_BLANKS; 9278 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 9279 SKIP(10); 9280 SKIP_BLANKS; 9281 if (RAW != '=') { 9282 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9283 return(standalone); 9284 } 9285 NEXT; 9286 SKIP_BLANKS; 9287 if (RAW == '\''){ 9288 NEXT; 9289 if ((RAW == 'n') && (NXT(1) == 'o')) { 9290 standalone = 0; 9291 SKIP(2); 9292 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9293 (NXT(2) == 's')) { 9294 standalone = 1; 9295 SKIP(3); 9296 } else { 9297 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9298 } 9299 if (RAW != '\'') { 9300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9301 } else 9302 NEXT; 9303 } else if (RAW == '"'){ 9304 NEXT; 9305 if ((RAW == 'n') && (NXT(1) == 'o')) { 9306 standalone = 0; 9307 SKIP(2); 9308 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9309 (NXT(2) == 's')) { 9310 standalone = 1; 9311 SKIP(3); 9312 } else { 9313 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9314 } 9315 if (RAW != '"') { 9316 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9317 } else 9318 NEXT; 9319 } else { 9320 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9321 } 9322 } 9323 return(standalone); 9324} 9325 9326/** 9327 * xmlParseXMLDecl: 9328 * @ctxt: an XML parser context 9329 * 9330 * parse an XML declaration header 9331 * 9332 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? 
'?>' 9333 */ 9334 9335void 9336xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 9337 xmlChar *version; 9338 9339 /* 9340 * This value for standalone indicates that the document has an 9341 * XML declaration but it does not have a standalone attribute. 9342 * It will be overwritten later if a standalone attribute is found. 9343 */ 9344 ctxt->input->standalone = -2; 9345 9346 /* 9347 * We know that '<?xml' is here. 9348 */ 9349 SKIP(5); 9350 9351 if (!IS_BLANK_CH(RAW)) { 9352 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9353 "Blank needed after '<?xml'\n"); 9354 } 9355 SKIP_BLANKS; 9356 9357 /* 9358 * We must have the VersionInfo here. 9359 */ 9360 version = xmlParseVersionInfo(ctxt); 9361 if (version == NULL) { 9362 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 9363 } else { 9364 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 9365 /* 9366 * TODO: Blueberry should be detected here 9367 */ 9368 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 9369 "Unsupported version '%s'\n", 9370 version, NULL); 9371 } 9372 if (ctxt->version != NULL) 9373 xmlFree((void *) ctxt->version); 9374 ctxt->version = version; 9375 } 9376 9377 /* 9378 * We may have the encoding declaration 9379 */ 9380 if (!IS_BLANK_CH(RAW)) { 9381 if ((RAW == '?') && (NXT(1) == '>')) { 9382 SKIP(2); 9383 return; 9384 } 9385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9386 } 9387 xmlParseEncodingDecl(ctxt); 9388 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9389 /* 9390 * The XML REC instructs us to stop parsing right here 9391 */ 9392 return; 9393 } 9394 9395 /* 9396 * We may have the standalone status. 
9397 */ 9398 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 9399 if ((RAW == '?') && (NXT(1) == '>')) { 9400 SKIP(2); 9401 return; 9402 } 9403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9404 } 9405 SKIP_BLANKS; 9406 ctxt->input->standalone = xmlParseSDDecl(ctxt); 9407 9408 SKIP_BLANKS; 9409 if ((RAW == '?') && (NXT(1) == '>')) { 9410 SKIP(2); 9411 } else if (RAW == '>') { 9412 /* Deprecated old WD ... */ 9413 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9414 NEXT; 9415 } else { 9416 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9417 MOVETO_ENDTAG(CUR_PTR); 9418 NEXT; 9419 } 9420} 9421 9422/** 9423 * xmlParseMisc: 9424 * @ctxt: an XML parser context 9425 * 9426 * parse an XML Misc* optional field. 9427 * 9428 * [27] Misc ::= Comment | PI | S 9429 */ 9430 9431void 9432xmlParseMisc(xmlParserCtxtPtr ctxt) { 9433 while (((RAW == '<') && (NXT(1) == '?')) || 9434 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 9435 IS_BLANK_CH(CUR)) { 9436 if ((RAW == '<') && (NXT(1) == '?')) { 9437 xmlParsePI(ctxt); 9438 } else if (IS_BLANK_CH(CUR)) { 9439 NEXT; 9440 } else 9441 xmlParseComment(ctxt); 9442 } 9443} 9444 9445/** 9446 * xmlParseDocument: 9447 * @ctxt: an XML parser context 9448 * 9449 * parse an XML document (and build a tree if using the standard SAX 9450 * interface). 9451 * 9452 * [1] document ::= prolog element Misc* 9453 * 9454 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 9455 * 9456 * Returns 0, -1 in case of error. the parser context is augmented 9457 * as a result of the parsing. 9458 */ 9459 9460int 9461xmlParseDocument(xmlParserCtxtPtr ctxt) { 9462 xmlChar start[4]; 9463 xmlCharEncoding enc; 9464 9465 xmlInitParser(); 9466 9467 if ((ctxt == NULL) || (ctxt->input == NULL)) 9468 return(-1); 9469 9470 GROW; 9471 9472 /* 9473 * SAX: detecting the level. 9474 */ 9475 xmlDetectSAX2(ctxt); 9476 9477 /* 9478 * SAX: beginning of the document processing. 
9479 */ 9480 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9481 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9482 9483 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 9484 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 9485 /* 9486 * Get the 4 first bytes and decode the charset 9487 * if enc != XML_CHAR_ENCODING_NONE 9488 * plug some encoding conversion routines. 9489 */ 9490 start[0] = RAW; 9491 start[1] = NXT(1); 9492 start[2] = NXT(2); 9493 start[3] = NXT(3); 9494 enc = xmlDetectCharEncoding(&start[0], 4); 9495 if (enc != XML_CHAR_ENCODING_NONE) { 9496 xmlSwitchEncoding(ctxt, enc); 9497 } 9498 } 9499 9500 9501 if (CUR == 0) { 9502 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9503 } 9504 9505 /* 9506 * Check for the XMLDecl in the Prolog. 9507 */ 9508 GROW; 9509 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9510 9511 /* 9512 * Note that we will switch encoding on the fly. 9513 */ 9514 xmlParseXMLDecl(ctxt); 9515 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9516 /* 9517 * The XML REC instructs us to stop parsing right here 9518 */ 9519 return(-1); 9520 } 9521 ctxt->standalone = ctxt->input->standalone; 9522 SKIP_BLANKS; 9523 } else { 9524 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9525 } 9526 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9527 ctxt->sax->startDocument(ctxt->userData); 9528 9529 /* 9530 * The Misc part of the Prolog 9531 */ 9532 GROW; 9533 xmlParseMisc(ctxt); 9534 9535 /* 9536 * Then possibly doc type declaration(s) and more Misc 9537 * (doctypedecl Misc*)? 9538 */ 9539 GROW; 9540 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 9541 9542 ctxt->inSubset = 1; 9543 xmlParseDocTypeDecl(ctxt); 9544 if (RAW == '[') { 9545 ctxt->instate = XML_PARSER_DTD; 9546 xmlParseInternalSubset(ctxt); 9547 } 9548 9549 /* 9550 * Create and update the external subset. 
9551 */ 9552 ctxt->inSubset = 2; 9553 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 9554 (!ctxt->disableSAX)) 9555 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9556 ctxt->extSubSystem, ctxt->extSubURI); 9557 ctxt->inSubset = 0; 9558 9559 xmlCleanSpecialAttr(ctxt); 9560 9561 ctxt->instate = XML_PARSER_PROLOG; 9562 xmlParseMisc(ctxt); 9563 } 9564 9565 /* 9566 * Time to start parsing the tree itself 9567 */ 9568 GROW; 9569 if (RAW != '<') { 9570 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 9571 "Start tag expected, '<' not found\n"); 9572 } else { 9573 ctxt->instate = XML_PARSER_CONTENT; 9574 xmlParseElement(ctxt); 9575 ctxt->instate = XML_PARSER_EPILOG; 9576 9577 9578 /* 9579 * The Misc part at the end 9580 */ 9581 xmlParseMisc(ctxt); 9582 9583 if (RAW != 0) { 9584 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9585 } 9586 ctxt->instate = XML_PARSER_EOF; 9587 } 9588 9589 /* 9590 * SAX: end of the document processing. 9591 */ 9592 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9593 ctxt->sax->endDocument(ctxt->userData); 9594 9595 /* 9596 * Remove locally kept entity definitions if the tree was not built 9597 */ 9598 if ((ctxt->myDoc != NULL) && 9599 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 9600 xmlFreeDoc(ctxt->myDoc); 9601 ctxt->myDoc = NULL; 9602 } 9603 9604 if (! ctxt->wellFormed) { 9605 ctxt->valid = 0; 9606 return(-1); 9607 } 9608 return(0); 9609} 9610 9611/** 9612 * xmlParseExtParsedEnt: 9613 * @ctxt: an XML parser context 9614 * 9615 * parse a general parsed entity 9616 * An external general parsed entity is well-formed if it matches the 9617 * production labeled extParsedEnt. 9618 * 9619 * [78] extParsedEnt ::= TextDecl? content 9620 * 9621 * Returns 0, -1 in case of error. the parser context is augmented 9622 * as a result of the parsing. 
9623 */ 9624 9625int 9626xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 9627 xmlChar start[4]; 9628 xmlCharEncoding enc; 9629 9630 if ((ctxt == NULL) || (ctxt->input == NULL)) 9631 return(-1); 9632 9633 xmlDefaultSAXHandlerInit(); 9634 9635 xmlDetectSAX2(ctxt); 9636 9637 GROW; 9638 9639 /* 9640 * SAX: beginning of the document processing. 9641 */ 9642 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9643 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9644 9645 /* 9646 * Get the 4 first bytes and decode the charset 9647 * if enc != XML_CHAR_ENCODING_NONE 9648 * plug some encoding conversion routines. 9649 */ 9650 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 9651 start[0] = RAW; 9652 start[1] = NXT(1); 9653 start[2] = NXT(2); 9654 start[3] = NXT(3); 9655 enc = xmlDetectCharEncoding(start, 4); 9656 if (enc != XML_CHAR_ENCODING_NONE) { 9657 xmlSwitchEncoding(ctxt, enc); 9658 } 9659 } 9660 9661 9662 if (CUR == 0) { 9663 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9664 } 9665 9666 /* 9667 * Check for the XMLDecl in the Prolog. 9668 */ 9669 GROW; 9670 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9671 9672 /* 9673 * Note that we will switch encoding on the fly. 
9674 */ 9675 xmlParseXMLDecl(ctxt); 9676 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9677 /* 9678 * The XML REC instructs us to stop parsing right here 9679 */ 9680 return(-1); 9681 } 9682 SKIP_BLANKS; 9683 } else { 9684 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9685 } 9686 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9687 ctxt->sax->startDocument(ctxt->userData); 9688 9689 /* 9690 * Doing validity checking on chunk doesn't make sense 9691 */ 9692 ctxt->instate = XML_PARSER_CONTENT; 9693 ctxt->validate = 0; 9694 ctxt->loadsubset = 0; 9695 ctxt->depth = 0; 9696 9697 xmlParseContent(ctxt); 9698 9699 if ((RAW == '<') && (NXT(1) == '/')) { 9700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 9701 } else if (RAW != 0) { 9702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 9703 } 9704 9705 /* 9706 * SAX: end of the document processing. 9707 */ 9708 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9709 ctxt->sax->endDocument(ctxt->userData); 9710 9711 if (! ctxt->wellFormed) return(-1); 9712 return(0); 9713} 9714 9715#ifdef LIBXML_PUSH_ENABLED 9716/************************************************************************ 9717 * * 9718 * Progressive parsing interfaces * 9719 * * 9720 ************************************************************************/ 9721 9722/** 9723 * xmlParseLookupSequence: 9724 * @ctxt: an XML parser context 9725 * @first: the first char to lookup 9726 * @next: the next char to lookup or zero 9727 * @third: the next char to lookup or zero 9728 * 9729 * Try to find if a sequence (first, next, third) or just (first next) or 9730 * (first) is available in the input stream. 9731 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 9732 * to avoid rescanning sequences of bytes, it DOES change the state of the 9733 * parser, do not use liberally. 9734 * 9735 * Returns the index to the current parsing point if the full sequence 9736 * is available, -1 otherwise. 
9737 */ 9738static int 9739xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 9740 xmlChar next, xmlChar third) { 9741 int base, len; 9742 xmlParserInputPtr in; 9743 const xmlChar *buf; 9744 9745 in = ctxt->input; 9746 if (in == NULL) return(-1); 9747 base = in->cur - in->base; 9748 if (base < 0) return(-1); 9749 if (ctxt->checkIndex > base) 9750 base = ctxt->checkIndex; 9751 if (in->buf == NULL) { 9752 buf = in->base; 9753 len = in->length; 9754 } else { 9755 buf = in->buf->buffer->content; 9756 len = in->buf->buffer->use; 9757 } 9758 /* take into account the sequence length */ 9759 if (third) len -= 2; 9760 else if (next) len --; 9761 for (;base < len;base++) { 9762 if (buf[base] == first) { 9763 if (third != 0) { 9764 if ((buf[base + 1] != next) || 9765 (buf[base + 2] != third)) continue; 9766 } else if (next != 0) { 9767 if (buf[base + 1] != next) continue; 9768 } 9769 ctxt->checkIndex = 0; 9770#ifdef DEBUG_PUSH 9771 if (next == 0) 9772 xmlGenericError(xmlGenericErrorContext, 9773 "PP: lookup '%c' found at %d\n", 9774 first, base); 9775 else if (third == 0) 9776 xmlGenericError(xmlGenericErrorContext, 9777 "PP: lookup '%c%c' found at %d\n", 9778 first, next, base); 9779 else 9780 xmlGenericError(xmlGenericErrorContext, 9781 "PP: lookup '%c%c%c' found at %d\n", 9782 first, next, third, base); 9783#endif 9784 return(base - (in->cur - in->base)); 9785 } 9786 } 9787 ctxt->checkIndex = base; 9788#ifdef DEBUG_PUSH 9789 if (next == 0) 9790 xmlGenericError(xmlGenericErrorContext, 9791 "PP: lookup '%c' failed\n", first); 9792 else if (third == 0) 9793 xmlGenericError(xmlGenericErrorContext, 9794 "PP: lookup '%c%c' failed\n", first, next); 9795 else 9796 xmlGenericError(xmlGenericErrorContext, 9797 "PP: lookup '%c%c%c' failed\n", first, next, third); 9798#endif 9799 return(-1); 9800} 9801 9802/** 9803 * xmlParseGetLasts: 9804 * @ctxt: an XML parser context 9805 * @lastlt: pointer to store the last '<' from the input 9806 * @lastgt: pointer to store the last '>' 
from the input 9807 * 9808 * Lookup the last < and > in the current chunk 9809 */ 9810static void 9811xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 9812 const xmlChar **lastgt) { 9813 const xmlChar *tmp; 9814 9815 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 9816 xmlGenericError(xmlGenericErrorContext, 9817 "Internal error: xmlParseGetLasts\n"); 9818 return; 9819 } 9820 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 9821 tmp = ctxt->input->end; 9822 tmp--; 9823 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 9824 if (tmp < ctxt->input->base) { 9825 *lastlt = NULL; 9826 *lastgt = NULL; 9827 } else { 9828 *lastlt = tmp; 9829 tmp++; 9830 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 9831 if (*tmp == '\'') { 9832 tmp++; 9833 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 9834 if (tmp < ctxt->input->end) tmp++; 9835 } else if (*tmp == '"') { 9836 tmp++; 9837 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 9838 if (tmp < ctxt->input->end) tmp++; 9839 } else 9840 tmp++; 9841 } 9842 if (tmp < ctxt->input->end) 9843 *lastgt = tmp; 9844 else { 9845 tmp = *lastlt; 9846 tmp--; 9847 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 9848 if (tmp >= ctxt->input->base) 9849 *lastgt = tmp; 9850 else 9851 *lastgt = NULL; 9852 } 9853 } 9854 } else { 9855 *lastlt = NULL; 9856 *lastgt = NULL; 9857 } 9858} 9859/** 9860 * xmlCheckCdataPush: 9861 * @cur: pointer to the bock of characters 9862 * @len: length of the block in bytes 9863 * 9864 * Check that the block of characters is okay as SCdata content [20] 9865 * 9866 * Returns the number of bytes to pass if okay, a negative index where an 9867 * UTF-8 error occured otherwise 9868 */ 9869static int 9870xmlCheckCdataPush(const xmlChar *utf, int len) { 9871 int ix; 9872 unsigned char c; 9873 int codepoint; 9874 9875 if ((utf == NULL) || (len <= 0)) 9876 return(0); 9877 9878 for (ix = 0; ix < len;) { /* string is 0-terminated */ 9879 c = utf[ix]; 9880 
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 9881 if (c >= 0x20) 9882 ix++; 9883 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 9884 ix++; 9885 else 9886 return(-ix); 9887 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 9888 if (ix + 2 > len) return(ix); 9889 if ((utf[ix+1] & 0xc0 ) != 0x80) 9890 return(-ix); 9891 codepoint = (utf[ix] & 0x1f) << 6; 9892 codepoint |= utf[ix+1] & 0x3f; 9893 if (!xmlIsCharQ(codepoint)) 9894 return(-ix); 9895 ix += 2; 9896 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 9897 if (ix + 3 > len) return(ix); 9898 if (((utf[ix+1] & 0xc0) != 0x80) || 9899 ((utf[ix+2] & 0xc0) != 0x80)) 9900 return(-ix); 9901 codepoint = (utf[ix] & 0xf) << 12; 9902 codepoint |= (utf[ix+1] & 0x3f) << 6; 9903 codepoint |= utf[ix+2] & 0x3f; 9904 if (!xmlIsCharQ(codepoint)) 9905 return(-ix); 9906 ix += 3; 9907 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 9908 if (ix + 4 > len) return(ix); 9909 if (((utf[ix+1] & 0xc0) != 0x80) || 9910 ((utf[ix+2] & 0xc0) != 0x80) || 9911 ((utf[ix+3] & 0xc0) != 0x80)) 9912 return(-ix); 9913 codepoint = (utf[ix] & 0x7) << 18; 9914 codepoint |= (utf[ix+1] & 0x3f) << 12; 9915 codepoint |= (utf[ix+2] & 0x3f) << 6; 9916 codepoint |= utf[ix+3] & 0x3f; 9917 if (!xmlIsCharQ(codepoint)) 9918 return(-ix); 9919 ix += 4; 9920 } else /* unknown encoding */ 9921 return(-ix); 9922 } 9923 return(ix); 9924} 9925 9926/** 9927 * xmlParseTryOrFinish: 9928 * @ctxt: an XML parser context 9929 * @terminate: last chunk indicator 9930 * 9931 * Try to progress on parsing 9932 * 9933 * Returns zero if no parsing was possible 9934 */ 9935static int 9936xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 9937 int ret = 0; 9938 int avail, tlen; 9939 xmlChar cur, next; 9940 const xmlChar *lastlt, *lastgt; 9941 9942 if (ctxt->input == NULL) 9943 return(0); 9944 9945#ifdef DEBUG_PUSH 9946 switch (ctxt->instate) { 9947 case XML_PARSER_EOF: 9948 
xmlGenericError(xmlGenericErrorContext, 9949 "PP: try EOF\n"); break; 9950 case XML_PARSER_START: 9951 xmlGenericError(xmlGenericErrorContext, 9952 "PP: try START\n"); break; 9953 case XML_PARSER_MISC: 9954 xmlGenericError(xmlGenericErrorContext, 9955 "PP: try MISC\n");break; 9956 case XML_PARSER_COMMENT: 9957 xmlGenericError(xmlGenericErrorContext, 9958 "PP: try COMMENT\n");break; 9959 case XML_PARSER_PROLOG: 9960 xmlGenericError(xmlGenericErrorContext, 9961 "PP: try PROLOG\n");break; 9962 case XML_PARSER_START_TAG: 9963 xmlGenericError(xmlGenericErrorContext, 9964 "PP: try START_TAG\n");break; 9965 case XML_PARSER_CONTENT: 9966 xmlGenericError(xmlGenericErrorContext, 9967 "PP: try CONTENT\n");break; 9968 case XML_PARSER_CDATA_SECTION: 9969 xmlGenericError(xmlGenericErrorContext, 9970 "PP: try CDATA_SECTION\n");break; 9971 case XML_PARSER_END_TAG: 9972 xmlGenericError(xmlGenericErrorContext, 9973 "PP: try END_TAG\n");break; 9974 case XML_PARSER_ENTITY_DECL: 9975 xmlGenericError(xmlGenericErrorContext, 9976 "PP: try ENTITY_DECL\n");break; 9977 case XML_PARSER_ENTITY_VALUE: 9978 xmlGenericError(xmlGenericErrorContext, 9979 "PP: try ENTITY_VALUE\n");break; 9980 case XML_PARSER_ATTRIBUTE_VALUE: 9981 xmlGenericError(xmlGenericErrorContext, 9982 "PP: try ATTRIBUTE_VALUE\n");break; 9983 case XML_PARSER_DTD: 9984 xmlGenericError(xmlGenericErrorContext, 9985 "PP: try DTD\n");break; 9986 case XML_PARSER_EPILOG: 9987 xmlGenericError(xmlGenericErrorContext, 9988 "PP: try EPILOG\n");break; 9989 case XML_PARSER_PI: 9990 xmlGenericError(xmlGenericErrorContext, 9991 "PP: try PI\n");break; 9992 case XML_PARSER_IGNORE: 9993 xmlGenericError(xmlGenericErrorContext, 9994 "PP: try IGNORE\n");break; 9995 } 9996#endif 9997 9998 if ((ctxt->input != NULL) && 9999 (ctxt->input->cur - ctxt->input->base > 4096)) { 10000 xmlSHRINK(ctxt); 10001 ctxt->checkIndex = 0; 10002 } 10003 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10004 10005 while (1) { 10006 if ((ctxt->errNo != XML_ERR_OK) && 
(ctxt->disableSAX == 1)) 10007 return(0); 10008 10009 10010 /* 10011 * Pop-up of finished entities. 10012 */ 10013 while ((RAW == 0) && (ctxt->inputNr > 1)) 10014 xmlPopInput(ctxt); 10015 10016 if (ctxt->input == NULL) break; 10017 if (ctxt->input->buf == NULL) 10018 avail = ctxt->input->length - 10019 (ctxt->input->cur - ctxt->input->base); 10020 else { 10021 /* 10022 * If we are operating on converted input, try to flush 10023 * remainng chars to avoid them stalling in the non-converted 10024 * buffer. 10025 */ 10026 if ((ctxt->input->buf->raw != NULL) && 10027 (ctxt->input->buf->raw->use > 0)) { 10028 int base = ctxt->input->base - 10029 ctxt->input->buf->buffer->content; 10030 int current = ctxt->input->cur - ctxt->input->base; 10031 10032 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10033 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10034 ctxt->input->cur = ctxt->input->base + current; 10035 ctxt->input->end = 10036 &ctxt->input->buf->buffer->content[ 10037 ctxt->input->buf->buffer->use]; 10038 } 10039 avail = ctxt->input->buf->buffer->use - 10040 (ctxt->input->cur - ctxt->input->base); 10041 } 10042 if (avail < 1) 10043 goto done; 10044 switch (ctxt->instate) { 10045 case XML_PARSER_EOF: 10046 /* 10047 * Document parsing is done ! 10048 */ 10049 goto done; 10050 case XML_PARSER_START: 10051 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10052 xmlChar start[4]; 10053 xmlCharEncoding enc; 10054 10055 /* 10056 * Very first chars read from the document flow. 10057 */ 10058 if (avail < 4) 10059 goto done; 10060 10061 /* 10062 * Get the 4 first bytes and decode the charset 10063 * if enc != XML_CHAR_ENCODING_NONE 10064 * plug some encoding conversion routines, 10065 * else xmlSwitchEncoding will set to (default) 10066 * UTF8. 
10067 */ 10068 start[0] = RAW; 10069 start[1] = NXT(1); 10070 start[2] = NXT(2); 10071 start[3] = NXT(3); 10072 enc = xmlDetectCharEncoding(start, 4); 10073 xmlSwitchEncoding(ctxt, enc); 10074 break; 10075 } 10076 10077 if (avail < 2) 10078 goto done; 10079 cur = ctxt->input->cur[0]; 10080 next = ctxt->input->cur[1]; 10081 if (cur == 0) { 10082 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10083 ctxt->sax->setDocumentLocator(ctxt->userData, 10084 &xmlDefaultSAXLocator); 10085 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10086 ctxt->instate = XML_PARSER_EOF; 10087#ifdef DEBUG_PUSH 10088 xmlGenericError(xmlGenericErrorContext, 10089 "PP: entering EOF\n"); 10090#endif 10091 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10092 ctxt->sax->endDocument(ctxt->userData); 10093 goto done; 10094 } 10095 if ((cur == '<') && (next == '?')) { 10096 /* PI or XML decl */ 10097 if (avail < 5) return(ret); 10098 if ((!terminate) && 10099 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10100 return(ret); 10101 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10102 ctxt->sax->setDocumentLocator(ctxt->userData, 10103 &xmlDefaultSAXLocator); 10104 if ((ctxt->input->cur[2] == 'x') && 10105 (ctxt->input->cur[3] == 'm') && 10106 (ctxt->input->cur[4] == 'l') && 10107 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10108 ret += 5; 10109#ifdef DEBUG_PUSH 10110 xmlGenericError(xmlGenericErrorContext, 10111 "PP: Parsing XML Decl\n"); 10112#endif 10113 xmlParseXMLDecl(ctxt); 10114 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10115 /* 10116 * The XML REC instructs us to stop parsing right 10117 * here 10118 */ 10119 ctxt->instate = XML_PARSER_EOF; 10120 return(0); 10121 } 10122 ctxt->standalone = ctxt->input->standalone; 10123 if ((ctxt->encoding == NULL) && 10124 (ctxt->input->encoding != NULL)) 10125 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10126 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10127 (!ctxt->disableSAX)) 10128 ctxt->sax->startDocument(ctxt->userData); 
10129 ctxt->instate = XML_PARSER_MISC; 10130#ifdef DEBUG_PUSH 10131 xmlGenericError(xmlGenericErrorContext, 10132 "PP: entering MISC\n"); 10133#endif 10134 } else { 10135 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10136 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10137 (!ctxt->disableSAX)) 10138 ctxt->sax->startDocument(ctxt->userData); 10139 ctxt->instate = XML_PARSER_MISC; 10140#ifdef DEBUG_PUSH 10141 xmlGenericError(xmlGenericErrorContext, 10142 "PP: entering MISC\n"); 10143#endif 10144 } 10145 } else { 10146 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10147 ctxt->sax->setDocumentLocator(ctxt->userData, 10148 &xmlDefaultSAXLocator); 10149 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10150 if (ctxt->version == NULL) { 10151 xmlErrMemory(ctxt, NULL); 10152 break; 10153 } 10154 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10155 (!ctxt->disableSAX)) 10156 ctxt->sax->startDocument(ctxt->userData); 10157 ctxt->instate = XML_PARSER_MISC; 10158#ifdef DEBUG_PUSH 10159 xmlGenericError(xmlGenericErrorContext, 10160 "PP: entering MISC\n"); 10161#endif 10162 } 10163 break; 10164 case XML_PARSER_START_TAG: { 10165 const xmlChar *name; 10166 const xmlChar *prefix; 10167 const xmlChar *URI; 10168 int nsNr = ctxt->nsNr; 10169 10170 if ((avail < 2) && (ctxt->inputNr == 1)) 10171 goto done; 10172 cur = ctxt->input->cur[0]; 10173 if (cur != '<') { 10174 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10175 ctxt->instate = XML_PARSER_EOF; 10176 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10177 ctxt->sax->endDocument(ctxt->userData); 10178 goto done; 10179 } 10180 if (!terminate) { 10181 if (ctxt->progressive) { 10182 /* > can be found unescaped in attribute values */ 10183 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10184 goto done; 10185 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10186 goto done; 10187 } 10188 } 10189 if (ctxt->spaceNr == 0) 10190 spacePush(ctxt, -1); 10191 else if (*ctxt->space == -2) 10192 
spacePush(ctxt, -1); 10193 else 10194 spacePush(ctxt, *ctxt->space); 10195#ifdef LIBXML_SAX1_ENABLED 10196 if (ctxt->sax2) 10197#endif /* LIBXML_SAX1_ENABLED */ 10198 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10199#ifdef LIBXML_SAX1_ENABLED 10200 else 10201 name = xmlParseStartTag(ctxt); 10202#endif /* LIBXML_SAX1_ENABLED */ 10203 if (name == NULL) { 10204 spacePop(ctxt); 10205 ctxt->instate = XML_PARSER_EOF; 10206 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10207 ctxt->sax->endDocument(ctxt->userData); 10208 goto done; 10209 } 10210#ifdef LIBXML_VALID_ENABLED 10211 /* 10212 * [ VC: Root Element Type ] 10213 * The Name in the document type declaration must match 10214 * the element type of the root element. 10215 */ 10216 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10217 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10218 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10219#endif /* LIBXML_VALID_ENABLED */ 10220 10221 /* 10222 * Check for an Empty Element. 
10223 */ 10224 if ((RAW == '/') && (NXT(1) == '>')) { 10225 SKIP(2); 10226 10227 if (ctxt->sax2) { 10228 if ((ctxt->sax != NULL) && 10229 (ctxt->sax->endElementNs != NULL) && 10230 (!ctxt->disableSAX)) 10231 ctxt->sax->endElementNs(ctxt->userData, name, 10232 prefix, URI); 10233 if (ctxt->nsNr - nsNr > 0) 10234 nsPop(ctxt, ctxt->nsNr - nsNr); 10235#ifdef LIBXML_SAX1_ENABLED 10236 } else { 10237 if ((ctxt->sax != NULL) && 10238 (ctxt->sax->endElement != NULL) && 10239 (!ctxt->disableSAX)) 10240 ctxt->sax->endElement(ctxt->userData, name); 10241#endif /* LIBXML_SAX1_ENABLED */ 10242 } 10243 spacePop(ctxt); 10244 if (ctxt->nameNr == 0) { 10245 ctxt->instate = XML_PARSER_EPILOG; 10246 } else { 10247 ctxt->instate = XML_PARSER_CONTENT; 10248 } 10249 break; 10250 } 10251 if (RAW == '>') { 10252 NEXT; 10253 } else { 10254 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 10255 "Couldn't find end of Start Tag %s\n", 10256 name); 10257 nodePop(ctxt); 10258 spacePop(ctxt); 10259 } 10260 if (ctxt->sax2) 10261 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 10262#ifdef LIBXML_SAX1_ENABLED 10263 else 10264 namePush(ctxt, name); 10265#endif /* LIBXML_SAX1_ENABLED */ 10266 10267 ctxt->instate = XML_PARSER_CONTENT; 10268 break; 10269 } 10270 case XML_PARSER_CONTENT: { 10271 const xmlChar *test; 10272 unsigned int cons; 10273 if ((avail < 2) && (ctxt->inputNr == 1)) 10274 goto done; 10275 cur = ctxt->input->cur[0]; 10276 next = ctxt->input->cur[1]; 10277 10278 test = CUR_PTR; 10279 cons = ctxt->input->consumed; 10280 if ((cur == '<') && (next == '/')) { 10281 ctxt->instate = XML_PARSER_END_TAG; 10282 break; 10283 } else if ((cur == '<') && (next == '?')) { 10284 if ((!terminate) && 10285 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10286 goto done; 10287 xmlParsePI(ctxt); 10288 } else if ((cur == '<') && (next != '!')) { 10289 ctxt->instate = XML_PARSER_START_TAG; 10290 break; 10291 } else if ((cur == '<') && (next == '!') && 10292 (ctxt->input->cur[2] == '-') && 10293 
(ctxt->input->cur[3] == '-')) { 10294 int term; 10295 10296 if (avail < 4) 10297 goto done; 10298 ctxt->input->cur += 4; 10299 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 10300 ctxt->input->cur -= 4; 10301 if ((!terminate) && (term < 0)) 10302 goto done; 10303 xmlParseComment(ctxt); 10304 ctxt->instate = XML_PARSER_CONTENT; 10305 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 10306 (ctxt->input->cur[2] == '[') && 10307 (ctxt->input->cur[3] == 'C') && 10308 (ctxt->input->cur[4] == 'D') && 10309 (ctxt->input->cur[5] == 'A') && 10310 (ctxt->input->cur[6] == 'T') && 10311 (ctxt->input->cur[7] == 'A') && 10312 (ctxt->input->cur[8] == '[')) { 10313 SKIP(9); 10314 ctxt->instate = XML_PARSER_CDATA_SECTION; 10315 break; 10316 } else if ((cur == '<') && (next == '!') && 10317 (avail < 9)) { 10318 goto done; 10319 } else if (cur == '&') { 10320 if ((!terminate) && 10321 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 10322 goto done; 10323 xmlParseReference(ctxt); 10324 } else { 10325 /* TODO Avoid the extra copy, handle directly !!! */ 10326 /* 10327 * Goal of the following test is: 10328 * - minimize calls to the SAX 'character' callback 10329 * when they are mergeable 10330 * - handle an problem for isBlank when we only parse 10331 * a sequence of blank chars and the next one is 10332 * not available to check against '<' presence. 10333 * - tries to homogenize the differences in SAX 10334 * callbacks between the push and pull versions 10335 * of the parser. 10336 */ 10337 if ((ctxt->inputNr == 1) && 10338 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 10339 if (!terminate) { 10340 if (ctxt->progressive) { 10341 if ((lastlt == NULL) || 10342 (ctxt->input->cur > lastlt)) 10343 goto done; 10344 } else if (xmlParseLookupSequence(ctxt, 10345 '<', 0, 0) < 0) { 10346 goto done; 10347 } 10348 } 10349 } 10350 ctxt->checkIndex = 0; 10351 xmlParseCharData(ctxt, 0); 10352 } 10353 /* 10354 * Pop-up of finished entities. 
10355 */ 10356 while ((RAW == 0) && (ctxt->inputNr > 1)) 10357 xmlPopInput(ctxt); 10358 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10359 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10360 "detected an error in element content\n"); 10361 ctxt->instate = XML_PARSER_EOF; 10362 break; 10363 } 10364 break; 10365 } 10366 case XML_PARSER_END_TAG: 10367 if (avail < 2) 10368 goto done; 10369 if (!terminate) { 10370 if (ctxt->progressive) { 10371 /* > can be found unescaped in attribute values */ 10372 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10373 goto done; 10374 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10375 goto done; 10376 } 10377 } 10378 if (ctxt->sax2) { 10379 xmlParseEndTag2(ctxt, 10380 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 10381 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 10382 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 10383 nameNsPop(ctxt); 10384 } 10385#ifdef LIBXML_SAX1_ENABLED 10386 else 10387 xmlParseEndTag1(ctxt, 0); 10388#endif /* LIBXML_SAX1_ENABLED */ 10389 if (ctxt->nameNr == 0) { 10390 ctxt->instate = XML_PARSER_EPILOG; 10391 } else { 10392 ctxt->instate = XML_PARSER_CONTENT; 10393 } 10394 break; 10395 case XML_PARSER_CDATA_SECTION: { 10396 /* 10397 * The Push mode need to have the SAX callback for 10398 * cdataBlock merge back contiguous callbacks. 
10399 */ 10400 int base; 10401 10402 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 10403 if (base < 0) { 10404 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 10405 int tmp; 10406 10407 tmp = xmlCheckCdataPush(ctxt->input->cur, 10408 XML_PARSER_BIG_BUFFER_SIZE); 10409 if (tmp < 0) { 10410 tmp = -tmp; 10411 ctxt->input->cur += tmp; 10412 goto encoding_error; 10413 } 10414 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10415 if (ctxt->sax->cdataBlock != NULL) 10416 ctxt->sax->cdataBlock(ctxt->userData, 10417 ctxt->input->cur, tmp); 10418 else if (ctxt->sax->characters != NULL) 10419 ctxt->sax->characters(ctxt->userData, 10420 ctxt->input->cur, tmp); 10421 } 10422 SKIPL(tmp); 10423 ctxt->checkIndex = 0; 10424 } 10425 goto done; 10426 } else { 10427 int tmp; 10428 10429 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 10430 if ((tmp < 0) || (tmp != base)) { 10431 tmp = -tmp; 10432 ctxt->input->cur += tmp; 10433 goto encoding_error; 10434 } 10435 if ((ctxt->sax != NULL) && (base == 0) && 10436 (ctxt->sax->cdataBlock != NULL) && 10437 (!ctxt->disableSAX)) { 10438 /* 10439 * Special case to provide identical behaviour 10440 * between pull and push parsers on enpty CDATA 10441 * sections 10442 */ 10443 if ((ctxt->input->cur - ctxt->input->base >= 9) && 10444 (!strncmp((const char *)&ctxt->input->cur[-9], 10445 "<![CDATA[", 9))) 10446 ctxt->sax->cdataBlock(ctxt->userData, 10447 BAD_CAST "", 0); 10448 } else if ((ctxt->sax != NULL) && (base > 0) && 10449 (!ctxt->disableSAX)) { 10450 if (ctxt->sax->cdataBlock != NULL) 10451 ctxt->sax->cdataBlock(ctxt->userData, 10452 ctxt->input->cur, base); 10453 else if (ctxt->sax->characters != NULL) 10454 ctxt->sax->characters(ctxt->userData, 10455 ctxt->input->cur, base); 10456 } 10457 SKIPL(base + 3); 10458 ctxt->checkIndex = 0; 10459 ctxt->instate = XML_PARSER_CONTENT; 10460#ifdef DEBUG_PUSH 10461 xmlGenericError(xmlGenericErrorContext, 10462 "PP: entering CONTENT\n"); 10463#endif 10464 } 10465 break; 10466 } 10467 case 
XML_PARSER_MISC: 10468 SKIP_BLANKS; 10469 if (ctxt->input->buf == NULL) 10470 avail = ctxt->input->length - 10471 (ctxt->input->cur - ctxt->input->base); 10472 else 10473 avail = ctxt->input->buf->buffer->use - 10474 (ctxt->input->cur - ctxt->input->base); 10475 if (avail < 2) 10476 goto done; 10477 cur = ctxt->input->cur[0]; 10478 next = ctxt->input->cur[1]; 10479 if ((cur == '<') && (next == '?')) { 10480 if ((!terminate) && 10481 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10482 goto done; 10483#ifdef DEBUG_PUSH 10484 xmlGenericError(xmlGenericErrorContext, 10485 "PP: Parsing PI\n"); 10486#endif 10487 xmlParsePI(ctxt); 10488 ctxt->checkIndex = 0; 10489 } else if ((cur == '<') && (next == '!') && 10490 (ctxt->input->cur[2] == '-') && 10491 (ctxt->input->cur[3] == '-')) { 10492 if ((!terminate) && 10493 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10494 goto done; 10495#ifdef DEBUG_PUSH 10496 xmlGenericError(xmlGenericErrorContext, 10497 "PP: Parsing Comment\n"); 10498#endif 10499 xmlParseComment(ctxt); 10500 ctxt->instate = XML_PARSER_MISC; 10501 ctxt->checkIndex = 0; 10502 } else if ((cur == '<') && (next == '!') && 10503 (ctxt->input->cur[2] == 'D') && 10504 (ctxt->input->cur[3] == 'O') && 10505 (ctxt->input->cur[4] == 'C') && 10506 (ctxt->input->cur[5] == 'T') && 10507 (ctxt->input->cur[6] == 'Y') && 10508 (ctxt->input->cur[7] == 'P') && 10509 (ctxt->input->cur[8] == 'E')) { 10510 if ((!terminate) && 10511 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 10512 goto done; 10513#ifdef DEBUG_PUSH 10514 xmlGenericError(xmlGenericErrorContext, 10515 "PP: Parsing internal subset\n"); 10516#endif 10517 ctxt->inSubset = 1; 10518 xmlParseDocTypeDecl(ctxt); 10519 if (RAW == '[') { 10520 ctxt->instate = XML_PARSER_DTD; 10521#ifdef DEBUG_PUSH 10522 xmlGenericError(xmlGenericErrorContext, 10523 "PP: entering DTD\n"); 10524#endif 10525 } else { 10526 /* 10527 * Create and update the external subset. 
10528 */ 10529 ctxt->inSubset = 2; 10530 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10531 (ctxt->sax->externalSubset != NULL)) 10532 ctxt->sax->externalSubset(ctxt->userData, 10533 ctxt->intSubName, ctxt->extSubSystem, 10534 ctxt->extSubURI); 10535 ctxt->inSubset = 0; 10536 xmlCleanSpecialAttr(ctxt); 10537 ctxt->instate = XML_PARSER_PROLOG; 10538#ifdef DEBUG_PUSH 10539 xmlGenericError(xmlGenericErrorContext, 10540 "PP: entering PROLOG\n"); 10541#endif 10542 } 10543 } else if ((cur == '<') && (next == '!') && 10544 (avail < 9)) { 10545 goto done; 10546 } else { 10547 ctxt->instate = XML_PARSER_START_TAG; 10548 ctxt->progressive = 1; 10549 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10550#ifdef DEBUG_PUSH 10551 xmlGenericError(xmlGenericErrorContext, 10552 "PP: entering START_TAG\n"); 10553#endif 10554 } 10555 break; 10556 case XML_PARSER_PROLOG: 10557 SKIP_BLANKS; 10558 if (ctxt->input->buf == NULL) 10559 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10560 else 10561 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10562 if (avail < 2) 10563 goto done; 10564 cur = ctxt->input->cur[0]; 10565 next = ctxt->input->cur[1]; 10566 if ((cur == '<') && (next == '?')) { 10567 if ((!terminate) && 10568 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10569 goto done; 10570#ifdef DEBUG_PUSH 10571 xmlGenericError(xmlGenericErrorContext, 10572 "PP: Parsing PI\n"); 10573#endif 10574 xmlParsePI(ctxt); 10575 } else if ((cur == '<') && (next == '!') && 10576 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10577 if ((!terminate) && 10578 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10579 goto done; 10580#ifdef DEBUG_PUSH 10581 xmlGenericError(xmlGenericErrorContext, 10582 "PP: Parsing Comment\n"); 10583#endif 10584 xmlParseComment(ctxt); 10585 ctxt->instate = XML_PARSER_PROLOG; 10586 } else if ((cur == '<') && (next == '!') && 10587 (avail < 4)) { 10588 goto done; 10589 } else { 10590 ctxt->instate = 
XML_PARSER_START_TAG; 10591 if (ctxt->progressive == 0) 10592 ctxt->progressive = 1; 10593 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10594#ifdef DEBUG_PUSH 10595 xmlGenericError(xmlGenericErrorContext, 10596 "PP: entering START_TAG\n"); 10597#endif 10598 } 10599 break; 10600 case XML_PARSER_EPILOG: 10601 SKIP_BLANKS; 10602 if (ctxt->input->buf == NULL) 10603 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10604 else 10605 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10606 if (avail < 2) 10607 goto done; 10608 cur = ctxt->input->cur[0]; 10609 next = ctxt->input->cur[1]; 10610 if ((cur == '<') && (next == '?')) { 10611 if ((!terminate) && 10612 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10613 goto done; 10614#ifdef DEBUG_PUSH 10615 xmlGenericError(xmlGenericErrorContext, 10616 "PP: Parsing PI\n"); 10617#endif 10618 xmlParsePI(ctxt); 10619 ctxt->instate = XML_PARSER_EPILOG; 10620 } else if ((cur == '<') && (next == '!') && 10621 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10622 if ((!terminate) && 10623 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10624 goto done; 10625#ifdef DEBUG_PUSH 10626 xmlGenericError(xmlGenericErrorContext, 10627 "PP: Parsing Comment\n"); 10628#endif 10629 xmlParseComment(ctxt); 10630 ctxt->instate = XML_PARSER_EPILOG; 10631 } else if ((cur == '<') && (next == '!') && 10632 (avail < 4)) { 10633 goto done; 10634 } else { 10635 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10636 ctxt->instate = XML_PARSER_EOF; 10637#ifdef DEBUG_PUSH 10638 xmlGenericError(xmlGenericErrorContext, 10639 "PP: entering EOF\n"); 10640#endif 10641 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10642 ctxt->sax->endDocument(ctxt->userData); 10643 goto done; 10644 } 10645 break; 10646 case XML_PARSER_DTD: { 10647 /* 10648 * Sorry but progressive parsing of the internal subset 10649 * is not expected to be supported. 
We first check that 10650 * the full content of the internal subset is available and 10651 * the parsing is launched only at that point. 10652 * Internal subset ends up with "']' S? '>'" in an unescaped 10653 * section and not in a ']]>' sequence which are conditional 10654 * sections (whoever argued to keep that crap in XML deserve 10655 * a place in hell !). 10656 */ 10657 int base, i; 10658 xmlChar *buf; 10659 xmlChar quote = 0; 10660 10661 base = ctxt->input->cur - ctxt->input->base; 10662 if (base < 0) return(0); 10663 if (ctxt->checkIndex > base) 10664 base = ctxt->checkIndex; 10665 buf = ctxt->input->buf->buffer->content; 10666 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 10667 base++) { 10668 if (quote != 0) { 10669 if (buf[base] == quote) 10670 quote = 0; 10671 continue; 10672 } 10673 if ((quote == 0) && (buf[base] == '<')) { 10674 int found = 0; 10675 /* special handling of comments */ 10676 if (((unsigned int) base + 4 < 10677 ctxt->input->buf->buffer->use) && 10678 (buf[base + 1] == '!') && 10679 (buf[base + 2] == '-') && 10680 (buf[base + 3] == '-')) { 10681 for (;(unsigned int) base + 3 < 10682 ctxt->input->buf->buffer->use; base++) { 10683 if ((buf[base] == '-') && 10684 (buf[base + 1] == '-') && 10685 (buf[base + 2] == '>')) { 10686 found = 1; 10687 base += 2; 10688 break; 10689 } 10690 } 10691 if (!found) { 10692#if 0 10693 fprintf(stderr, "unfinished comment\n"); 10694#endif 10695 break; /* for */ 10696 } 10697 continue; 10698 } 10699 } 10700 if (buf[base] == '"') { 10701 quote = '"'; 10702 continue; 10703 } 10704 if (buf[base] == '\'') { 10705 quote = '\''; 10706 continue; 10707 } 10708 if (buf[base] == ']') { 10709#if 0 10710 fprintf(stderr, "%c%c%c%c: ", buf[base], 10711 buf[base + 1], buf[base + 2], buf[base + 3]); 10712#endif 10713 if ((unsigned int) base +1 >= 10714 ctxt->input->buf->buffer->use) 10715 break; 10716 if (buf[base + 1] == ']') { 10717 /* conditional crap, skip both ']' ! 
*/ 10718 base++; 10719 continue; 10720 } 10721 for (i = 1; 10722 (unsigned int) base + i < ctxt->input->buf->buffer->use; 10723 i++) { 10724 if (buf[base + i] == '>') { 10725#if 0 10726 fprintf(stderr, "found\n"); 10727#endif 10728 goto found_end_int_subset; 10729 } 10730 if (!IS_BLANK_CH(buf[base + i])) { 10731#if 0 10732 fprintf(stderr, "not found\n"); 10733#endif 10734 goto not_end_of_int_subset; 10735 } 10736 } 10737#if 0 10738 fprintf(stderr, "end of stream\n"); 10739#endif 10740 break; 10741 10742 } 10743not_end_of_int_subset: 10744 continue; /* for */ 10745 } 10746 /* 10747 * We didn't found the end of the Internal subset 10748 */ 10749#ifdef DEBUG_PUSH 10750 if (next == 0) 10751 xmlGenericError(xmlGenericErrorContext, 10752 "PP: lookup of int subset end filed\n"); 10753#endif 10754 goto done; 10755 10756found_end_int_subset: 10757 xmlParseInternalSubset(ctxt); 10758 ctxt->inSubset = 2; 10759 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10760 (ctxt->sax->externalSubset != NULL)) 10761 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10762 ctxt->extSubSystem, ctxt->extSubURI); 10763 ctxt->inSubset = 0; 10764 xmlCleanSpecialAttr(ctxt); 10765 ctxt->instate = XML_PARSER_PROLOG; 10766 ctxt->checkIndex = 0; 10767#ifdef DEBUG_PUSH 10768 xmlGenericError(xmlGenericErrorContext, 10769 "PP: entering PROLOG\n"); 10770#endif 10771 break; 10772 } 10773 case XML_PARSER_COMMENT: 10774 xmlGenericError(xmlGenericErrorContext, 10775 "PP: internal error, state == COMMENT\n"); 10776 ctxt->instate = XML_PARSER_CONTENT; 10777#ifdef DEBUG_PUSH 10778 xmlGenericError(xmlGenericErrorContext, 10779 "PP: entering CONTENT\n"); 10780#endif 10781 break; 10782 case XML_PARSER_IGNORE: 10783 xmlGenericError(xmlGenericErrorContext, 10784 "PP: internal error, state == IGNORE"); 10785 ctxt->instate = XML_PARSER_DTD; 10786#ifdef DEBUG_PUSH 10787 xmlGenericError(xmlGenericErrorContext, 10788 "PP: entering DTD\n"); 10789#endif 10790 break; 10791 case XML_PARSER_PI: 10792 
xmlGenericError(xmlGenericErrorContext, 10793 "PP: internal error, state == PI\n"); 10794 ctxt->instate = XML_PARSER_CONTENT; 10795#ifdef DEBUG_PUSH 10796 xmlGenericError(xmlGenericErrorContext, 10797 "PP: entering CONTENT\n"); 10798#endif 10799 break; 10800 case XML_PARSER_ENTITY_DECL: 10801 xmlGenericError(xmlGenericErrorContext, 10802 "PP: internal error, state == ENTITY_DECL\n"); 10803 ctxt->instate = XML_PARSER_DTD; 10804#ifdef DEBUG_PUSH 10805 xmlGenericError(xmlGenericErrorContext, 10806 "PP: entering DTD\n"); 10807#endif 10808 break; 10809 case XML_PARSER_ENTITY_VALUE: 10810 xmlGenericError(xmlGenericErrorContext, 10811 "PP: internal error, state == ENTITY_VALUE\n"); 10812 ctxt->instate = XML_PARSER_CONTENT; 10813#ifdef DEBUG_PUSH 10814 xmlGenericError(xmlGenericErrorContext, 10815 "PP: entering DTD\n"); 10816#endif 10817 break; 10818 case XML_PARSER_ATTRIBUTE_VALUE: 10819 xmlGenericError(xmlGenericErrorContext, 10820 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 10821 ctxt->instate = XML_PARSER_START_TAG; 10822#ifdef DEBUG_PUSH 10823 xmlGenericError(xmlGenericErrorContext, 10824 "PP: entering START_TAG\n"); 10825#endif 10826 break; 10827 case XML_PARSER_SYSTEM_LITERAL: 10828 xmlGenericError(xmlGenericErrorContext, 10829 "PP: internal error, state == SYSTEM_LITERAL\n"); 10830 ctxt->instate = XML_PARSER_START_TAG; 10831#ifdef DEBUG_PUSH 10832 xmlGenericError(xmlGenericErrorContext, 10833 "PP: entering START_TAG\n"); 10834#endif 10835 break; 10836 case XML_PARSER_PUBLIC_LITERAL: 10837 xmlGenericError(xmlGenericErrorContext, 10838 "PP: internal error, state == PUBLIC_LITERAL\n"); 10839 ctxt->instate = XML_PARSER_START_TAG; 10840#ifdef DEBUG_PUSH 10841 xmlGenericError(xmlGenericErrorContext, 10842 "PP: entering START_TAG\n"); 10843#endif 10844 break; 10845 } 10846 } 10847done: 10848#ifdef DEBUG_PUSH 10849 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 10850#endif 10851 return(ret); 10852encoding_error: 10853 { 10854 char buffer[150]; 10855 
10856 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 10857 ctxt->input->cur[0], ctxt->input->cur[1], 10858 ctxt->input->cur[2], ctxt->input->cur[3]); 10859 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 10860 "Input is not proper UTF-8, indicate encoding !\n%s", 10861 BAD_CAST buffer, NULL); 10862 } 10863 return(0); 10864} 10865 10866/** 10867 * xmlParseChunk: 10868 * @ctxt: an XML parser context 10869 * @chunk: an char array 10870 * @size: the size in byte of the chunk 10871 * @terminate: last chunk indicator 10872 * 10873 * Parse a Chunk of memory 10874 * 10875 * Returns zero if no error, the xmlParserErrors otherwise. 10876 */ 10877int 10878xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 10879 int terminate) { 10880 int end_in_lf = 0; 10881 10882 if (ctxt == NULL) 10883 return(XML_ERR_INTERNAL_ERROR); 10884 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10885 return(ctxt->errNo); 10886 if (ctxt->instate == XML_PARSER_START) 10887 xmlDetectSAX2(ctxt); 10888 if ((size > 0) && (chunk != NULL) && (!terminate) && 10889 (chunk[size - 1] == '\r')) { 10890 end_in_lf = 1; 10891 size--; 10892 } 10893 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10894 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 10895 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10896 int cur = ctxt->input->cur - ctxt->input->base; 10897 int res; 10898 10899 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10900 if (res < 0) { 10901 ctxt->errNo = XML_PARSER_EOF; 10902 ctxt->disableSAX = 1; 10903 return (XML_PARSER_EOF); 10904 } 10905 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10906 ctxt->input->cur = ctxt->input->base + cur; 10907 ctxt->input->end = 10908 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10909#ifdef DEBUG_PUSH 10910 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10911#endif 10912 10913 } else if (ctxt->instate != 
XML_PARSER_EOF) { 10914 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 10915 xmlParserInputBufferPtr in = ctxt->input->buf; 10916 if ((in->encoder != NULL) && (in->buffer != NULL) && 10917 (in->raw != NULL)) { 10918 int nbchars; 10919 10920 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 10921 if (nbchars < 0) { 10922 /* TODO 2.6.0 */ 10923 xmlGenericError(xmlGenericErrorContext, 10924 "xmlParseChunk: encoder error\n"); 10925 return(XML_ERR_INVALID_ENCODING); 10926 } 10927 } 10928 } 10929 } 10930 xmlParseTryOrFinish(ctxt, terminate); 10931 if ((end_in_lf == 1) && (ctxt->input != NULL) && 10932 (ctxt->input->buf != NULL)) { 10933 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 10934 } 10935 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10936 return(ctxt->errNo); 10937 if (terminate) { 10938 /* 10939 * Check for termination 10940 */ 10941 int avail = 0; 10942 10943 if (ctxt->input != NULL) { 10944 if (ctxt->input->buf == NULL) 10945 avail = ctxt->input->length - 10946 (ctxt->input->cur - ctxt->input->base); 10947 else 10948 avail = ctxt->input->buf->buffer->use - 10949 (ctxt->input->cur - ctxt->input->base); 10950 } 10951 10952 if ((ctxt->instate != XML_PARSER_EOF) && 10953 (ctxt->instate != XML_PARSER_EPILOG)) { 10954 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10955 } 10956 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 10957 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10958 } 10959 if (ctxt->instate != XML_PARSER_EOF) { 10960 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10961 ctxt->sax->endDocument(ctxt->userData); 10962 } 10963 ctxt->instate = XML_PARSER_EOF; 10964 } 10965 return((xmlParserErrors) ctxt->errNo); 10966} 10967 10968/************************************************************************ 10969 * * 10970 * I/O front end functions to the parser * 10971 * * 10972 ************************************************************************/ 10973 10974/** 10975 * xmlCreatePushParserCtxt: 
10976 * @sax: a SAX handler 10977 * @user_data: The user data returned on SAX callbacks 10978 * @chunk: a pointer to an array of chars 10979 * @size: number of chars in the array 10980 * @filename: an optional file name or URI 10981 * 10982 * Create a parser context for using the XML parser in push mode. 10983 * If @buffer and @size are non-NULL, the data is used to detect 10984 * the encoding. The remaining characters will be parsed so they 10985 * don't need to be fed in again through xmlParseChunk. 10986 * To allow content encoding detection, @size should be >= 4 10987 * The value of @filename is used for fetching external entities 10988 * and error/warning reports. 10989 * 10990 * Returns the new parser context or NULL 10991 */ 10992 10993xmlParserCtxtPtr 10994xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10995 const char *chunk, int size, const char *filename) { 10996 xmlParserCtxtPtr ctxt; 10997 xmlParserInputPtr inputStream; 10998 xmlParserInputBufferPtr buf; 10999 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11000 11001 /* 11002 * plug some encoding conversion routines 11003 */ 11004 if ((chunk != NULL) && (size >= 4)) 11005 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11006 11007 buf = xmlAllocParserInputBuffer(enc); 11008 if (buf == NULL) return(NULL); 11009 11010 ctxt = xmlNewParserCtxt(); 11011 if (ctxt == NULL) { 11012 xmlErrMemory(NULL, "creating parser: out of memory\n"); 11013 xmlFreeParserInputBuffer(buf); 11014 return(NULL); 11015 } 11016 ctxt->dictNames = 1; 11017 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11018 if (ctxt->pushTab == NULL) { 11019 xmlErrMemory(ctxt, NULL); 11020 xmlFreeParserInputBuffer(buf); 11021 xmlFreeParserCtxt(ctxt); 11022 return(NULL); 11023 } 11024 if (sax != NULL) { 11025#ifdef LIBXML_SAX1_ENABLED 11026 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11027#endif /* LIBXML_SAX1_ENABLED */ 11028 xmlFree(ctxt->sax); 11029 ctxt->sax = 
(xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11030 if (ctxt->sax == NULL) { 11031 xmlErrMemory(ctxt, NULL); 11032 xmlFreeParserInputBuffer(buf); 11033 xmlFreeParserCtxt(ctxt); 11034 return(NULL); 11035 } 11036 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11037 if (sax->initialized == XML_SAX2_MAGIC) 11038 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11039 else 11040 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11041 if (user_data != NULL) 11042 ctxt->userData = user_data; 11043 } 11044 if (filename == NULL) { 11045 ctxt->directory = NULL; 11046 } else { 11047 ctxt->directory = xmlParserGetDirectory(filename); 11048 } 11049 11050 inputStream = xmlNewInputStream(ctxt); 11051 if (inputStream == NULL) { 11052 xmlFreeParserCtxt(ctxt); 11053 xmlFreeParserInputBuffer(buf); 11054 return(NULL); 11055 } 11056 11057 if (filename == NULL) 11058 inputStream->filename = NULL; 11059 else { 11060 inputStream->filename = (char *) 11061 xmlCanonicPath((const xmlChar *) filename); 11062 if (inputStream->filename == NULL) { 11063 xmlFreeParserCtxt(ctxt); 11064 xmlFreeParserInputBuffer(buf); 11065 return(NULL); 11066 } 11067 } 11068 inputStream->buf = buf; 11069 inputStream->base = inputStream->buf->buffer->content; 11070 inputStream->cur = inputStream->buf->buffer->content; 11071 inputStream->end = 11072 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11073 11074 inputPush(ctxt, inputStream); 11075 11076 /* 11077 * If the caller didn't provide an initial 'chunk' for determining 11078 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11079 * that it can be automatically determined later 11080 */ 11081 if ((size == 0) || (chunk == NULL)) { 11082 ctxt->charset = XML_CHAR_ENCODING_NONE; 11083 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11084 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11085 int cur = ctxt->input->cur - ctxt->input->base; 11086 11087 xmlParserInputBufferPush(ctxt->input->buf, size, 
chunk); 11088 11089 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11090 ctxt->input->cur = ctxt->input->base + cur; 11091 ctxt->input->end = 11092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11093#ifdef DEBUG_PUSH 11094 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11095#endif 11096 } 11097 11098 if (enc != XML_CHAR_ENCODING_NONE) { 11099 xmlSwitchEncoding(ctxt, enc); 11100 } 11101 11102 return(ctxt); 11103} 11104#endif /* LIBXML_PUSH_ENABLED */ 11105 11106/** 11107 * xmlStopParser: 11108 * @ctxt: an XML parser context 11109 * 11110 * Blocks further parser processing 11111 */ 11112void 11113xmlStopParser(xmlParserCtxtPtr ctxt) { 11114 if (ctxt == NULL) 11115 return; 11116 ctxt->instate = XML_PARSER_EOF; 11117 ctxt->disableSAX = 1; 11118 if (ctxt->input != NULL) { 11119 ctxt->input->cur = BAD_CAST""; 11120 ctxt->input->base = ctxt->input->cur; 11121 } 11122} 11123 11124/** 11125 * xmlCreateIOParserCtxt: 11126 * @sax: a SAX handler 11127 * @user_data: The user data returned on SAX callbacks 11128 * @ioread: an I/O read function 11129 * @ioclose: an I/O close function 11130 * @ioctx: an I/O handler 11131 * @enc: the charset encoding if known 11132 * 11133 * Create a parser context for using the XML parser with an existing 11134 * I/O stream 11135 * 11136 * Returns the new parser context or NULL 11137 */ 11138xmlParserCtxtPtr 11139xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11140 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11141 void *ioctx, xmlCharEncoding enc) { 11142 xmlParserCtxtPtr ctxt; 11143 xmlParserInputPtr inputStream; 11144 xmlParserInputBufferPtr buf; 11145 11146 if (ioread == NULL) return(NULL); 11147 11148 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11149 if (buf == NULL) return(NULL); 11150 11151 ctxt = xmlNewParserCtxt(); 11152 if (ctxt == NULL) { 11153 xmlFreeParserInputBuffer(buf); 11154 return(NULL); 11155 } 11156 if (sax != NULL) { 
11157#ifdef LIBXML_SAX1_ENABLED 11158 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11159#endif /* LIBXML_SAX1_ENABLED */ 11160 xmlFree(ctxt->sax); 11161 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11162 if (ctxt->sax == NULL) { 11163 xmlErrMemory(ctxt, NULL); 11164 xmlFreeParserCtxt(ctxt); 11165 return(NULL); 11166 } 11167 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11168 if (sax->initialized == XML_SAX2_MAGIC) 11169 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11170 else 11171 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11172 if (user_data != NULL) 11173 ctxt->userData = user_data; 11174 } 11175 11176 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 11177 if (inputStream == NULL) { 11178 xmlFreeParserCtxt(ctxt); 11179 return(NULL); 11180 } 11181 inputPush(ctxt, inputStream); 11182 11183 return(ctxt); 11184} 11185 11186#ifdef LIBXML_VALID_ENABLED 11187/************************************************************************ 11188 * * 11189 * Front ends when parsing a DTD * 11190 * * 11191 ************************************************************************/ 11192 11193/** 11194 * xmlIOParseDTD: 11195 * @sax: the SAX handler block or NULL 11196 * @input: an Input Buffer 11197 * @enc: the charset encoding if known 11198 * 11199 * Load and parse a DTD 11200 * 11201 * Returns the resulting xmlDtdPtr or NULL in case of error. 11202 * @input will be freed by the function in any case. 
11203 */ 11204 11205xmlDtdPtr 11206xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 11207 xmlCharEncoding enc) { 11208 xmlDtdPtr ret = NULL; 11209 xmlParserCtxtPtr ctxt; 11210 xmlParserInputPtr pinput = NULL; 11211 xmlChar start[4]; 11212 11213 if (input == NULL) 11214 return(NULL); 11215 11216 ctxt = xmlNewParserCtxt(); 11217 if (ctxt == NULL) { 11218 xmlFreeParserInputBuffer(input); 11219 return(NULL); 11220 } 11221 11222 /* 11223 * Set-up the SAX context 11224 */ 11225 if (sax != NULL) { 11226 if (ctxt->sax != NULL) 11227 xmlFree(ctxt->sax); 11228 ctxt->sax = sax; 11229 ctxt->userData = ctxt; 11230 } 11231 xmlDetectSAX2(ctxt); 11232 11233 /* 11234 * generate a parser input from the I/O handler 11235 */ 11236 11237 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 11238 if (pinput == NULL) { 11239 if (sax != NULL) ctxt->sax = NULL; 11240 xmlFreeParserInputBuffer(input); 11241 xmlFreeParserCtxt(ctxt); 11242 return(NULL); 11243 } 11244 11245 /* 11246 * plug some encoding conversion routines here. 11247 */ 11248 xmlPushInput(ctxt, pinput); 11249 if (enc != XML_CHAR_ENCODING_NONE) { 11250 xmlSwitchEncoding(ctxt, enc); 11251 } 11252 11253 pinput->filename = NULL; 11254 pinput->line = 1; 11255 pinput->col = 1; 11256 pinput->base = ctxt->input->cur; 11257 pinput->cur = ctxt->input->cur; 11258 pinput->free = NULL; 11259 11260 /* 11261 * let's parse that entity knowing it's an external subset. 11262 */ 11263 ctxt->inSubset = 2; 11264 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11265 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11266 BAD_CAST "none", BAD_CAST "none"); 11267 11268 if ((enc == XML_CHAR_ENCODING_NONE) && 11269 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 11270 /* 11271 * Get the 4 first bytes and decode the charset 11272 * if enc != XML_CHAR_ENCODING_NONE 11273 * plug some encoding conversion routines. 
11274 */ 11275 start[0] = RAW; 11276 start[1] = NXT(1); 11277 start[2] = NXT(2); 11278 start[3] = NXT(3); 11279 enc = xmlDetectCharEncoding(start, 4); 11280 if (enc != XML_CHAR_ENCODING_NONE) { 11281 xmlSwitchEncoding(ctxt, enc); 11282 } 11283 } 11284 11285 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 11286 11287 if (ctxt->myDoc != NULL) { 11288 if (ctxt->wellFormed) { 11289 ret = ctxt->myDoc->extSubset; 11290 ctxt->myDoc->extSubset = NULL; 11291 if (ret != NULL) { 11292 xmlNodePtr tmp; 11293 11294 ret->doc = NULL; 11295 tmp = ret->children; 11296 while (tmp != NULL) { 11297 tmp->doc = NULL; 11298 tmp = tmp->next; 11299 } 11300 } 11301 } else { 11302 ret = NULL; 11303 } 11304 xmlFreeDoc(ctxt->myDoc); 11305 ctxt->myDoc = NULL; 11306 } 11307 if (sax != NULL) ctxt->sax = NULL; 11308 xmlFreeParserCtxt(ctxt); 11309 11310 return(ret); 11311} 11312 11313/** 11314 * xmlSAXParseDTD: 11315 * @sax: the SAX handler block 11316 * @ExternalID: a NAME* containing the External ID of the DTD 11317 * @SystemID: a NAME* containing the URL to the DTD 11318 * 11319 * Load and parse an external subset. 11320 * 11321 * Returns the resulting xmlDtdPtr or NULL in case of error. 
11322 */ 11323 11324xmlDtdPtr 11325xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 11326 const xmlChar *SystemID) { 11327 xmlDtdPtr ret = NULL; 11328 xmlParserCtxtPtr ctxt; 11329 xmlParserInputPtr input = NULL; 11330 xmlCharEncoding enc; 11331 xmlChar* systemIdCanonic; 11332 11333 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 11334 11335 ctxt = xmlNewParserCtxt(); 11336 if (ctxt == NULL) { 11337 return(NULL); 11338 } 11339 11340 /* 11341 * Set-up the SAX context 11342 */ 11343 if (sax != NULL) { 11344 if (ctxt->sax != NULL) 11345 xmlFree(ctxt->sax); 11346 ctxt->sax = sax; 11347 ctxt->userData = ctxt; 11348 } 11349 11350 /* 11351 * Canonicalise the system ID 11352 */ 11353 systemIdCanonic = xmlCanonicPath(SystemID); 11354 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 11355 xmlFreeParserCtxt(ctxt); 11356 return(NULL); 11357 } 11358 11359 /* 11360 * Ask the Entity resolver to load the damn thing 11361 */ 11362 11363 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 11364 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 11365 systemIdCanonic); 11366 if (input == NULL) { 11367 if (sax != NULL) ctxt->sax = NULL; 11368 xmlFreeParserCtxt(ctxt); 11369 if (systemIdCanonic != NULL) 11370 xmlFree(systemIdCanonic); 11371 return(NULL); 11372 } 11373 11374 /* 11375 * plug some encoding conversion routines here. 11376 */ 11377 xmlPushInput(ctxt, input); 11378 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11379 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 11380 xmlSwitchEncoding(ctxt, enc); 11381 } 11382 11383 if (input->filename == NULL) 11384 input->filename = (char *) systemIdCanonic; 11385 else 11386 xmlFree(systemIdCanonic); 11387 input->line = 1; 11388 input->col = 1; 11389 input->base = ctxt->input->cur; 11390 input->cur = ctxt->input->cur; 11391 input->free = NULL; 11392 11393 /* 11394 * let's parse that entity knowing it's an external subset. 
11395 */ 11396 ctxt->inSubset = 2; 11397 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11398 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11399 ExternalID, SystemID); 11400 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 11401 11402 if (ctxt->myDoc != NULL) { 11403 if (ctxt->wellFormed) { 11404 ret = ctxt->myDoc->extSubset; 11405 ctxt->myDoc->extSubset = NULL; 11406 if (ret != NULL) { 11407 xmlNodePtr tmp; 11408 11409 ret->doc = NULL; 11410 tmp = ret->children; 11411 while (tmp != NULL) { 11412 tmp->doc = NULL; 11413 tmp = tmp->next; 11414 } 11415 } 11416 } else { 11417 ret = NULL; 11418 } 11419 xmlFreeDoc(ctxt->myDoc); 11420 ctxt->myDoc = NULL; 11421 } 11422 if (sax != NULL) ctxt->sax = NULL; 11423 xmlFreeParserCtxt(ctxt); 11424 11425 return(ret); 11426} 11427 11428 11429/** 11430 * xmlParseDTD: 11431 * @ExternalID: a NAME* containing the External ID of the DTD 11432 * @SystemID: a NAME* containing the URL to the DTD 11433 * 11434 * Load and parse an external subset. 11435 * 11436 * Returns the resulting xmlDtdPtr or NULL in case of error. 11437 */ 11438 11439xmlDtdPtr 11440xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 11441 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 11442} 11443#endif /* LIBXML_VALID_ENABLED */ 11444 11445/************************************************************************ 11446 * * 11447 * Front ends when parsing an Entity * 11448 * * 11449 ************************************************************************/ 11450 11451/** 11452 * xmlParseCtxtExternalEntity: 11453 * @ctx: the existing parsing context 11454 * @URL: the URL for the entity to load 11455 * @ID: the System ID for the entity to load 11456 * @lst: the return value for the set of parsed nodes 11457 * 11458 * Parse an external general entity within an existing parsing context 11459 * An external general parsed entity is well-formed if it matches the 11460 * production labeled extParsedEnt. 
11461 * 11462 * [78] extParsedEnt ::= TextDecl? content 11463 * 11464 * Returns 0 if the entity is well formed, -1 in case of args problem and 11465 * the parser error code otherwise 11466 */ 11467 11468int 11469xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 11470 const xmlChar *ID, xmlNodePtr *lst) { 11471 xmlParserCtxtPtr ctxt; 11472 xmlDocPtr newDoc; 11473 xmlNodePtr newRoot; 11474 xmlSAXHandlerPtr oldsax = NULL; 11475 int ret = 0; 11476 xmlChar start[4]; 11477 xmlCharEncoding enc; 11478 xmlParserInputPtr inputStream; 11479 char *directory = NULL; 11480 11481 if (ctx == NULL) return(-1); 11482 11483 if (ctx->depth > 40) { 11484 return(XML_ERR_ENTITY_LOOP); 11485 } 11486 11487 if (lst != NULL) 11488 *lst = NULL; 11489 if ((URL == NULL) && (ID == NULL)) 11490 return(-1); 11491 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 11492 return(-1); 11493 11494 ctxt = xmlNewParserCtxt(); 11495 if (ctxt == NULL) { 11496 return(-1); 11497 } 11498 11499 ctxt->userData = ctxt; 11500 ctxt->_private = ctx->_private; 11501 11502 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11503 if (inputStream == NULL) { 11504 xmlFreeParserCtxt(ctxt); 11505 return(-1); 11506 } 11507 11508 inputPush(ctxt, inputStream); 11509 11510 if ((ctxt->directory == NULL) && (directory == NULL)) 11511 directory = xmlParserGetDirectory((char *)URL); 11512 if ((ctxt->directory == NULL) && (directory != NULL)) 11513 ctxt->directory = directory; 11514 11515 oldsax = ctxt->sax; 11516 ctxt->sax = ctx->sax; 11517 xmlDetectSAX2(ctxt); 11518 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11519 if (newDoc == NULL) { 11520 xmlFreeParserCtxt(ctxt); 11521 return(-1); 11522 } 11523 if (ctx->myDoc->dict) { 11524 newDoc->dict = ctx->myDoc->dict; 11525 xmlDictReference(newDoc->dict); 11526 } 11527 if (ctx->myDoc != NULL) { 11528 newDoc->intSubset = ctx->myDoc->intSubset; 11529 newDoc->extSubset = ctx->myDoc->extSubset; 11530 } 11531 if (ctx->myDoc->URL != NULL) { 11532 
newDoc->URL = xmlStrdup(ctx->myDoc->URL); 11533 } 11534 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11535 if (newRoot == NULL) { 11536 ctxt->sax = oldsax; 11537 xmlFreeParserCtxt(ctxt); 11538 newDoc->intSubset = NULL; 11539 newDoc->extSubset = NULL; 11540 xmlFreeDoc(newDoc); 11541 return(-1); 11542 } 11543 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11544 nodePush(ctxt, newDoc->children); 11545 if (ctx->myDoc == NULL) { 11546 ctxt->myDoc = newDoc; 11547 } else { 11548 ctxt->myDoc = ctx->myDoc; 11549 newDoc->children->doc = ctx->myDoc; 11550 } 11551 11552 /* 11553 * Get the 4 first bytes and decode the charset 11554 * if enc != XML_CHAR_ENCODING_NONE 11555 * plug some encoding conversion routines. 11556 */ 11557 GROW 11558 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11559 start[0] = RAW; 11560 start[1] = NXT(1); 11561 start[2] = NXT(2); 11562 start[3] = NXT(3); 11563 enc = xmlDetectCharEncoding(start, 4); 11564 if (enc != XML_CHAR_ENCODING_NONE) { 11565 xmlSwitchEncoding(ctxt, enc); 11566 } 11567 } 11568 11569 /* 11570 * Parse a possible text declaration first 11571 */ 11572 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11573 xmlParseTextDecl(ctxt); 11574 } 11575 11576 /* 11577 * Doing validity checking on chunk doesn't make sense 11578 */ 11579 ctxt->instate = XML_PARSER_CONTENT; 11580 ctxt->validate = ctx->validate; 11581 ctxt->valid = ctx->valid; 11582 ctxt->loadsubset = ctx->loadsubset; 11583 ctxt->depth = ctx->depth + 1; 11584 ctxt->replaceEntities = ctx->replaceEntities; 11585 if (ctxt->validate) { 11586 ctxt->vctxt.error = ctx->vctxt.error; 11587 ctxt->vctxt.warning = ctx->vctxt.warning; 11588 } else { 11589 ctxt->vctxt.error = NULL; 11590 ctxt->vctxt.warning = NULL; 11591 } 11592 ctxt->vctxt.nodeTab = NULL; 11593 ctxt->vctxt.nodeNr = 0; 11594 ctxt->vctxt.nodeMax = 0; 11595 ctxt->vctxt.node = NULL; 11596 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11597 ctxt->dict = ctx->dict; 11598 ctxt->str_xml = 
xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11599 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11600 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11601 ctxt->dictNames = ctx->dictNames; 11602 ctxt->attsDefault = ctx->attsDefault; 11603 ctxt->attsSpecial = ctx->attsSpecial; 11604 ctxt->linenumbers = ctx->linenumbers; 11605 11606 xmlParseContent(ctxt); 11607 11608 ctx->validate = ctxt->validate; 11609 ctx->valid = ctxt->valid; 11610 if ((RAW == '<') && (NXT(1) == '/')) { 11611 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11612 } else if (RAW != 0) { 11613 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11614 } 11615 if (ctxt->node != newDoc->children) { 11616 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11617 } 11618 11619 if (!ctxt->wellFormed) { 11620 if (ctxt->errNo == 0) 11621 ret = 1; 11622 else 11623 ret = ctxt->errNo; 11624 } else { 11625 if (lst != NULL) { 11626 xmlNodePtr cur; 11627 11628 /* 11629 * Return the newly created nodeset after unlinking it from 11630 * they pseudo parent. 
11631 */ 11632 cur = newDoc->children->children; 11633 *lst = cur; 11634 while (cur != NULL) { 11635 cur->parent = NULL; 11636 cur = cur->next; 11637 } 11638 newDoc->children->children = NULL; 11639 } 11640 ret = 0; 11641 } 11642 ctxt->sax = oldsax; 11643 ctxt->dict = NULL; 11644 ctxt->attsDefault = NULL; 11645 ctxt->attsSpecial = NULL; 11646 xmlFreeParserCtxt(ctxt); 11647 newDoc->intSubset = NULL; 11648 newDoc->extSubset = NULL; 11649 xmlFreeDoc(newDoc); 11650 11651 return(ret); 11652} 11653 11654/** 11655 * xmlParseExternalEntityPrivate: 11656 * @doc: the document the chunk pertains to 11657 * @oldctxt: the previous parser context if available 11658 * @sax: the SAX handler bloc (possibly NULL) 11659 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11660 * @depth: Used for loop detection, use 0 11661 * @URL: the URL for the entity to load 11662 * @ID: the System ID for the entity to load 11663 * @list: the return value for the set of parsed nodes 11664 * 11665 * Private version of xmlParseExternalEntity() 11666 * 11667 * Returns 0 if the entity is well formed, -1 in case of args problem and 11668 * the parser error code otherwise 11669 */ 11670 11671static xmlParserErrors 11672xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 11673 xmlSAXHandlerPtr sax, 11674 void *user_data, int depth, const xmlChar *URL, 11675 const xmlChar *ID, xmlNodePtr *list) { 11676 xmlParserCtxtPtr ctxt; 11677 xmlDocPtr newDoc; 11678 xmlNodePtr newRoot; 11679 xmlSAXHandlerPtr oldsax = NULL; 11680 xmlParserErrors ret = XML_ERR_OK; 11681 xmlChar start[4]; 11682 xmlCharEncoding enc; 11683 11684 if (depth > 40) { 11685 return(XML_ERR_ENTITY_LOOP); 11686 } 11687 11688 11689 11690 if (list != NULL) 11691 *list = NULL; 11692 if ((URL == NULL) && (ID == NULL)) 11693 return(XML_ERR_INTERNAL_ERROR); 11694 if (doc == NULL) 11695 return(XML_ERR_INTERNAL_ERROR); 11696 11697 11698 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 11699 if (ctxt == NULL) 
return(XML_WAR_UNDECLARED_ENTITY); 11700 ctxt->userData = ctxt; 11701 if (oldctxt != NULL) { 11702 ctxt->_private = oldctxt->_private; 11703 ctxt->loadsubset = oldctxt->loadsubset; 11704 ctxt->validate = oldctxt->validate; 11705 ctxt->external = oldctxt->external; 11706 ctxt->record_info = oldctxt->record_info; 11707 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 11708 ctxt->node_seq.length = oldctxt->node_seq.length; 11709 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 11710 } else { 11711 /* 11712 * Doing validity checking on chunk without context 11713 * doesn't make sense 11714 */ 11715 ctxt->_private = NULL; 11716 ctxt->validate = 0; 11717 ctxt->external = 2; 11718 ctxt->loadsubset = 0; 11719 } 11720 if (sax != NULL) { 11721 oldsax = ctxt->sax; 11722 ctxt->sax = sax; 11723 if (user_data != NULL) 11724 ctxt->userData = user_data; 11725 } 11726 xmlDetectSAX2(ctxt); 11727 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11728 if (newDoc == NULL) { 11729 ctxt->node_seq.maximum = 0; 11730 ctxt->node_seq.length = 0; 11731 ctxt->node_seq.buffer = NULL; 11732 xmlFreeParserCtxt(ctxt); 11733 return(XML_ERR_INTERNAL_ERROR); 11734 } 11735 newDoc->intSubset = doc->intSubset; 11736 newDoc->extSubset = doc->extSubset; 11737 newDoc->dict = doc->dict; 11738 xmlDictReference(newDoc->dict); 11739 11740 if (doc->URL != NULL) { 11741 newDoc->URL = xmlStrdup(doc->URL); 11742 } 11743 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11744 if (newRoot == NULL) { 11745 if (sax != NULL) 11746 ctxt->sax = oldsax; 11747 ctxt->node_seq.maximum = 0; 11748 ctxt->node_seq.length = 0; 11749 ctxt->node_seq.buffer = NULL; 11750 xmlFreeParserCtxt(ctxt); 11751 newDoc->intSubset = NULL; 11752 newDoc->extSubset = NULL; 11753 xmlFreeDoc(newDoc); 11754 return(XML_ERR_INTERNAL_ERROR); 11755 } 11756 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11757 nodePush(ctxt, newDoc->children); 11758 ctxt->myDoc = doc; 11759 newRoot->doc = doc; 11760 11761 /* 11762 * Get the 4 first bytes and decode the 
charset 11763 * if enc != XML_CHAR_ENCODING_NONE 11764 * plug some encoding conversion routines. 11765 */ 11766 GROW; 11767 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11768 start[0] = RAW; 11769 start[1] = NXT(1); 11770 start[2] = NXT(2); 11771 start[3] = NXT(3); 11772 enc = xmlDetectCharEncoding(start, 4); 11773 if (enc != XML_CHAR_ENCODING_NONE) { 11774 xmlSwitchEncoding(ctxt, enc); 11775 } 11776 } 11777 11778 /* 11779 * Parse a possible text declaration first 11780 */ 11781 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11782 xmlParseTextDecl(ctxt); 11783 } 11784 11785 ctxt->instate = XML_PARSER_CONTENT; 11786 ctxt->depth = depth; 11787 11788 xmlParseContent(ctxt); 11789 11790 if ((RAW == '<') && (NXT(1) == '/')) { 11791 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11792 } else if (RAW != 0) { 11793 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11794 } 11795 if (ctxt->node != newDoc->children) { 11796 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11797 } 11798 11799 if (!ctxt->wellFormed) { 11800 if (ctxt->errNo == 0) 11801 ret = XML_ERR_INTERNAL_ERROR; 11802 else 11803 ret = (xmlParserErrors)ctxt->errNo; 11804 } else { 11805 if (list != NULL) { 11806 xmlNodePtr cur; 11807 11808 /* 11809 * Return the newly created nodeset after unlinking it from 11810 * they pseudo parent. 
11811 */ 11812 cur = newDoc->children->children; 11813 *list = cur; 11814 while (cur != NULL) { 11815 cur->parent = NULL; 11816 cur = cur->next; 11817 } 11818 newDoc->children->children = NULL; 11819 } 11820 ret = XML_ERR_OK; 11821 } 11822 if (sax != NULL) 11823 ctxt->sax = oldsax; 11824 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 11825 oldctxt->node_seq.length = ctxt->node_seq.length; 11826 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 11827 ctxt->node_seq.maximum = 0; 11828 ctxt->node_seq.length = 0; 11829 ctxt->node_seq.buffer = NULL; 11830 xmlFreeParserCtxt(ctxt); 11831 newDoc->intSubset = NULL; 11832 newDoc->extSubset = NULL; 11833 xmlFreeDoc(newDoc); 11834 11835 return(ret); 11836} 11837 11838#ifdef LIBXML_SAX1_ENABLED 11839/** 11840 * xmlParseExternalEntity: 11841 * @doc: the document the chunk pertains to 11842 * @sax: the SAX handler bloc (possibly NULL) 11843 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11844 * @depth: Used for loop detection, use 0 11845 * @URL: the URL for the entity to load 11846 * @ID: the System ID for the entity to load 11847 * @lst: the return value for the set of parsed nodes 11848 * 11849 * Parse an external general entity 11850 * An external general parsed entity is well-formed if it matches the 11851 * production labeled extParsedEnt. 11852 * 11853 * [78] extParsedEnt ::= TextDecl? 
content 11854 * 11855 * Returns 0 if the entity is well formed, -1 in case of args problem and 11856 * the parser error code otherwise 11857 */ 11858 11859int 11860xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 11861 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 11862 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 11863 ID, lst)); 11864} 11865 11866/** 11867 * xmlParseBalancedChunkMemory: 11868 * @doc: the document the chunk pertains to 11869 * @sax: the SAX handler bloc (possibly NULL) 11870 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11871 * @depth: Used for loop detection, use 0 11872 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11873 * @lst: the return value for the set of parsed nodes 11874 * 11875 * Parse a well-balanced chunk of an XML document 11876 * called by the parser 11877 * The allowed sequence for the Well Balanced Chunk is the one defined by 11878 * the content production in the XML grammar: 11879 * 11880 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11881 * 11882 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11883 * the parser error code otherwise 11884 */ 11885 11886int 11887xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11888 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 11889 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 11890 depth, string, lst, 0 ); 11891} 11892#endif /* LIBXML_SAX1_ENABLED */ 11893 11894/** 11895 * xmlParseBalancedChunkMemoryInternal: 11896 * @oldctxt: the existing parsing context 11897 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11898 * @user_data: the user data field for the parser context 11899 * @lst: the return value for the set of parsed nodes 11900 * 11901 * 11902 * Parse a well-balanced chunk of an XML document 11903 * called by the parser 11904 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 11905 * the content production in the XML grammar: 11906 * 11907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11908 * 11909 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 11910 * error code otherwise 11911 * 11912 * In case recover is set to 1, the nodelist will not be empty even if 11913 * the parsed chunk is not well balanced. 11914 */ 11915static xmlParserErrors 11916xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 11917 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 11918 xmlParserCtxtPtr ctxt; 11919 xmlDocPtr newDoc = NULL; 11920 xmlNodePtr newRoot; 11921 xmlSAXHandlerPtr oldsax = NULL; 11922 xmlNodePtr content = NULL; 11923 xmlNodePtr last = NULL; 11924 int size; 11925 xmlParserErrors ret = XML_ERR_OK; 11926 11927 if (oldctxt->depth > 40) { 11928 return(XML_ERR_ENTITY_LOOP); 11929 } 11930 11931 11932 if (lst != NULL) 11933 *lst = NULL; 11934 if (string == NULL) 11935 return(XML_ERR_INTERNAL_ERROR); 11936 11937 size = xmlStrlen(string); 11938 11939 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11940 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 11941 if (user_data != NULL) 11942 ctxt->userData = user_data; 11943 else 11944 ctxt->userData = ctxt; 11945 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11946 ctxt->dict = oldctxt->dict; 11947 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11948 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11949 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11950 11951 oldsax = ctxt->sax; 11952 ctxt->sax = oldctxt->sax; 11953 xmlDetectSAX2(ctxt); 11954 ctxt->replaceEntities = oldctxt->replaceEntities; 11955 ctxt->options = oldctxt->options; 11956 11957 ctxt->_private = oldctxt->_private; 11958 if (oldctxt->myDoc == NULL) { 11959 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11960 if (newDoc == NULL) { 11961 ctxt->sax = 
oldsax; 11962 ctxt->dict = NULL; 11963 xmlFreeParserCtxt(ctxt); 11964 return(XML_ERR_INTERNAL_ERROR); 11965 } 11966 newDoc->dict = ctxt->dict; 11967 xmlDictReference(newDoc->dict); 11968 ctxt->myDoc = newDoc; 11969 } else { 11970 ctxt->myDoc = oldctxt->myDoc; 11971 content = ctxt->myDoc->children; 11972 last = ctxt->myDoc->last; 11973 } 11974 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 11975 if (newRoot == NULL) { 11976 ctxt->sax = oldsax; 11977 ctxt->dict = NULL; 11978 xmlFreeParserCtxt(ctxt); 11979 if (newDoc != NULL) { 11980 xmlFreeDoc(newDoc); 11981 } 11982 return(XML_ERR_INTERNAL_ERROR); 11983 } 11984 ctxt->myDoc->children = NULL; 11985 ctxt->myDoc->last = NULL; 11986 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 11987 nodePush(ctxt, ctxt->myDoc->children); 11988 ctxt->instate = XML_PARSER_CONTENT; 11989 ctxt->depth = oldctxt->depth + 1; 11990 11991 ctxt->validate = 0; 11992 ctxt->loadsubset = oldctxt->loadsubset; 11993 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 11994 /* 11995 * ID/IDREF registration will be done in xmlValidateElement below 11996 */ 11997 ctxt->loadsubset |= XML_SKIP_IDS; 11998 } 11999 ctxt->dictNames = oldctxt->dictNames; 12000 ctxt->attsDefault = oldctxt->attsDefault; 12001 ctxt->attsSpecial = oldctxt->attsSpecial; 12002 12003 xmlParseContent(ctxt); 12004 if ((RAW == '<') && (NXT(1) == '/')) { 12005 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12006 } else if (RAW != 0) { 12007 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12008 } 12009 if (ctxt->node != ctxt->myDoc->children) { 12010 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12011 } 12012 12013 if (!ctxt->wellFormed) { 12014 if (ctxt->errNo == 0) 12015 ret = XML_ERR_INTERNAL_ERROR; 12016 else 12017 ret = (xmlParserErrors)ctxt->errNo; 12018 } else { 12019 ret = XML_ERR_OK; 12020 } 12021 12022 if ((lst != NULL) && (ret == XML_ERR_OK)) { 12023 xmlNodePtr cur; 12024 12025 /* 12026 * Return the newly created nodeset after 
unlinking it from 12027 * they pseudo parent. 12028 */ 12029 cur = ctxt->myDoc->children->children; 12030 *lst = cur; 12031 while (cur != NULL) { 12032#ifdef LIBXML_VALID_ENABLED 12033 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12034 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12035 (cur->type == XML_ELEMENT_NODE)) { 12036 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12037 oldctxt->myDoc, cur); 12038 } 12039#endif /* LIBXML_VALID_ENABLED */ 12040 cur->parent = NULL; 12041 cur = cur->next; 12042 } 12043 ctxt->myDoc->children->children = NULL; 12044 } 12045 if (ctxt->myDoc != NULL) { 12046 xmlFreeNode(ctxt->myDoc->children); 12047 ctxt->myDoc->children = content; 12048 ctxt->myDoc->last = last; 12049 } 12050 12051 ctxt->sax = oldsax; 12052 ctxt->dict = NULL; 12053 ctxt->attsDefault = NULL; 12054 ctxt->attsSpecial = NULL; 12055 xmlFreeParserCtxt(ctxt); 12056 if (newDoc != NULL) { 12057 xmlFreeDoc(newDoc); 12058 } 12059 12060 return(ret); 12061} 12062 12063/** 12064 * xmlParseInNodeContext: 12065 * @node: the context node 12066 * @data: the input string 12067 * @datalen: the input string length in bytes 12068 * @options: a combination of xmlParserOption 12069 * @lst: the return value for the set of parsed nodes 12070 * 12071 * Parse a well-balanced chunk of an XML document 12072 * within the context (DTD, namespaces, etc ...) of the given node. 
12073 * 12074 * The allowed sequence for the data is a Well Balanced Chunk defined by 12075 * the content production in the XML grammar: 12076 * 12077 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12078 * 12079 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12080 * error code otherwise 12081 */ 12082xmlParserErrors 12083xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12084 int options, xmlNodePtr *lst) { 12085#ifdef SAX2 12086 xmlParserCtxtPtr ctxt; 12087 xmlDocPtr doc = NULL; 12088 xmlNodePtr fake, cur; 12089 int nsnr = 0; 12090 12091 xmlParserErrors ret = XML_ERR_OK; 12092 12093 /* 12094 * check all input parameters, grab the document 12095 */ 12096 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 12097 return(XML_ERR_INTERNAL_ERROR); 12098 switch (node->type) { 12099 case XML_ELEMENT_NODE: 12100 case XML_ATTRIBUTE_NODE: 12101 case XML_TEXT_NODE: 12102 case XML_CDATA_SECTION_NODE: 12103 case XML_ENTITY_REF_NODE: 12104 case XML_PI_NODE: 12105 case XML_COMMENT_NODE: 12106 case XML_DOCUMENT_NODE: 12107 case XML_HTML_DOCUMENT_NODE: 12108 break; 12109 default: 12110 return(XML_ERR_INTERNAL_ERROR); 12111 12112 } 12113 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 12114 (node->type != XML_DOCUMENT_NODE) && 12115 (node->type != XML_HTML_DOCUMENT_NODE)) 12116 node = node->parent; 12117 if (node == NULL) 12118 return(XML_ERR_INTERNAL_ERROR); 12119 if (node->type == XML_ELEMENT_NODE) 12120 doc = node->doc; 12121 else 12122 doc = (xmlDocPtr) node; 12123 if (doc == NULL) 12124 return(XML_ERR_INTERNAL_ERROR); 12125 12126 /* 12127 * allocate a context and set-up everything not related to the 12128 * node position in the tree 12129 */ 12130 if (doc->type == XML_DOCUMENT_NODE) 12131 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 12132#ifdef LIBXML_HTML_ENABLED 12133 else if (doc->type == XML_HTML_DOCUMENT_NODE) 12134 ctxt = htmlCreateMemoryParserCtxt((char *) 
data, datalen); 12135#endif 12136 else 12137 return(XML_ERR_INTERNAL_ERROR); 12138 12139 if (ctxt == NULL) 12140 return(XML_ERR_NO_MEMORY); 12141 fake = xmlNewComment(NULL); 12142 if (fake == NULL) { 12143 xmlFreeParserCtxt(ctxt); 12144 return(XML_ERR_NO_MEMORY); 12145 } 12146 xmlAddChild(node, fake); 12147 12148 /* 12149 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 12150 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 12151 * we must wait until the last moment to free the original one. 12152 */ 12153 if (doc->dict != NULL) { 12154 if (ctxt->dict != NULL) 12155 xmlDictFree(ctxt->dict); 12156 ctxt->dict = doc->dict; 12157 } else 12158 options |= XML_PARSE_NODICT; 12159 12160 xmlCtxtUseOptions(ctxt, options); 12161 xmlDetectSAX2(ctxt); 12162 ctxt->myDoc = doc; 12163 12164 if (node->type == XML_ELEMENT_NODE) { 12165 nodePush(ctxt, node); 12166 /* 12167 * initialize the SAX2 namespaces stack 12168 */ 12169 cur = node; 12170 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 12171 xmlNsPtr ns = cur->nsDef; 12172 const xmlChar *iprefix, *ihref; 12173 12174 while (ns != NULL) { 12175 if (ctxt->dict) { 12176 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 12177 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 12178 } else { 12179 iprefix = ns->prefix; 12180 ihref = ns->href; 12181 } 12182 12183 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 12184 nsPush(ctxt, iprefix, ihref); 12185 nsnr++; 12186 } 12187 ns = ns->next; 12188 } 12189 cur = cur->parent; 12190 } 12191 ctxt->instate = XML_PARSER_CONTENT; 12192 } 12193 12194 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 12195 /* 12196 * ID/IDREF registration will be done in xmlValidateElement below 12197 */ 12198 ctxt->loadsubset |= XML_SKIP_IDS; 12199 } 12200 12201#ifdef LIBXML_HTML_ENABLED 12202 if (doc->type == XML_HTML_DOCUMENT_NODE) 12203 __htmlParseContent(ctxt); 12204 else 12205#endif 12206 xmlParseContent(ctxt); 12207 12208 nsPop(ctxt, nsnr); 12209 if 
((RAW == '<') && (NXT(1) == '/')) { 12210 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12211 } else if (RAW != 0) { 12212 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12213 } 12214 if ((ctxt->node != NULL) && (ctxt->node != node)) { 12215 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12216 ctxt->wellFormed = 0; 12217 } 12218 12219 if (!ctxt->wellFormed) { 12220 if (ctxt->errNo == 0) 12221 ret = XML_ERR_INTERNAL_ERROR; 12222 else 12223 ret = (xmlParserErrors)ctxt->errNo; 12224 } else { 12225 ret = XML_ERR_OK; 12226 } 12227 12228 /* 12229 * Return the newly created nodeset after unlinking it from 12230 * the pseudo sibling. 12231 */ 12232 12233 cur = fake->next; 12234 fake->next = NULL; 12235 node->last = fake; 12236 12237 if (cur != NULL) { 12238 cur->prev = NULL; 12239 } 12240 12241 *lst = cur; 12242 12243 while (cur != NULL) { 12244 cur->parent = NULL; 12245 cur = cur->next; 12246 } 12247 12248 xmlUnlinkNode(fake); 12249 xmlFreeNode(fake); 12250 12251 12252 if (ret != XML_ERR_OK) { 12253 xmlFreeNodeList(*lst); 12254 *lst = NULL; 12255 } 12256 12257 if (doc->dict != NULL) 12258 ctxt->dict = NULL; 12259 xmlFreeParserCtxt(ctxt); 12260 12261 return(ret); 12262#else /* !SAX2 */ 12263 return(XML_ERR_INTERNAL_ERROR); 12264#endif 12265} 12266 12267#ifdef LIBXML_SAX1_ENABLED 12268/** 12269 * xmlParseBalancedChunkMemoryRecover: 12270 * @doc: the document the chunk pertains to 12271 * @sax: the SAX handler bloc (possibly NULL) 12272 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12273 * @depth: Used for loop detection, use 0 12274 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12275 * @lst: the return value for the set of parsed nodes 12276 * @recover: return nodes even if the data is broken (use 0) 12277 * 12278 * 12279 * Parse a well-balanced chunk of an XML document 12280 * called by the parser 12281 * The allowed sequence for the Well Balanced Chunk is the one defined by 12282 * the content production in the XML 
grammar: 12283 * 12284 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12285 * 12286 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12287 * the parser error code otherwise 12288 * 12289 * In case recover is set to 1, the nodelist will not be empty even if 12290 * the parsed chunk is not well balanced, assuming the parsing succeeded to 12291 * some extent. 12292 */ 12293int 12294xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12295 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 12296 int recover) { 12297 xmlParserCtxtPtr ctxt; 12298 xmlDocPtr newDoc; 12299 xmlSAXHandlerPtr oldsax = NULL; 12300 xmlNodePtr content, newRoot; 12301 int size; 12302 int ret = 0; 12303 12304 if (depth > 40) { 12305 return(XML_ERR_ENTITY_LOOP); 12306 } 12307 12308 12309 if (lst != NULL) 12310 *lst = NULL; 12311 if (string == NULL) 12312 return(-1); 12313 12314 size = xmlStrlen(string); 12315 12316 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12317 if (ctxt == NULL) return(-1); 12318 ctxt->userData = ctxt; 12319 if (sax != NULL) { 12320 oldsax = ctxt->sax; 12321 ctxt->sax = sax; 12322 if (user_data != NULL) 12323 ctxt->userData = user_data; 12324 } 12325 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12326 if (newDoc == NULL) { 12327 xmlFreeParserCtxt(ctxt); 12328 return(-1); 12329 } 12330 if ((doc != NULL) && (doc->dict != NULL)) { 12331 xmlDictFree(ctxt->dict); 12332 ctxt->dict = doc->dict; 12333 xmlDictReference(ctxt->dict); 12334 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12335 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12336 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12337 ctxt->dictNames = 1; 12338 } else { 12339 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); 12340 } 12341 if (doc != NULL) { 12342 newDoc->intSubset = doc->intSubset; 12343 newDoc->extSubset = doc->extSubset; 12344 } 12345 newRoot = xmlNewDocNode(newDoc, 
NULL, BAD_CAST "pseudoroot", NULL); 12346 if (newRoot == NULL) { 12347 if (sax != NULL) 12348 ctxt->sax = oldsax; 12349 xmlFreeParserCtxt(ctxt); 12350 newDoc->intSubset = NULL; 12351 newDoc->extSubset = NULL; 12352 xmlFreeDoc(newDoc); 12353 return(-1); 12354 } 12355 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12356 nodePush(ctxt, newRoot); 12357 if (doc == NULL) { 12358 ctxt->myDoc = newDoc; 12359 } else { 12360 ctxt->myDoc = newDoc; 12361 newDoc->children->doc = doc; 12362 /* Ensure that doc has XML spec namespace */ 12363 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 12364 newDoc->oldNs = doc->oldNs; 12365 } 12366 ctxt->instate = XML_PARSER_CONTENT; 12367 ctxt->depth = depth; 12368 12369 /* 12370 * Doing validity checking on chunk doesn't make sense 12371 */ 12372 ctxt->validate = 0; 12373 ctxt->loadsubset = 0; 12374 xmlDetectSAX2(ctxt); 12375 12376 if ( doc != NULL ){ 12377 content = doc->children; 12378 doc->children = NULL; 12379 xmlParseContent(ctxt); 12380 doc->children = content; 12381 } 12382 else { 12383 xmlParseContent(ctxt); 12384 } 12385 if ((RAW == '<') && (NXT(1) == '/')) { 12386 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12387 } else if (RAW != 0) { 12388 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12389 } 12390 if (ctxt->node != newDoc->children) { 12391 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12392 } 12393 12394 if (!ctxt->wellFormed) { 12395 if (ctxt->errNo == 0) 12396 ret = 1; 12397 else 12398 ret = ctxt->errNo; 12399 } else { 12400 ret = 0; 12401 } 12402 12403 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 12404 xmlNodePtr cur; 12405 12406 /* 12407 * Return the newly created nodeset after unlinking it from 12408 * they pseudo parent. 
12409 */ 12410 cur = newDoc->children->children; 12411 *lst = cur; 12412 while (cur != NULL) { 12413 xmlSetTreeDoc(cur, doc); 12414 cur->parent = NULL; 12415 cur = cur->next; 12416 } 12417 newDoc->children->children = NULL; 12418 } 12419 12420 if (sax != NULL) 12421 ctxt->sax = oldsax; 12422 xmlFreeParserCtxt(ctxt); 12423 newDoc->intSubset = NULL; 12424 newDoc->extSubset = NULL; 12425 newDoc->oldNs = NULL; 12426 xmlFreeDoc(newDoc); 12427 12428 return(ret); 12429} 12430 12431/** 12432 * xmlSAXParseEntity: 12433 * @sax: the SAX handler block 12434 * @filename: the filename 12435 * 12436 * parse an XML external entity out of context and build a tree. 12437 * It use the given SAX function block to handle the parsing callback. 12438 * If sax is NULL, fallback to the default DOM tree building routines. 12439 * 12440 * [78] extParsedEnt ::= TextDecl? content 12441 * 12442 * This correspond to a "Well Balanced" chunk 12443 * 12444 * Returns the resulting document tree 12445 */ 12446 12447xmlDocPtr 12448xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 12449 xmlDocPtr ret; 12450 xmlParserCtxtPtr ctxt; 12451 12452 ctxt = xmlCreateFileParserCtxt(filename); 12453 if (ctxt == NULL) { 12454 return(NULL); 12455 } 12456 if (sax != NULL) { 12457 if (ctxt->sax != NULL) 12458 xmlFree(ctxt->sax); 12459 ctxt->sax = sax; 12460 ctxt->userData = NULL; 12461 } 12462 12463 xmlParseExtParsedEnt(ctxt); 12464 12465 if (ctxt->wellFormed) 12466 ret = ctxt->myDoc; 12467 else { 12468 ret = NULL; 12469 xmlFreeDoc(ctxt->myDoc); 12470 ctxt->myDoc = NULL; 12471 } 12472 if (sax != NULL) 12473 ctxt->sax = NULL; 12474 xmlFreeParserCtxt(ctxt); 12475 12476 return(ret); 12477} 12478 12479/** 12480 * xmlParseEntity: 12481 * @filename: the filename 12482 * 12483 * parse an XML external entity out of context and build a tree. 12484 * 12485 * [78] extParsedEnt ::= TextDecl? 
content 12486 * 12487 * This correspond to a "Well Balanced" chunk 12488 * 12489 * Returns the resulting document tree 12490 */ 12491 12492xmlDocPtr 12493xmlParseEntity(const char *filename) { 12494 return(xmlSAXParseEntity(NULL, filename)); 12495} 12496#endif /* LIBXML_SAX1_ENABLED */ 12497 12498/** 12499 * xmlCreateEntityParserCtxt: 12500 * @URL: the entity URL 12501 * @ID: the entity PUBLIC ID 12502 * @base: a possible base for the target URI 12503 * 12504 * Create a parser context for an external entity 12505 * Automatic support for ZLIB/Compress compressed document is provided 12506 * by default if found at compile-time. 12507 * 12508 * Returns the new parser context or NULL 12509 */ 12510xmlParserCtxtPtr 12511xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 12512 const xmlChar *base) { 12513 xmlParserCtxtPtr ctxt; 12514 xmlParserInputPtr inputStream; 12515 char *directory = NULL; 12516 xmlChar *uri; 12517 12518 ctxt = xmlNewParserCtxt(); 12519 if (ctxt == NULL) { 12520 return(NULL); 12521 } 12522 12523 uri = xmlBuildURI(URL, base); 12524 12525 if (uri == NULL) { 12526 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 12527 if (inputStream == NULL) { 12528 xmlFreeParserCtxt(ctxt); 12529 return(NULL); 12530 } 12531 12532 inputPush(ctxt, inputStream); 12533 12534 if ((ctxt->directory == NULL) && (directory == NULL)) 12535 directory = xmlParserGetDirectory((char *)URL); 12536 if ((ctxt->directory == NULL) && (directory != NULL)) 12537 ctxt->directory = directory; 12538 } else { 12539 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 12540 if (inputStream == NULL) { 12541 xmlFree(uri); 12542 xmlFreeParserCtxt(ctxt); 12543 return(NULL); 12544 } 12545 12546 inputPush(ctxt, inputStream); 12547 12548 if ((ctxt->directory == NULL) && (directory == NULL)) 12549 directory = xmlParserGetDirectory((char *)uri); 12550 if ((ctxt->directory == NULL) && (directory != NULL)) 12551 ctxt->directory = directory; 12552 
xmlFree(uri); 12553 } 12554 return(ctxt); 12555} 12556 12557/************************************************************************ 12558 * * 12559 * Front ends when parsing from a file * 12560 * * 12561 ************************************************************************/ 12562 12563/** 12564 * xmlCreateURLParserCtxt: 12565 * @filename: the filename or URL 12566 * @options: a combination of xmlParserOption 12567 * 12568 * Create a parser context for a file or URL content. 12569 * Automatic support for ZLIB/Compress compressed document is provided 12570 * by default if found at compile-time and for file accesses 12571 * 12572 * Returns the new parser context or NULL 12573 */ 12574xmlParserCtxtPtr 12575xmlCreateURLParserCtxt(const char *filename, int options) 12576{ 12577 xmlParserCtxtPtr ctxt; 12578 xmlParserInputPtr inputStream; 12579 char *directory = NULL; 12580 12581 ctxt = xmlNewParserCtxt(); 12582 if (ctxt == NULL) { 12583 xmlErrMemory(NULL, "cannot allocate parser context"); 12584 return(NULL); 12585 } 12586 12587 if (options) 12588 xmlCtxtUseOptions(ctxt, options); 12589 ctxt->linenumbers = 1; 12590 12591 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 12592 if (inputStream == NULL) { 12593 xmlFreeParserCtxt(ctxt); 12594 return(NULL); 12595 } 12596 12597 inputPush(ctxt, inputStream); 12598 if ((ctxt->directory == NULL) && (directory == NULL)) 12599 directory = xmlParserGetDirectory(filename); 12600 if ((ctxt->directory == NULL) && (directory != NULL)) 12601 ctxt->directory = directory; 12602 12603 return(ctxt); 12604} 12605 12606/** 12607 * xmlCreateFileParserCtxt: 12608 * @filename: the filename 12609 * 12610 * Create a parser context for a file content. 12611 * Automatic support for ZLIB/Compress compressed document is provided 12612 * by default if found at compile-time. 
12613 * 12614 * Returns the new parser context or NULL 12615 */ 12616xmlParserCtxtPtr 12617xmlCreateFileParserCtxt(const char *filename) 12618{ 12619 return(xmlCreateURLParserCtxt(filename, 0)); 12620} 12621 12622#ifdef LIBXML_SAX1_ENABLED 12623/** 12624 * xmlSAXParseFileWithData: 12625 * @sax: the SAX handler block 12626 * @filename: the filename 12627 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12628 * documents 12629 * @data: the userdata 12630 * 12631 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12632 * compressed document is provided by default if found at compile-time. 12633 * It use the given SAX function block to handle the parsing callback. 12634 * If sax is NULL, fallback to the default DOM tree building routines. 12635 * 12636 * User data (void *) is stored within the parser context in the 12637 * context's _private member, so it is available nearly everywhere in libxml 12638 * 12639 * Returns the resulting document tree 12640 */ 12641 12642xmlDocPtr 12643xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 12644 int recovery, void *data) { 12645 xmlDocPtr ret; 12646 xmlParserCtxtPtr ctxt; 12647 12648 xmlInitParser(); 12649 12650 ctxt = xmlCreateFileParserCtxt(filename); 12651 if (ctxt == NULL) { 12652 return(NULL); 12653 } 12654 if (sax != NULL) { 12655 if (ctxt->sax != NULL) 12656 xmlFree(ctxt->sax); 12657 ctxt->sax = sax; 12658 } 12659 xmlDetectSAX2(ctxt); 12660 if (data!=NULL) { 12661 ctxt->_private = data; 12662 } 12663 12664 if (ctxt->directory == NULL) 12665 ctxt->directory = xmlParserGetDirectory(filename); 12666 12667 ctxt->recovery = recovery; 12668 12669 xmlParseDocument(ctxt); 12670 12671 if ((ctxt->wellFormed) || recovery) { 12672 ret = ctxt->myDoc; 12673 if (ret != NULL) { 12674 if (ctxt->input->buf->compressed > 0) 12675 ret->compression = 9; 12676 else 12677 ret->compression = ctxt->input->buf->compressed; 12678 } 12679 } 12680 else { 12681 ret = NULL; 12682 
xmlFreeDoc(ctxt->myDoc); 12683 ctxt->myDoc = NULL; 12684 } 12685 if (sax != NULL) 12686 ctxt->sax = NULL; 12687 xmlFreeParserCtxt(ctxt); 12688 12689 return(ret); 12690} 12691 12692/** 12693 * xmlSAXParseFile: 12694 * @sax: the SAX handler block 12695 * @filename: the filename 12696 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12697 * documents 12698 * 12699 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12700 * compressed document is provided by default if found at compile-time. 12701 * It use the given SAX function block to handle the parsing callback. 12702 * If sax is NULL, fallback to the default DOM tree building routines. 12703 * 12704 * Returns the resulting document tree 12705 */ 12706 12707xmlDocPtr 12708xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 12709 int recovery) { 12710 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 12711} 12712 12713/** 12714 * xmlRecoverDoc: 12715 * @cur: a pointer to an array of xmlChar 12716 * 12717 * parse an XML in-memory document and build a tree. 12718 * In the case the document is not Well Formed, a attempt to build a 12719 * tree is tried anyway 12720 * 12721 * Returns the resulting document tree or NULL in case of failure 12722 */ 12723 12724xmlDocPtr 12725xmlRecoverDoc(xmlChar *cur) { 12726 return(xmlSAXParseDoc(NULL, cur, 1)); 12727} 12728 12729/** 12730 * xmlParseFile: 12731 * @filename: the filename 12732 * 12733 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12734 * compressed document is provided by default if found at compile-time. 12735 * 12736 * Returns the resulting document tree if the file was wellformed, 12737 * NULL otherwise. 12738 */ 12739 12740xmlDocPtr 12741xmlParseFile(const char *filename) { 12742 return(xmlSAXParseFile(NULL, filename, 0)); 12743} 12744 12745/** 12746 * xmlRecoverFile: 12747 * @filename: the filename 12748 * 12749 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 12750 * compressed document is provided by default if found at compile-time. 12751 * In the case the document is not Well Formed, it attempts to build 12752 * a tree anyway 12753 * 12754 * Returns the resulting document tree or NULL in case of failure 12755 */ 12756 12757xmlDocPtr 12758xmlRecoverFile(const char *filename) { 12759 return(xmlSAXParseFile(NULL, filename, 1)); 12760} 12761 12762 12763/** 12764 * xmlSetupParserForBuffer: 12765 * @ctxt: an XML parser context 12766 * @buffer: a xmlChar * buffer 12767 * @filename: a file name 12768 * 12769 * Setup the parser context to parse a new buffer; Clears any prior 12770 * contents from the parser context. The buffer parameter must not be 12771 * NULL, but the filename parameter can be 12772 */ 12773void 12774xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 12775 const char* filename) 12776{ 12777 xmlParserInputPtr input; 12778 12779 if ((ctxt == NULL) || (buffer == NULL)) 12780 return; 12781 12782 input = xmlNewInputStream(ctxt); 12783 if (input == NULL) { 12784 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 12785 xmlClearParserCtxt(ctxt); 12786 return; 12787 } 12788 12789 xmlClearParserCtxt(ctxt); 12790 if (filename != NULL) 12791 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 12792 input->base = buffer; 12793 input->cur = buffer; 12794 input->end = &buffer[xmlStrlen(buffer)]; 12795 inputPush(ctxt, input); 12796} 12797 12798/** 12799 * xmlSAXUserParseFile: 12800 * @sax: a SAX handler 12801 * @user_data: The user data returned on SAX callbacks 12802 * @filename: a file name 12803 * 12804 * parse an XML file and call the given SAX handler routines. 
12805 * Automatic support for ZLIB/Compress compressed document is provided 12806 * 12807 * Returns 0 in case of success or a error number otherwise 12808 */ 12809int 12810xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 12811 const char *filename) { 12812 int ret = 0; 12813 xmlParserCtxtPtr ctxt; 12814 12815 ctxt = xmlCreateFileParserCtxt(filename); 12816 if (ctxt == NULL) return -1; 12817 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12818 xmlFree(ctxt->sax); 12819 ctxt->sax = sax; 12820 xmlDetectSAX2(ctxt); 12821 12822 if (user_data != NULL) 12823 ctxt->userData = user_data; 12824 12825 xmlParseDocument(ctxt); 12826 12827 if (ctxt->wellFormed) 12828 ret = 0; 12829 else { 12830 if (ctxt->errNo != 0) 12831 ret = ctxt->errNo; 12832 else 12833 ret = -1; 12834 } 12835 if (sax != NULL) 12836 ctxt->sax = NULL; 12837 if (ctxt->myDoc != NULL) { 12838 xmlFreeDoc(ctxt->myDoc); 12839 ctxt->myDoc = NULL; 12840 } 12841 xmlFreeParserCtxt(ctxt); 12842 12843 return ret; 12844} 12845#endif /* LIBXML_SAX1_ENABLED */ 12846 12847/************************************************************************ 12848 * * 12849 * Front ends when parsing from memory * 12850 * * 12851 ************************************************************************/ 12852 12853/** 12854 * xmlCreateMemoryParserCtxt: 12855 * @buffer: a pointer to a char array 12856 * @size: the size of the array 12857 * 12858 * Create a parser context for an XML in-memory document. 
12859 * 12860 * Returns the new parser context or NULL 12861 */ 12862xmlParserCtxtPtr 12863xmlCreateMemoryParserCtxt(const char *buffer, int size) { 12864 xmlParserCtxtPtr ctxt; 12865 xmlParserInputPtr input; 12866 xmlParserInputBufferPtr buf; 12867 12868 if (buffer == NULL) 12869 return(NULL); 12870 if (size <= 0) 12871 return(NULL); 12872 12873 ctxt = xmlNewParserCtxt(); 12874 if (ctxt == NULL) 12875 return(NULL); 12876 12877 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 12878 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12879 if (buf == NULL) { 12880 xmlFreeParserCtxt(ctxt); 12881 return(NULL); 12882 } 12883 12884 input = xmlNewInputStream(ctxt); 12885 if (input == NULL) { 12886 xmlFreeParserInputBuffer(buf); 12887 xmlFreeParserCtxt(ctxt); 12888 return(NULL); 12889 } 12890 12891 input->filename = NULL; 12892 input->buf = buf; 12893 input->base = input->buf->buffer->content; 12894 input->cur = input->buf->buffer->content; 12895 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 12896 12897 inputPush(ctxt, input); 12898 return(ctxt); 12899} 12900 12901#ifdef LIBXML_SAX1_ENABLED 12902/** 12903 * xmlSAXParseMemoryWithData: 12904 * @sax: the SAX handler block 12905 * @buffer: an pointer to a char array 12906 * @size: the size of the array 12907 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12908 * documents 12909 * @data: the userdata 12910 * 12911 * parse an XML in-memory block and use the given SAX function block 12912 * to handle the parsing callback. If sax is NULL, fallback to the default 12913 * DOM tree building routines. 
12914 * 12915 * User data (void *) is stored within the parser context in the 12916 * context's _private member, so it is available nearly everywhere in libxml 12917 * 12918 * Returns the resulting document tree 12919 */ 12920 12921xmlDocPtr 12922xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 12923 int size, int recovery, void *data) { 12924 xmlDocPtr ret; 12925 xmlParserCtxtPtr ctxt; 12926 12927 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12928 if (ctxt == NULL) return(NULL); 12929 if (sax != NULL) { 12930 if (ctxt->sax != NULL) 12931 xmlFree(ctxt->sax); 12932 ctxt->sax = sax; 12933 } 12934 xmlDetectSAX2(ctxt); 12935 if (data!=NULL) { 12936 ctxt->_private=data; 12937 } 12938 12939 ctxt->recovery = recovery; 12940 12941 xmlParseDocument(ctxt); 12942 12943 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 12944 else { 12945 ret = NULL; 12946 xmlFreeDoc(ctxt->myDoc); 12947 ctxt->myDoc = NULL; 12948 } 12949 if (sax != NULL) 12950 ctxt->sax = NULL; 12951 xmlFreeParserCtxt(ctxt); 12952 12953 return(ret); 12954} 12955 12956/** 12957 * xmlSAXParseMemory: 12958 * @sax: the SAX handler block 12959 * @buffer: an pointer to a char array 12960 * @size: the size of the array 12961 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 12962 * documents 12963 * 12964 * parse an XML in-memory block and use the given SAX function block 12965 * to handle the parsing callback. If sax is NULL, fallback to the default 12966 * DOM tree building routines. 12967 * 12968 * Returns the resulting document tree 12969 */ 12970xmlDocPtr 12971xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 12972 int size, int recovery) { 12973 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 12974} 12975 12976/** 12977 * xmlParseMemory: 12978 * @buffer: an pointer to a char array 12979 * @size: the size of the array 12980 * 12981 * parse an XML in-memory block and build a tree. 
12982 * 12983 * Returns the resulting document tree 12984 */ 12985 12986xmlDocPtr xmlParseMemory(const char *buffer, int size) { 12987 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 12988} 12989 12990/** 12991 * xmlRecoverMemory: 12992 * @buffer: an pointer to a char array 12993 * @size: the size of the array 12994 * 12995 * parse an XML in-memory block and build a tree. 12996 * In the case the document is not Well Formed, an attempt to 12997 * build a tree is tried anyway 12998 * 12999 * Returns the resulting document tree or NULL in case of error 13000 */ 13001 13002xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13003 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13004} 13005 13006/** 13007 * xmlSAXUserParseMemory: 13008 * @sax: a SAX handler 13009 * @user_data: The user data returned on SAX callbacks 13010 * @buffer: an in-memory XML document input 13011 * @size: the length of the XML document in bytes 13012 * 13013 * A better SAX parsing routine. 13014 * parse an XML in-memory buffer and call the given SAX handler routines. 
13015 * 13016 * Returns 0 in case of success or a error number otherwise 13017 */ 13018int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13019 const char *buffer, int size) { 13020 int ret = 0; 13021 xmlParserCtxtPtr ctxt; 13022 13023 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13024 if (ctxt == NULL) return -1; 13025 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13026 xmlFree(ctxt->sax); 13027 ctxt->sax = sax; 13028 xmlDetectSAX2(ctxt); 13029 13030 if (user_data != NULL) 13031 ctxt->userData = user_data; 13032 13033 xmlParseDocument(ctxt); 13034 13035 if (ctxt->wellFormed) 13036 ret = 0; 13037 else { 13038 if (ctxt->errNo != 0) 13039 ret = ctxt->errNo; 13040 else 13041 ret = -1; 13042 } 13043 if (sax != NULL) 13044 ctxt->sax = NULL; 13045 if (ctxt->myDoc != NULL) { 13046 xmlFreeDoc(ctxt->myDoc); 13047 ctxt->myDoc = NULL; 13048 } 13049 xmlFreeParserCtxt(ctxt); 13050 13051 return ret; 13052} 13053#endif /* LIBXML_SAX1_ENABLED */ 13054 13055/** 13056 * xmlCreateDocParserCtxt: 13057 * @cur: a pointer to an array of xmlChar 13058 * 13059 * Creates a parser context for an XML in-memory document. 13060 * 13061 * Returns the new parser context or NULL 13062 */ 13063xmlParserCtxtPtr 13064xmlCreateDocParserCtxt(const xmlChar *cur) { 13065 int len; 13066 13067 if (cur == NULL) 13068 return(NULL); 13069 len = xmlStrlen(cur); 13070 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 13071} 13072 13073#ifdef LIBXML_SAX1_ENABLED 13074/** 13075 * xmlSAXParseDoc: 13076 * @sax: the SAX handler block 13077 * @cur: a pointer to an array of xmlChar 13078 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13079 * documents 13080 * 13081 * parse an XML in-memory document and build a tree. 13082 * It use the given SAX function block to handle the parsing callback. 13083 * If sax is NULL, fallback to the default DOM tree building routines. 
13084 * 13085 * Returns the resulting document tree 13086 */ 13087 13088xmlDocPtr 13089xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 13090 xmlDocPtr ret; 13091 xmlParserCtxtPtr ctxt; 13092 xmlSAXHandlerPtr oldsax = NULL; 13093 13094 if (cur == NULL) return(NULL); 13095 13096 13097 ctxt = xmlCreateDocParserCtxt(cur); 13098 if (ctxt == NULL) return(NULL); 13099 if (sax != NULL) { 13100 oldsax = ctxt->sax; 13101 ctxt->sax = sax; 13102 ctxt->userData = NULL; 13103 } 13104 xmlDetectSAX2(ctxt); 13105 13106 xmlParseDocument(ctxt); 13107 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13108 else { 13109 ret = NULL; 13110 xmlFreeDoc(ctxt->myDoc); 13111 ctxt->myDoc = NULL; 13112 } 13113 if (sax != NULL) 13114 ctxt->sax = oldsax; 13115 xmlFreeParserCtxt(ctxt); 13116 13117 return(ret); 13118} 13119 13120/** 13121 * xmlParseDoc: 13122 * @cur: a pointer to an array of xmlChar 13123 * 13124 * parse an XML in-memory document and build a tree. 13125 * 13126 * Returns the resulting document tree 13127 */ 13128 13129xmlDocPtr 13130xmlParseDoc(const xmlChar *cur) { 13131 return(xmlSAXParseDoc(NULL, cur, 0)); 13132} 13133#endif /* LIBXML_SAX1_ENABLED */ 13134 13135#ifdef LIBXML_LEGACY_ENABLED 13136/************************************************************************ 13137 * * 13138 * Specific function to keep track of entities references * 13139 * and used by the XSLT debugger * 13140 * * 13141 ************************************************************************/ 13142 13143static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 13144 13145/** 13146 * xmlAddEntityReference: 13147 * @ent : A valid entity 13148 * @firstNode : A valid first node for children of entity 13149 * @lastNode : A valid last node of children entity 13150 * 13151 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 13152 */ 13153static void 13154xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 13155 xmlNodePtr lastNode) 13156{ 13157 
if (xmlEntityRefFunc != NULL) { 13158 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 13159 } 13160} 13161 13162 13163/** 13164 * xmlSetEntityReferenceFunc: 13165 * @func: A valid function 13166 * 13167 * Set the function to call call back when a xml reference has been made 13168 */ 13169void 13170xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 13171{ 13172 xmlEntityRefFunc = func; 13173} 13174#endif /* LIBXML_LEGACY_ENABLED */ 13175 13176/************************************************************************ 13177 * * 13178 * Miscellaneous * 13179 * * 13180 ************************************************************************/ 13181 13182#ifdef LIBXML_XPATH_ENABLED 13183#include <libxml/xpath.h> 13184#endif 13185 13186extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 13187static int xmlParserInitialized = 0; 13188 13189/** 13190 * xmlInitParser: 13191 * 13192 * Initialization function for the XML parser. 13193 * This is not reentrant. Call once before processing in case of 13194 * use in multithreaded programs. 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The function returns at once when the xmlParserInitialized flag is
 * already set. Otherwise it runs the per-module initializers listed
 * below and then sets the flag. When LIBXML_THREAD_ENABLED is defined
 * the initializers run between __xmlGlobalInitMutexLock() and
 * __xmlGlobalInitMutexUnlock(), and the flag is tested a second time
 * after the lock call.
 */

void
xmlInitParser(void) {
    /* already initialized: nothing to do */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* test the flag again now that the lock call has returned */
    if (xmlParserInitialized == 0) {
#endif
	/* install the default generic error function if none, or the default, is set */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitGlobals();
	xmlInitThreads();
	xmlInitMemory();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* set last, once every initializer above has run */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
13244 */ 13245 13246void 13247xmlCleanupParser(void) { 13248 if (!xmlParserInitialized) 13249 return; 13250 13251 xmlCleanupCharEncodingHandlers(); 13252#ifdef LIBXML_CATALOG_ENABLED 13253 xmlCatalogCleanup(); 13254#endif 13255 xmlDictCleanup(); 13256 xmlCleanupInputCallbacks(); 13257#ifdef LIBXML_OUTPUT_ENABLED 13258 xmlCleanupOutputCallbacks(); 13259#endif 13260#ifdef LIBXML_SCHEMAS_ENABLED 13261 xmlSchemaCleanupTypes(); 13262 xmlRelaxNGCleanupTypes(); 13263#endif 13264 xmlCleanupGlobals(); 13265 xmlResetLastError(); 13266 xmlCleanupThreads(); /* must be last if called not from the main thread */ 13267 xmlCleanupMemory(); 13268 xmlParserInitialized = 0; 13269} 13270 13271/************************************************************************ 13272 * * 13273 * New set (2.6.0) of simpler and more flexible APIs * 13274 * * 13275 ************************************************************************/ 13276 13277/** 13278 * DICT_FREE: 13279 * @str: a string 13280 * 13281 * Free a string if it is not owned by the "dict" dictionnary in the 13282 * current scope 13283 */ 13284#define DICT_FREE(str) \ 13285 if ((str) && ((!dict) || \ 13286 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 13287 xmlFree((char *)(str)); 13288 13289/** 13290 * xmlCtxtReset: 13291 * @ctxt: an XML parser context 13292 * 13293 * Reset a parser context 13294 */ 13295void 13296xmlCtxtReset(xmlParserCtxtPtr ctxt) 13297{ 13298 xmlParserInputPtr input; 13299 xmlDictPtr dict; 13300 13301 if (ctxt == NULL) 13302 return; 13303 13304 dict = ctxt->dict; 13305 13306 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 13307 xmlFreeInputStream(input); 13308 } 13309 ctxt->inputNr = 0; 13310 ctxt->input = NULL; 13311 13312 ctxt->spaceNr = 0; 13313 if (ctxt->spaceTab != NULL) { 13314 ctxt->spaceTab[0] = -1; 13315 ctxt->space = &ctxt->spaceTab[0]; 13316 } else { 13317 ctxt->space = NULL; 13318 } 13319 13320 13321 ctxt->nodeNr = 0; 13322 ctxt->node = NULL; 13323 13324 ctxt->nameNr = 0; 13325 
ctxt->name = NULL; 13326 13327 DICT_FREE(ctxt->version); 13328 ctxt->version = NULL; 13329 DICT_FREE(ctxt->encoding); 13330 ctxt->encoding = NULL; 13331 DICT_FREE(ctxt->directory); 13332 ctxt->directory = NULL; 13333 DICT_FREE(ctxt->extSubURI); 13334 ctxt->extSubURI = NULL; 13335 DICT_FREE(ctxt->extSubSystem); 13336 ctxt->extSubSystem = NULL; 13337 if (ctxt->myDoc != NULL) 13338 xmlFreeDoc(ctxt->myDoc); 13339 ctxt->myDoc = NULL; 13340 13341 ctxt->standalone = -1; 13342 ctxt->hasExternalSubset = 0; 13343 ctxt->hasPErefs = 0; 13344 ctxt->html = 0; 13345 ctxt->external = 0; 13346 ctxt->instate = XML_PARSER_START; 13347 ctxt->token = 0; 13348 13349 ctxt->wellFormed = 1; 13350 ctxt->nsWellFormed = 1; 13351 ctxt->disableSAX = 0; 13352 ctxt->valid = 1; 13353#if 0 13354 ctxt->vctxt.userData = ctxt; 13355 ctxt->vctxt.error = xmlParserValidityError; 13356 ctxt->vctxt.warning = xmlParserValidityWarning; 13357#endif 13358 ctxt->record_info = 0; 13359 ctxt->nbChars = 0; 13360 ctxt->checkIndex = 0; 13361 ctxt->inSubset = 0; 13362 ctxt->errNo = XML_ERR_OK; 13363 ctxt->depth = 0; 13364 ctxt->charset = XML_CHAR_ENCODING_UTF8; 13365 ctxt->catalogs = NULL; 13366 xmlInitNodeInfoSeq(&ctxt->node_seq); 13367 13368 if (ctxt->attsDefault != NULL) { 13369 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 13370 ctxt->attsDefault = NULL; 13371 } 13372 if (ctxt->attsSpecial != NULL) { 13373 xmlHashFree(ctxt->attsSpecial, NULL); 13374 ctxt->attsSpecial = NULL; 13375 } 13376 13377#ifdef LIBXML_CATALOG_ENABLED 13378 if (ctxt->catalogs != NULL) 13379 xmlCatalogFreeLocal(ctxt->catalogs); 13380#endif 13381 if (ctxt->lastError.code != XML_ERR_OK) 13382 xmlResetError(&ctxt->lastError); 13383} 13384 13385/** 13386 * xmlCtxtResetPush: 13387 * @ctxt: an XML parser context 13388 * @chunk: a pointer to an array of chars 13389 * @size: number of chars in the array 13390 * @filename: an optional file name or URI 13391 * @encoding: the document encoding, or NULL 13392 * 13393 * Reset a push parser 
context 13394 * 13395 * Returns 0 in case of success and 1 in case of error 13396 */ 13397int 13398xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 13399 int size, const char *filename, const char *encoding) 13400{ 13401 xmlParserInputPtr inputStream; 13402 xmlParserInputBufferPtr buf; 13403 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 13404 13405 if (ctxt == NULL) 13406 return(1); 13407 13408 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 13409 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 13410 13411 buf = xmlAllocParserInputBuffer(enc); 13412 if (buf == NULL) 13413 return(1); 13414 13415 if (ctxt == NULL) { 13416 xmlFreeParserInputBuffer(buf); 13417 return(1); 13418 } 13419 13420 xmlCtxtReset(ctxt); 13421 13422 if (ctxt->pushTab == NULL) { 13423 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 13424 sizeof(xmlChar *)); 13425 if (ctxt->pushTab == NULL) { 13426 xmlErrMemory(ctxt, NULL); 13427 xmlFreeParserInputBuffer(buf); 13428 return(1); 13429 } 13430 } 13431 13432 if (filename == NULL) { 13433 ctxt->directory = NULL; 13434 } else { 13435 ctxt->directory = xmlParserGetDirectory(filename); 13436 } 13437 13438 inputStream = xmlNewInputStream(ctxt); 13439 if (inputStream == NULL) { 13440 xmlFreeParserInputBuffer(buf); 13441 return(1); 13442 } 13443 13444 if (filename == NULL) 13445 inputStream->filename = NULL; 13446 else 13447 inputStream->filename = (char *) 13448 xmlCanonicPath((const xmlChar *) filename); 13449 inputStream->buf = buf; 13450 inputStream->base = inputStream->buf->buffer->content; 13451 inputStream->cur = inputStream->buf->buffer->content; 13452 inputStream->end = 13453 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 13454 13455 inputPush(ctxt, inputStream); 13456 13457 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 13458 (ctxt->input->buf != NULL)) { 13459 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 13460 int cur = ctxt->input->cur - 
ctxt->input->base; 13461 13462 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 13463 13464 ctxt->input->base = ctxt->input->buf->buffer->content + base; 13465 ctxt->input->cur = ctxt->input->base + cur; 13466 ctxt->input->end = 13467 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 13468 use]; 13469#ifdef DEBUG_PUSH 13470 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 13471#endif 13472 } 13473 13474 if (encoding != NULL) { 13475 xmlCharEncodingHandlerPtr hdlr; 13476 13477 hdlr = xmlFindCharEncodingHandler(encoding); 13478 if (hdlr != NULL) { 13479 xmlSwitchToEncoding(ctxt, hdlr); 13480 } else { 13481 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 13482 "Unsupported encoding %s\n", BAD_CAST encoding); 13483 } 13484 } else if (enc != XML_CHAR_ENCODING_NONE) { 13485 xmlSwitchEncoding(ctxt, enc); 13486 } 13487 13488 return(0); 13489} 13490 13491/** 13492 * xmlCtxtUseOptions: 13493 * @ctxt: an XML parser context 13494 * @options: a combination of xmlParserOption 13495 * 13496 * Applies the options to the parser context 13497 * 13498 * Returns 0 in case of success, the set of unknown or unimplemented options 13499 * in case of error. 
13500 */ 13501int 13502xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 13503{ 13504 if (ctxt == NULL) 13505 return(-1); 13506 if (options & XML_PARSE_RECOVER) { 13507 ctxt->recovery = 1; 13508 options -= XML_PARSE_RECOVER; 13509 } else 13510 ctxt->recovery = 0; 13511 if (options & XML_PARSE_DTDLOAD) { 13512 ctxt->loadsubset = XML_DETECT_IDS; 13513 options -= XML_PARSE_DTDLOAD; 13514 } else 13515 ctxt->loadsubset = 0; 13516 if (options & XML_PARSE_DTDATTR) { 13517 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 13518 options -= XML_PARSE_DTDATTR; 13519 } 13520 if (options & XML_PARSE_NOENT) { 13521 ctxt->replaceEntities = 1; 13522 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 13523 options -= XML_PARSE_NOENT; 13524 } else 13525 ctxt->replaceEntities = 0; 13526 if (options & XML_PARSE_PEDANTIC) { 13527 ctxt->pedantic = 1; 13528 options -= XML_PARSE_PEDANTIC; 13529 } else 13530 ctxt->pedantic = 0; 13531 if (options & XML_PARSE_NOBLANKS) { 13532 ctxt->keepBlanks = 0; 13533 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 13534 options -= XML_PARSE_NOBLANKS; 13535 } else 13536 ctxt->keepBlanks = 1; 13537 if (options & XML_PARSE_DTDVALID) { 13538 ctxt->validate = 1; 13539 if (options & XML_PARSE_NOWARNING) 13540 ctxt->vctxt.warning = NULL; 13541 if (options & XML_PARSE_NOERROR) 13542 ctxt->vctxt.error = NULL; 13543 options -= XML_PARSE_DTDVALID; 13544 } else 13545 ctxt->validate = 0; 13546 if (options & XML_PARSE_NOWARNING) { 13547 ctxt->sax->warning = NULL; 13548 options -= XML_PARSE_NOWARNING; 13549 } 13550 if (options & XML_PARSE_NOERROR) { 13551 ctxt->sax->error = NULL; 13552 ctxt->sax->fatalError = NULL; 13553 options -= XML_PARSE_NOERROR; 13554 } 13555#ifdef LIBXML_SAX1_ENABLED 13556 if (options & XML_PARSE_SAX1) { 13557 ctxt->sax->startElement = xmlSAX2StartElement; 13558 ctxt->sax->endElement = xmlSAX2EndElement; 13559 ctxt->sax->startElementNs = NULL; 13560 ctxt->sax->endElementNs = NULL; 13561 ctxt->sax->initialized = 1; 13562 options -= XML_PARSE_SAX1; 
13563 } 13564#endif /* LIBXML_SAX1_ENABLED */ 13565 if (options & XML_PARSE_NODICT) { 13566 ctxt->dictNames = 0; 13567 options -= XML_PARSE_NODICT; 13568 } else { 13569 ctxt->dictNames = 1; 13570 } 13571 if (options & XML_PARSE_NOCDATA) { 13572 ctxt->sax->cdataBlock = NULL; 13573 options -= XML_PARSE_NOCDATA; 13574 } 13575 if (options & XML_PARSE_NSCLEAN) { 13576 ctxt->options |= XML_PARSE_NSCLEAN; 13577 options -= XML_PARSE_NSCLEAN; 13578 } 13579 if (options & XML_PARSE_NONET) { 13580 ctxt->options |= XML_PARSE_NONET; 13581 options -= XML_PARSE_NONET; 13582 } 13583 if (options & XML_PARSE_COMPACT) { 13584 ctxt->options |= XML_PARSE_COMPACT; 13585 options -= XML_PARSE_COMPACT; 13586 } 13587 ctxt->linenumbers = 1; 13588 return (options); 13589} 13590 13591/** 13592 * xmlDoRead: 13593 * @ctxt: an XML parser context 13594 * @URL: the base URL to use for the document 13595 * @encoding: the document encoding, or NULL 13596 * @options: a combination of xmlParserOption 13597 * @reuse: keep the context for reuse 13598 * 13599 * Common front-end for the xmlRead functions 13600 * 13601 * Returns the resulting document tree or NULL 13602 */ 13603static xmlDocPtr 13604xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 13605 int options, int reuse) 13606{ 13607 xmlDocPtr ret; 13608 13609 xmlCtxtUseOptions(ctxt, options); 13610 if (encoding != NULL) { 13611 xmlCharEncodingHandlerPtr hdlr; 13612 13613 hdlr = xmlFindCharEncodingHandler(encoding); 13614 if (hdlr != NULL) 13615 xmlSwitchToEncoding(ctxt, hdlr); 13616 } 13617 if ((URL != NULL) && (ctxt->input != NULL) && 13618 (ctxt->input->filename == NULL)) 13619 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 13620 xmlParseDocument(ctxt); 13621 if ((ctxt->wellFormed) || ctxt->recovery) 13622 ret = ctxt->myDoc; 13623 else { 13624 ret = NULL; 13625 if (ctxt->myDoc != NULL) { 13626 xmlFreeDoc(ctxt->myDoc); 13627 } 13628 } 13629 ctxt->myDoc = NULL; 13630 if (!reuse) { 13631 
xmlFreeParserCtxt(ctxt); 13632 } 13633 13634 return (ret); 13635} 13636 13637/** 13638 * xmlReadDoc: 13639 * @cur: a pointer to a zero terminated string 13640 * @URL: the base URL to use for the document 13641 * @encoding: the document encoding, or NULL 13642 * @options: a combination of xmlParserOption 13643 * 13644 * parse an XML in-memory document and build a tree. 13645 * 13646 * Returns the resulting document tree 13647 */ 13648xmlDocPtr 13649xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 13650{ 13651 xmlParserCtxtPtr ctxt; 13652 13653 if (cur == NULL) 13654 return (NULL); 13655 13656 ctxt = xmlCreateDocParserCtxt(cur); 13657 if (ctxt == NULL) 13658 return (NULL); 13659 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13660} 13661 13662/** 13663 * xmlReadFile: 13664 * @filename: a file or URL 13665 * @encoding: the document encoding, or NULL 13666 * @options: a combination of xmlParserOption 13667 * 13668 * parse an XML file from the filesystem or the network. 13669 * 13670 * Returns the resulting document tree 13671 */ 13672xmlDocPtr 13673xmlReadFile(const char *filename, const char *encoding, int options) 13674{ 13675 xmlParserCtxtPtr ctxt; 13676 13677 ctxt = xmlCreateURLParserCtxt(filename, options); 13678 if (ctxt == NULL) 13679 return (NULL); 13680 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 13681} 13682 13683/** 13684 * xmlReadMemory: 13685 * @buffer: a pointer to a char array 13686 * @size: the size of the array 13687 * @URL: the base URL to use for the document 13688 * @encoding: the document encoding, or NULL 13689 * @options: a combination of xmlParserOption 13690 * 13691 * parse an XML in-memory document and build a tree. 
13692 * 13693 * Returns the resulting document tree 13694 */ 13695xmlDocPtr 13696xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 13697{ 13698 xmlParserCtxtPtr ctxt; 13699 13700 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13701 if (ctxt == NULL) 13702 return (NULL); 13703 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13704} 13705 13706/** 13707 * xmlReadFd: 13708 * @fd: an open file descriptor 13709 * @URL: the base URL to use for the document 13710 * @encoding: the document encoding, or NULL 13711 * @options: a combination of xmlParserOption 13712 * 13713 * parse an XML from a file descriptor and build a tree. 13714 * NOTE that the file descriptor will not be closed when the 13715 * reader is closed or reset. 13716 * 13717 * Returns the resulting document tree 13718 */ 13719xmlDocPtr 13720xmlReadFd(int fd, const char *URL, const char *encoding, int options) 13721{ 13722 xmlParserCtxtPtr ctxt; 13723 xmlParserInputBufferPtr input; 13724 xmlParserInputPtr stream; 13725 13726 if (fd < 0) 13727 return (NULL); 13728 13729 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13730 if (input == NULL) 13731 return (NULL); 13732 input->closecallback = NULL; 13733 ctxt = xmlNewParserCtxt(); 13734 if (ctxt == NULL) { 13735 xmlFreeParserInputBuffer(input); 13736 return (NULL); 13737 } 13738 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13739 if (stream == NULL) { 13740 xmlFreeParserInputBuffer(input); 13741 xmlFreeParserCtxt(ctxt); 13742 return (NULL); 13743 } 13744 inputPush(ctxt, stream); 13745 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13746} 13747 13748/** 13749 * xmlReadIO: 13750 * @ioread: an I/O read function 13751 * @ioclose: an I/O close function 13752 * @ioctx: an I/O handler 13753 * @URL: the base URL to use for the document 13754 * @encoding: the document encoding, or NULL 13755 * @options: a combination of xmlParserOption 13756 * 13757 * parse an XML document from 
I/O functions and source and build a tree. 13758 * 13759 * Returns the resulting document tree 13760 */ 13761xmlDocPtr 13762xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 13763 void *ioctx, const char *URL, const char *encoding, int options) 13764{ 13765 xmlParserCtxtPtr ctxt; 13766 xmlParserInputBufferPtr input; 13767 xmlParserInputPtr stream; 13768 13769 if (ioread == NULL) 13770 return (NULL); 13771 13772 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13773 XML_CHAR_ENCODING_NONE); 13774 if (input == NULL) 13775 return (NULL); 13776 ctxt = xmlNewParserCtxt(); 13777 if (ctxt == NULL) { 13778 xmlFreeParserInputBuffer(input); 13779 return (NULL); 13780 } 13781 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13782 if (stream == NULL) { 13783 xmlFreeParserInputBuffer(input); 13784 xmlFreeParserCtxt(ctxt); 13785 return (NULL); 13786 } 13787 inputPush(ctxt, stream); 13788 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13789} 13790 13791/** 13792 * xmlCtxtReadDoc: 13793 * @ctxt: an XML parser context 13794 * @cur: a pointer to a zero terminated string 13795 * @URL: the base URL to use for the document 13796 * @encoding: the document encoding, or NULL 13797 * @options: a combination of xmlParserOption 13798 * 13799 * parse an XML in-memory document and build a tree. 
13800 * This reuses the existing @ctxt parser context 13801 * 13802 * Returns the resulting document tree 13803 */ 13804xmlDocPtr 13805xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 13806 const char *URL, const char *encoding, int options) 13807{ 13808 xmlParserInputPtr stream; 13809 13810 if (cur == NULL) 13811 return (NULL); 13812 if (ctxt == NULL) 13813 return (NULL); 13814 13815 xmlCtxtReset(ctxt); 13816 13817 stream = xmlNewStringInputStream(ctxt, cur); 13818 if (stream == NULL) { 13819 return (NULL); 13820 } 13821 inputPush(ctxt, stream); 13822 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13823} 13824 13825/** 13826 * xmlCtxtReadFile: 13827 * @ctxt: an XML parser context 13828 * @filename: a file or URL 13829 * @encoding: the document encoding, or NULL 13830 * @options: a combination of xmlParserOption 13831 * 13832 * parse an XML file from the filesystem or the network. 13833 * This reuses the existing @ctxt parser context 13834 * 13835 * Returns the resulting document tree 13836 */ 13837xmlDocPtr 13838xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 13839 const char *encoding, int options) 13840{ 13841 xmlParserInputPtr stream; 13842 13843 if (filename == NULL) 13844 return (NULL); 13845 if (ctxt == NULL) 13846 return (NULL); 13847 13848 xmlCtxtReset(ctxt); 13849 13850 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 13851 if (stream == NULL) { 13852 return (NULL); 13853 } 13854 inputPush(ctxt, stream); 13855 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 13856} 13857 13858/** 13859 * xmlCtxtReadMemory: 13860 * @ctxt: an XML parser context 13861 * @buffer: a pointer to a char array 13862 * @size: the size of the array 13863 * @URL: the base URL to use for the document 13864 * @encoding: the document encoding, or NULL 13865 * @options: a combination of xmlParserOption 13866 * 13867 * parse an XML in-memory document and build a tree. 
13868 * This reuses the existing @ctxt parser context 13869 * 13870 * Returns the resulting document tree 13871 */ 13872xmlDocPtr 13873xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 13874 const char *URL, const char *encoding, int options) 13875{ 13876 xmlParserInputBufferPtr input; 13877 xmlParserInputPtr stream; 13878 13879 if (ctxt == NULL) 13880 return (NULL); 13881 if (buffer == NULL) 13882 return (NULL); 13883 13884 xmlCtxtReset(ctxt); 13885 13886 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13887 if (input == NULL) { 13888 return(NULL); 13889 } 13890 13891 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13892 if (stream == NULL) { 13893 xmlFreeParserInputBuffer(input); 13894 return(NULL); 13895 } 13896 13897 inputPush(ctxt, stream); 13898 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13899} 13900 13901/** 13902 * xmlCtxtReadFd: 13903 * @ctxt: an XML parser context 13904 * @fd: an open file descriptor 13905 * @URL: the base URL to use for the document 13906 * @encoding: the document encoding, or NULL 13907 * @options: a combination of xmlParserOption 13908 * 13909 * parse an XML from a file descriptor and build a tree. 13910 * This reuses the existing @ctxt parser context 13911 * NOTE that the file descriptor will not be closed when the 13912 * reader is closed or reset. 
13913 * 13914 * Returns the resulting document tree 13915 */ 13916xmlDocPtr 13917xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 13918 const char *URL, const char *encoding, int options) 13919{ 13920 xmlParserInputBufferPtr input; 13921 xmlParserInputPtr stream; 13922 13923 if (fd < 0) 13924 return (NULL); 13925 if (ctxt == NULL) 13926 return (NULL); 13927 13928 xmlCtxtReset(ctxt); 13929 13930 13931 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13932 if (input == NULL) 13933 return (NULL); 13934 input->closecallback = NULL; 13935 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13936 if (stream == NULL) { 13937 xmlFreeParserInputBuffer(input); 13938 return (NULL); 13939 } 13940 inputPush(ctxt, stream); 13941 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13942} 13943 13944/** 13945 * xmlCtxtReadIO: 13946 * @ctxt: an XML parser context 13947 * @ioread: an I/O read function 13948 * @ioclose: an I/O close function 13949 * @ioctx: an I/O handler 13950 * @URL: the base URL to use for the document 13951 * @encoding: the document encoding, or NULL 13952 * @options: a combination of xmlParserOption 13953 * 13954 * parse an XML document from I/O functions and source and build a tree. 
13955 * This reuses the existing @ctxt parser context 13956 * 13957 * Returns the resulting document tree 13958 */ 13959xmlDocPtr 13960xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 13961 xmlInputCloseCallback ioclose, void *ioctx, 13962 const char *URL, 13963 const char *encoding, int options) 13964{ 13965 xmlParserInputBufferPtr input; 13966 xmlParserInputPtr stream; 13967 13968 if (ioread == NULL) 13969 return (NULL); 13970 if (ctxt == NULL) 13971 return (NULL); 13972 13973 xmlCtxtReset(ctxt); 13974 13975 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13976 XML_CHAR_ENCODING_NONE); 13977 if (input == NULL) 13978 return (NULL); 13979 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13980 if (stream == NULL) { 13981 xmlFreeParserInputBuffer(input); 13982 return (NULL); 13983 } 13984 inputPush(ctxt, stream); 13985 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13986} 13987 13988#define bottom_parser 13989#include "elfgcchack.h" 13990