parser.c revision 68b6e02bfdb9d206d1bd89d0c8650c9b66fcfdab
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60#ifdef LIBXML_SCHEMAS_ENABLED 61#include <libxml/xmlschemastypes.h> 62#include <libxml/relaxng.h> 63#endif 64#ifdef HAVE_CTYPE_H 65#include <ctype.h> 66#endif 67#ifdef HAVE_STDLIB_H 68#include <stdlib.h> 69#endif 70#ifdef HAVE_SYS_STAT_H 71#include <sys/stat.h> 72#endif 73#ifdef HAVE_FCNTL_H 74#include <fcntl.h> 75#endif 76#ifdef HAVE_UNISTD_H 77#include <unistd.h> 78#endif 79#ifdef HAVE_ZLIB_H 80#include <zlib.h> 81#endif 82 83/** 84 * xmlParserMaxDepth: 85 * 86 * arbitrary depth limit for the XML documents that we allow to 87 * process. This is not a limitation of the parser but a safety 88 * boundary feature. 
89 */ 90unsigned int xmlParserMaxDepth = 1024; 91 92#define SAX2 1 93 94#define XML_PARSER_BIG_BUFFER_SIZE 300 95#define XML_PARSER_BUFFER_SIZE 100 96 97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 98 99/* 100 * List of XML prefixed PI allowed by W3C specs 101 */ 102 103static const char *xmlW3CPIs[] = { 104 "xml-stylesheet", 105 NULL 106}; 107 108 109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 111 const xmlChar **str); 112 113static xmlParserErrors 114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 115 xmlSAXHandlerPtr sax, 116 void *user_data, int depth, const xmlChar *URL, 117 const xmlChar *ID, xmlNodePtr *list); 118 119#ifdef LIBXML_LEGACY_ENABLED 120static void 121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 122 xmlNodePtr lastNode); 123#endif /* LIBXML_LEGACY_ENABLED */ 124 125static xmlParserErrors 126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 127 const xmlChar *string, void *user_data, xmlNodePtr *lst); 128 129static int 130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 131 132/************************************************************************ 133 * * 134 * Some factorized error routines * 135 * * 136 ************************************************************************/ 137 138/** 139 * xmlErrAttributeDup: 140 * @ctxt: an XML parser context 141 * @prefix: the attribute prefix 142 * @localname: the attribute localname 143 * 144 * Handle a redefinition of attribute error 145 */ 146static void 147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 148 const xmlChar * localname) 149{ 150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 151 (ctxt->instate == XML_PARSER_EOF)) 152 return; 153 if (ctxt != NULL) 154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 155 if (prefix == NULL) 156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 157 
ctxt->errNo, XML_ERR_FATAL, NULL, 0, 158 (const char *) localname, NULL, NULL, 0, 0, 159 "Attribute %s redefined\n", localname); 160 else 161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 162 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 163 (const char *) prefix, (const char *) localname, 164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 165 localname); 166 if (ctxt != NULL) { 167 ctxt->wellFormed = 0; 168 if (ctxt->recovery == 0) 169 ctxt->disableSAX = 1; 170 } 171} 172 173/** 174 * xmlFatalErr: 175 * @ctxt: an XML parser context 176 * @error: the error number 177 * @extra: extra information string 178 * 179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 180 */ 181static void 182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 183{ 184 const char *errmsg; 185 186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 187 (ctxt->instate == XML_PARSER_EOF)) 188 return; 189 switch (error) { 190 case XML_ERR_INVALID_HEX_CHARREF: 191 errmsg = "CharRef: invalid hexadecimal value\n"; 192 break; 193 case XML_ERR_INVALID_DEC_CHARREF: 194 errmsg = "CharRef: invalid decimal value\n"; 195 break; 196 case XML_ERR_INVALID_CHARREF: 197 errmsg = "CharRef: invalid value\n"; 198 break; 199 case XML_ERR_INTERNAL_ERROR: 200 errmsg = "internal error"; 201 break; 202 case XML_ERR_PEREF_AT_EOF: 203 errmsg = "PEReference at end of document\n"; 204 break; 205 case XML_ERR_PEREF_IN_PROLOG: 206 errmsg = "PEReference in prolog\n"; 207 break; 208 case XML_ERR_PEREF_IN_EPILOG: 209 errmsg = "PEReference in epilog\n"; 210 break; 211 case XML_ERR_PEREF_NO_NAME: 212 errmsg = "PEReference: no name\n"; 213 break; 214 case XML_ERR_PEREF_SEMICOL_MISSING: 215 errmsg = "PEReference: expecting ';'\n"; 216 break; 217 case XML_ERR_ENTITY_LOOP: 218 errmsg = "Detected an entity reference loop\n"; 219 break; 220 case XML_ERR_ENTITY_NOT_STARTED: 221 errmsg = "EntityValue: \" or ' expected\n"; 222 break; 223 case XML_ERR_ENTITY_PE_INTERNAL: 224 
errmsg = "PEReferences forbidden in internal subset\n"; 225 break; 226 case XML_ERR_ENTITY_NOT_FINISHED: 227 errmsg = "EntityValue: \" or ' expected\n"; 228 break; 229 case XML_ERR_ATTRIBUTE_NOT_STARTED: 230 errmsg = "AttValue: \" or ' expected\n"; 231 break; 232 case XML_ERR_LT_IN_ATTRIBUTE: 233 errmsg = "Unescaped '<' not allowed in attributes values\n"; 234 break; 235 case XML_ERR_LITERAL_NOT_STARTED: 236 errmsg = "SystemLiteral \" or ' expected\n"; 237 break; 238 case XML_ERR_LITERAL_NOT_FINISHED: 239 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 240 break; 241 case XML_ERR_MISPLACED_CDATA_END: 242 errmsg = "Sequence ']]>' not allowed in content\n"; 243 break; 244 case XML_ERR_URI_REQUIRED: 245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 246 break; 247 case XML_ERR_PUBID_REQUIRED: 248 errmsg = "PUBLIC, the Public Identifier is missing\n"; 249 break; 250 case XML_ERR_HYPHEN_IN_COMMENT: 251 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 252 break; 253 case XML_ERR_PI_NOT_STARTED: 254 errmsg = "xmlParsePI : no target name\n"; 255 break; 256 case XML_ERR_RESERVED_XML_NAME: 257 errmsg = "Invalid PI name\n"; 258 break; 259 case XML_ERR_NOTATION_NOT_STARTED: 260 errmsg = "NOTATION: Name expected here\n"; 261 break; 262 case XML_ERR_NOTATION_NOT_FINISHED: 263 errmsg = "'>' required to close NOTATION declaration\n"; 264 break; 265 case XML_ERR_VALUE_REQUIRED: 266 errmsg = "Entity value required\n"; 267 break; 268 case XML_ERR_URI_FRAGMENT: 269 errmsg = "Fragment not allowed"; 270 break; 271 case XML_ERR_ATTLIST_NOT_STARTED: 272 errmsg = "'(' required to start ATTLIST enumeration\n"; 273 break; 274 case XML_ERR_NMTOKEN_REQUIRED: 275 errmsg = "NmToken expected in ATTLIST enumeration\n"; 276 break; 277 case XML_ERR_ATTLIST_NOT_FINISHED: 278 errmsg = "')' required to finish ATTLIST enumeration\n"; 279 break; 280 case XML_ERR_MIXED_NOT_STARTED: 281 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 282 break; 283 case 
XML_ERR_PCDATA_REQUIRED: 284 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 285 break; 286 case XML_ERR_ELEMCONTENT_NOT_STARTED: 287 errmsg = "ContentDecl : Name or '(' expected\n"; 288 break; 289 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 290 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 291 break; 292 case XML_ERR_PEREF_IN_INT_SUBSET: 293 errmsg = 294 "PEReference: forbidden within markup decl in internal subset\n"; 295 break; 296 case XML_ERR_GT_REQUIRED: 297 errmsg = "expected '>'\n"; 298 break; 299 case XML_ERR_CONDSEC_INVALID: 300 errmsg = "XML conditional section '[' expected\n"; 301 break; 302 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 303 errmsg = "Content error in the external subset\n"; 304 break; 305 case XML_ERR_CONDSEC_INVALID_KEYWORD: 306 errmsg = 307 "conditional section INCLUDE or IGNORE keyword expected\n"; 308 break; 309 case XML_ERR_CONDSEC_NOT_FINISHED: 310 errmsg = "XML conditional section not closed\n"; 311 break; 312 case XML_ERR_XMLDECL_NOT_STARTED: 313 errmsg = "Text declaration '<?xml' required\n"; 314 break; 315 case XML_ERR_XMLDECL_NOT_FINISHED: 316 errmsg = "parsing XML declaration: '?>' expected\n"; 317 break; 318 case XML_ERR_EXT_ENTITY_STANDALONE: 319 errmsg = "external parsed entities cannot be standalone\n"; 320 break; 321 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 322 errmsg = "EntityRef: expecting ';'\n"; 323 break; 324 case XML_ERR_DOCTYPE_NOT_FINISHED: 325 errmsg = "DOCTYPE improperly terminated\n"; 326 break; 327 case XML_ERR_LTSLASH_REQUIRED: 328 errmsg = "EndTag: '</' not found\n"; 329 break; 330 case XML_ERR_EQUAL_REQUIRED: 331 errmsg = "expected '='\n"; 332 break; 333 case XML_ERR_STRING_NOT_CLOSED: 334 errmsg = "String not closed expecting \" or '\n"; 335 break; 336 case XML_ERR_STRING_NOT_STARTED: 337 errmsg = "String not started expecting ' or \"\n"; 338 break; 339 case XML_ERR_ENCODING_NAME: 340 errmsg = "Invalid XML encoding name\n"; 341 break; 342 case XML_ERR_STANDALONE_VALUE: 343 errmsg = "standalone accepts only 
'yes' or 'no'\n"; 344 break; 345 case XML_ERR_DOCUMENT_EMPTY: 346 errmsg = "Document is empty\n"; 347 break; 348 case XML_ERR_DOCUMENT_END: 349 errmsg = "Extra content at the end of the document\n"; 350 break; 351 case XML_ERR_NOT_WELL_BALANCED: 352 errmsg = "chunk is not well balanced\n"; 353 break; 354 case XML_ERR_EXTRA_CONTENT: 355 errmsg = "extra content at the end of well balanced chunk\n"; 356 break; 357 case XML_ERR_VERSION_MISSING: 358 errmsg = "Malformed declaration expecting version\n"; 359 break; 360#if 0 361 case: 362 errmsg = "\n"; 363 break; 364#endif 365 default: 366 errmsg = "Unregistered error message\n"; 367 } 368 if (ctxt != NULL) 369 ctxt->errNo = error; 370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 372 info); 373 if (ctxt != NULL) { 374 ctxt->wellFormed = 0; 375 if (ctxt->recovery == 0) 376 ctxt->disableSAX = 1; 377 } 378} 379 380/** 381 * xmlFatalErrMsg: 382 * @ctxt: an XML parser context 383 * @error: the error number 384 * @msg: the error message 385 * 386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 387 */ 388static void 389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 390 const char *msg) 391{ 392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 393 (ctxt->instate == XML_PARSER_EOF)) 394 return; 395 if (ctxt != NULL) 396 ctxt->errNo = error; 397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 399 if (ctxt != NULL) { 400 ctxt->wellFormed = 0; 401 if (ctxt->recovery == 0) 402 ctxt->disableSAX = 1; 403 } 404} 405 406/** 407 * xmlWarningMsg: 408 * @ctxt: an XML parser context 409 * @error: the error number 410 * @msg: the error message 411 * @str1: extra data 412 * @str2: extra data 413 * 414 * Handle a warning. 
415 */ 416static void 417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 418 const char *msg, const xmlChar *str1, const xmlChar *str2) 419{ 420 xmlStructuredErrorFunc schannel = NULL; 421 422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 423 (ctxt->instate == XML_PARSER_EOF)) 424 return; 425 if ((ctxt != NULL) && (ctxt->sax != NULL) && 426 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 427 schannel = ctxt->sax->serror; 428 __xmlRaiseError(schannel, 429 (ctxt->sax) ? ctxt->sax->warning : NULL, 430 ctxt->userData, 431 ctxt, NULL, XML_FROM_PARSER, error, 432 XML_ERR_WARNING, NULL, 0, 433 (const char *) str1, (const char *) str2, NULL, 0, 0, 434 msg, (const char *) str1, (const char *) str2); 435} 436 437/** 438 * xmlValidityError: 439 * @ctxt: an XML parser context 440 * @error: the error number 441 * @msg: the error message 442 * @str1: extra data 443 * 444 * Handle a validity error. 445 */ 446static void 447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 448 const char *msg, const xmlChar *str1) 449{ 450 xmlStructuredErrorFunc schannel = NULL; 451 452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 453 (ctxt->instate == XML_PARSER_EOF)) 454 return; 455 if (ctxt != NULL) { 456 ctxt->errNo = error; 457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 458 schannel = ctxt->sax->serror; 459 } 460 __xmlRaiseError(schannel, 461 ctxt->vctxt.error, ctxt->vctxt.userData, 462 ctxt, NULL, XML_FROM_DTD, error, 463 XML_ERR_ERROR, NULL, 0, (const char *) str1, 464 NULL, NULL, 0, 0, 465 msg, (const char *) str1); 466 if (ctxt != NULL) { 467 ctxt->valid = 0; 468 } 469} 470 471/** 472 * xmlFatalErrMsgInt: 473 * @ctxt: an XML parser context 474 * @error: the error number 475 * @msg: the error message 476 * @val: an integer value 477 * 478 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 479 */ 480static void 481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 482 const char *msg, int val) 483{ 484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 485 (ctxt->instate == XML_PARSER_EOF)) 486 return; 487 if (ctxt != NULL) 488 ctxt->errNo = error; 489 __xmlRaiseError(NULL, NULL, NULL, 490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 492 if (ctxt != NULL) { 493 ctxt->wellFormed = 0; 494 if (ctxt->recovery == 0) 495 ctxt->disableSAX = 1; 496 } 497} 498 499/** 500 * xmlFatalErrMsgStrIntStr: 501 * @ctxt: an XML parser context 502 * @error: the error number 503 * @msg: the error message 504 * @str1: an string info 505 * @val: an integer value 506 * @str2: an string info 507 * 508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 509 */ 510static void 511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 512 const char *msg, const xmlChar *str1, int val, 513 const xmlChar *str2) 514{ 515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 516 (ctxt->instate == XML_PARSER_EOF)) 517 return; 518 if (ctxt != NULL) 519 ctxt->errNo = error; 520 __xmlRaiseError(NULL, NULL, NULL, 521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 522 NULL, 0, (const char *) str1, (const char *) str2, 523 NULL, val, 0, msg, str1, val, str2); 524 if (ctxt != NULL) { 525 ctxt->wellFormed = 0; 526 if (ctxt->recovery == 0) 527 ctxt->disableSAX = 1; 528 } 529} 530 531/** 532 * xmlFatalErrMsgStr: 533 * @ctxt: an XML parser context 534 * @error: the error number 535 * @msg: the error message 536 * @val: a string value 537 * 538 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 539 */ 540static void 541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 542 const char *msg, const xmlChar * val) 543{ 544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 545 (ctxt->instate == XML_PARSER_EOF)) 546 return; 547 if (ctxt != NULL) 548 ctxt->errNo = error; 549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 550 XML_FROM_PARSER, error, XML_ERR_FATAL, 551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 552 val); 553 if (ctxt != NULL) { 554 ctxt->wellFormed = 0; 555 if (ctxt->recovery == 0) 556 ctxt->disableSAX = 1; 557 } 558} 559 560/** 561 * xmlErrMsgStr: 562 * @ctxt: an XML parser context 563 * @error: the error number 564 * @msg: the error message 565 * @val: a string value 566 * 567 * Handle a non fatal parser error 568 */ 569static void 570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 571 const char *msg, const xmlChar * val) 572{ 573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 574 (ctxt->instate == XML_PARSER_EOF)) 575 return; 576 if (ctxt != NULL) 577 ctxt->errNo = error; 578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 579 XML_FROM_PARSER, error, XML_ERR_ERROR, 580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 581 val); 582} 583 584/** 585 * xmlNsErr: 586 * @ctxt: an XML parser context 587 * @error: the error number 588 * @msg: the message 589 * @info1: extra information string 590 * @info2: extra information string 591 * 592 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 593 */ 594static void 595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 596 const char *msg, 597 const xmlChar * info1, const xmlChar * info2, 598 const xmlChar * info3) 599{ 600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 601 (ctxt->instate == XML_PARSER_EOF)) 602 return; 603 if (ctxt != NULL) 604 ctxt->errNo = error; 605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 606 XML_ERR_ERROR, NULL, 0, (const char *) info1, 607 (const char *) info2, (const char *) info3, 0, 0, msg, 608 info1, info2, info3); 609 if (ctxt != NULL) 610 ctxt->nsWellFormed = 0; 611} 612 613/************************************************************************ 614 * * 615 * Library wide options * 616 * * 617 ************************************************************************/ 618 619/** 620 * xmlHasFeature: 621 * @feature: the feature to be examined 622 * 623 * Examines if the library has been compiled with a given feature. 624 * 625 * Returns a non-zero value if the feature exist, otherwise zero. 626 * Returns zero (0) if the feature does not exist or an unknown 627 * unknown feature is requested, non-zero otherwise. 
628 */ 629int 630xmlHasFeature(xmlFeature feature) 631{ 632 switch (feature) { 633 case XML_WITH_THREAD: 634#ifdef LIBXML_THREAD_ENABLED 635 return(1); 636#else 637 return(0); 638#endif 639 case XML_WITH_TREE: 640#ifdef LIBXML_TREE_ENABLED 641 return(1); 642#else 643 return(0); 644#endif 645 case XML_WITH_OUTPUT: 646#ifdef LIBXML_OUTPUT_ENABLED 647 return(1); 648#else 649 return(0); 650#endif 651 case XML_WITH_PUSH: 652#ifdef LIBXML_PUSH_ENABLED 653 return(1); 654#else 655 return(0); 656#endif 657 case XML_WITH_READER: 658#ifdef LIBXML_READER_ENABLED 659 return(1); 660#else 661 return(0); 662#endif 663 case XML_WITH_PATTERN: 664#ifdef LIBXML_PATTERN_ENABLED 665 return(1); 666#else 667 return(0); 668#endif 669 case XML_WITH_WRITER: 670#ifdef LIBXML_WRITER_ENABLED 671 return(1); 672#else 673 return(0); 674#endif 675 case XML_WITH_SAX1: 676#ifdef LIBXML_SAX1_ENABLED 677 return(1); 678#else 679 return(0); 680#endif 681 case XML_WITH_FTP: 682#ifdef LIBXML_FTP_ENABLED 683 return(1); 684#else 685 return(0); 686#endif 687 case XML_WITH_HTTP: 688#ifdef LIBXML_HTTP_ENABLED 689 return(1); 690#else 691 return(0); 692#endif 693 case XML_WITH_VALID: 694#ifdef LIBXML_VALID_ENABLED 695 return(1); 696#else 697 return(0); 698#endif 699 case XML_WITH_HTML: 700#ifdef LIBXML_HTML_ENABLED 701 return(1); 702#else 703 return(0); 704#endif 705 case XML_WITH_LEGACY: 706#ifdef LIBXML_LEGACY_ENABLED 707 return(1); 708#else 709 return(0); 710#endif 711 case XML_WITH_C14N: 712#ifdef LIBXML_C14N_ENABLED 713 return(1); 714#else 715 return(0); 716#endif 717 case XML_WITH_CATALOG: 718#ifdef LIBXML_CATALOG_ENABLED 719 return(1); 720#else 721 return(0); 722#endif 723 case XML_WITH_XPATH: 724#ifdef LIBXML_XPATH_ENABLED 725 return(1); 726#else 727 return(0); 728#endif 729 case XML_WITH_XPTR: 730#ifdef LIBXML_XPTR_ENABLED 731 return(1); 732#else 733 return(0); 734#endif 735 case XML_WITH_XINCLUDE: 736#ifdef LIBXML_XINCLUDE_ENABLED 737 return(1); 738#else 739 return(0); 740#endif 741 case XML_WITH_ICONV: 
742#ifdef LIBXML_ICONV_ENABLED 743 return(1); 744#else 745 return(0); 746#endif 747 case XML_WITH_ISO8859X: 748#ifdef LIBXML_ISO8859X_ENABLED 749 return(1); 750#else 751 return(0); 752#endif 753 case XML_WITH_UNICODE: 754#ifdef LIBXML_UNICODE_ENABLED 755 return(1); 756#else 757 return(0); 758#endif 759 case XML_WITH_REGEXP: 760#ifdef LIBXML_REGEXP_ENABLED 761 return(1); 762#else 763 return(0); 764#endif 765 case XML_WITH_AUTOMATA: 766#ifdef LIBXML_AUTOMATA_ENABLED 767 return(1); 768#else 769 return(0); 770#endif 771 case XML_WITH_EXPR: 772#ifdef LIBXML_EXPR_ENABLED 773 return(1); 774#else 775 return(0); 776#endif 777 case XML_WITH_SCHEMAS: 778#ifdef LIBXML_SCHEMAS_ENABLED 779 return(1); 780#else 781 return(0); 782#endif 783 case XML_WITH_SCHEMATRON: 784#ifdef LIBXML_SCHEMATRON_ENABLED 785 return(1); 786#else 787 return(0); 788#endif 789 case XML_WITH_MODULES: 790#ifdef LIBXML_MODULES_ENABLED 791 return(1); 792#else 793 return(0); 794#endif 795 case XML_WITH_DEBUG: 796#ifdef LIBXML_DEBUG_ENABLED 797 return(1); 798#else 799 return(0); 800#endif 801 case XML_WITH_DEBUG_MEM: 802#ifdef DEBUG_MEMORY_LOCATION 803 return(1); 804#else 805 return(0); 806#endif 807 case XML_WITH_DEBUG_RUN: 808#ifdef LIBXML_DEBUG_RUNTIME 809 return(1); 810#else 811 return(0); 812#endif 813 case XML_WITH_ZLIB: 814#ifdef LIBXML_ZLIB_ENABLED 815 return(1); 816#else 817 return(0); 818#endif 819 default: 820 break; 821 } 822 return(0); 823} 824 825/************************************************************************ 826 * * 827 * SAX2 defaulted attributes handling * 828 * * 829 ************************************************************************/ 830 831/** 832 * xmlDetectSAX2: 833 * @ctxt: an XML parser context 834 * 835 * Do the SAX2 detection and specific intialization 836 */ 837static void 838xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 839 if (ctxt == NULL) return; 840#ifdef LIBXML_SAX1_ENABLED 841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 842 
((ctxt->sax->startElementNs != NULL) || 843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 844#else 845 ctxt->sax2 = 1; 846#endif /* LIBXML_SAX1_ENABLED */ 847 848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 852 (ctxt->str_xml_ns == NULL)) { 853 xmlErrMemory(ctxt, NULL); 854 } 855} 856 857typedef struct _xmlDefAttrs xmlDefAttrs; 858typedef xmlDefAttrs *xmlDefAttrsPtr; 859struct _xmlDefAttrs { 860 int nbAttrs; /* number of defaulted attributes on that element */ 861 int maxAttrs; /* the size of the array */ 862 const xmlChar *values[4]; /* array of localname/prefix/values */ 863}; 864 865/** 866 * xmlAttrNormalizeSpace: 867 * @src: the source string 868 * @dst: the target string 869 * 870 * Normalize the space in non CDATA attribute values: 871 * If the attribute type is not CDATA, then the XML processor MUST further 872 * process the normalized attribute value by discarding any leading and 873 * trailing space (#x20) characters, and by replacing sequences of space 874 * (#x20) characters by a single space (#x20) character. 875 * Note that the size of dst need to be at least src, and if one doesn't need 876 * to preserve dst (and it doesn't come from a dictionary or read-only) then 877 * passing src as dst is just fine. 878 * 879 * Returns a pointer to the normalized value (dst) or NULL if no conversion 880 * is needed. 
881 */ 882static xmlChar * 883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 884{ 885 if ((src == NULL) || (dst == NULL)) 886 return(NULL); 887 888 while (*src == 0x20) src++; 889 while (*src != 0) { 890 if (*src == 0x20) { 891 while (*src == 0x20) src++; 892 if (*src != 0) 893 *dst++ = 0x20; 894 } else { 895 *dst++ = *src++; 896 } 897 } 898 *dst = 0; 899 if (dst == src) 900 return(NULL); 901 return(dst); 902} 903 904/** 905 * xmlAttrNormalizeSpace2: 906 * @src: the source string 907 * 908 * Normalize the space in non CDATA attribute values, a slightly more complex 909 * front end to avoid allocation problems when running on attribute values 910 * coming from the input. 911 * 912 * Returns a pointer to the normalized value (dst) or NULL if no conversion 913 * is needed. 914 */ 915static const xmlChar * 916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) 917{ 918 int i; 919 int remove_head = 0; 920 int need_realloc = 0; 921 const xmlChar *cur; 922 923 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 924 return(NULL); 925 i = *len; 926 if (i <= 0) 927 return(NULL); 928 929 cur = src; 930 while (*cur == 0x20) { 931 cur++; 932 remove_head++; 933 } 934 while (*cur != 0) { 935 if (*cur == 0x20) { 936 cur++; 937 if ((*cur == 0x20) || (*cur == 0)) { 938 need_realloc = 1; 939 break; 940 } 941 } else 942 cur++; 943 } 944 if (need_realloc) { 945 xmlChar *ret; 946 947 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 948 if (ret == NULL) { 949 xmlErrMemory(ctxt, NULL); 950 return(NULL); 951 } 952 xmlAttrNormalizeSpace(ret, ret); 953 *len = (int) strlen((const char *)ret); 954 return(ret); 955 } else if (remove_head) { 956 *len -= remove_head; 957 return(src + remove_head); 958 } 959 return(NULL); 960} 961 962/** 963 * xmlAddDefAttrs: 964 * @ctxt: an XML parser context 965 * @fullname: the element fullname 966 * @fullattr: the attribute fullname 967 * @value: the attribute value 968 * 969 * Add a defaulted attribute for an 
element 970 */ 971static void 972xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 973 const xmlChar *fullname, 974 const xmlChar *fullattr, 975 const xmlChar *value) { 976 xmlDefAttrsPtr defaults; 977 int len; 978 const xmlChar *name; 979 const xmlChar *prefix; 980 981 /* 982 * Allows to detect attribute redefinitions 983 */ 984 if (ctxt->attsSpecial != NULL) { 985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 986 return; 987 } 988 989 if (ctxt->attsDefault == NULL) { 990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 991 if (ctxt->attsDefault == NULL) 992 goto mem_error; 993 } 994 995 /* 996 * split the element name into prefix:localname , the string found 997 * are within the DTD and then not associated to namespace names. 998 */ 999 name = xmlSplitQName3(fullname, &len); 1000 if (name == NULL) { 1001 name = xmlDictLookup(ctxt->dict, fullname, -1); 1002 prefix = NULL; 1003 } else { 1004 name = xmlDictLookup(ctxt->dict, name, -1); 1005 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1006 } 1007 1008 /* 1009 * make sure there is some storage 1010 */ 1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1012 if (defaults == NULL) { 1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1014 (4 * 4) * sizeof(const xmlChar *)); 1015 if (defaults == NULL) 1016 goto mem_error; 1017 defaults->nbAttrs = 0; 1018 defaults->maxAttrs = 4; 1019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1020 defaults, NULL) < 0) { 1021 xmlFree(defaults); 1022 goto mem_error; 1023 } 1024 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1025 xmlDefAttrsPtr temp; 1026 1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 1029 if (temp == NULL) 1030 goto mem_error; 1031 defaults = temp; 1032 defaults->maxAttrs *= 2; 1033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1034 defaults, NULL) < 0) { 1035 xmlFree(defaults); 1036 goto mem_error; 1037 } 
1038 } 1039 1040 /* 1041 * Split the element name into prefix:localname , the string found 1042 * are within the DTD and hen not associated to namespace names. 1043 */ 1044 name = xmlSplitQName3(fullattr, &len); 1045 if (name == NULL) { 1046 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1047 prefix = NULL; 1048 } else { 1049 name = xmlDictLookup(ctxt->dict, name, -1); 1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1051 } 1052 1053 defaults->values[4 * defaults->nbAttrs] = name; 1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 1055 /* intern the string and precompute the end */ 1056 len = xmlStrlen(value); 1057 value = xmlDictLookup(ctxt->dict, value, len); 1058 defaults->values[4 * defaults->nbAttrs + 2] = value; 1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 1060 defaults->nbAttrs++; 1061 1062 return; 1063 1064mem_error: 1065 xmlErrMemory(ctxt, NULL); 1066 return; 1067} 1068 1069/** 1070 * xmlAddSpecialAttr: 1071 * @ctxt: an XML parser context 1072 * @fullname: the element fullname 1073 * @fullattr: the attribute fullname 1074 * @type: the attribute type 1075 * 1076 * Register this attribute type 1077 */ 1078static void 1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1080 const xmlChar *fullname, 1081 const xmlChar *fullattr, 1082 int type) 1083{ 1084 if (ctxt->attsSpecial == NULL) { 1085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1086 if (ctxt->attsSpecial == NULL) 1087 goto mem_error; 1088 } 1089 1090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1091 return; 1092 1093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1094 (void *) (long) type); 1095 return; 1096 1097mem_error: 1098 xmlErrMemory(ctxt, NULL); 1099 return; 1100} 1101 1102/** 1103 * xmlCleanSpecialAttrCallback: 1104 * 1105 * Removes CDATA attributes from the special attribute table 1106 */ 1107static void 1108xmlCleanSpecialAttrCallback(void *payload, void *data, 1109 const xmlChar *fullname, const xmlChar *fullattr, 1110 
const xmlChar *unused ATTRIBUTE_UNUSED) { 1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1112 1113 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1115 } 1116} 1117 1118/** 1119 * xmlCleanSpecialAttr: 1120 * @ctxt: an XML parser context 1121 * 1122 * Trim the list of attributes defined to remove all those of type 1123 * CDATA as they are not special. This call should be done when finishing 1124 * to parse the DTD and before starting to parse the document root. 1125 */ 1126static void 1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1128{ 1129 if (ctxt->attsSpecial == NULL) 1130 return; 1131 1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1133 1134 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1135 xmlHashFree(ctxt->attsSpecial, NULL); 1136 ctxt->attsSpecial = NULL; 1137 } 1138 return; 1139} 1140 1141/** 1142 * xmlCheckLanguageID: 1143 * @lang: pointer to the string value 1144 * 1145 * Checks that the value conforms to the LanguageID production: 1146 * 1147 * NOTE: this is somewhat deprecated, those productions were removed from 1148 * the XML Second edition. 
1149 * 1150 * [33] LanguageID ::= Langcode ('-' Subcode)* 1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1155 * [38] Subcode ::= ([a-z] | [A-Z])+ 1156 * 1157 * Returns 1 if correct 0 otherwise 1158 **/ 1159int 1160xmlCheckLanguageID(const xmlChar * lang) 1161{ 1162 const xmlChar *cur = lang; 1163 1164 if (cur == NULL) 1165 return (0); 1166 if (((cur[0] == 'i') && (cur[1] == '-')) || 1167 ((cur[0] == 'I') && (cur[1] == '-'))) { 1168 /* 1169 * IANA code 1170 */ 1171 cur += 2; 1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1173 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1174 cur++; 1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1176 ((cur[0] == 'X') && (cur[1] == '-'))) { 1177 /* 1178 * User code 1179 */ 1180 cur += 2; 1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1182 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1183 cur++; 1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1186 /* 1187 * ISO639 1188 */ 1189 cur++; 1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1191 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1192 cur++; 1193 else 1194 return (0); 1195 } else 1196 return (0); 1197 while (cur[0] != 0) { /* non input consuming */ 1198 if (cur[0] != '-') 1199 return (0); 1200 cur++; 1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1202 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1203 cur++; 1204 else 1205 return (0); 1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1207 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1208 cur++; 1209 } 1210 return (1); 1211} 1212 1213/************************************************************************ 1214 * * 1215 * Parser stacks related functions and macros * 1216 * * 1217 
************************************************************************/ 1218 1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1220 const xmlChar ** str); 1221 1222#ifdef SAX2 1223/** 1224 * nsPush: 1225 * @ctxt: an XML parser context 1226 * @prefix: the namespace prefix or NULL 1227 * @URL: the namespace name 1228 * 1229 * Pushes a new parser namespace on top of the ns stack 1230 * 1231 * Returns -1 in case of error, -2 if the namespace should be discarded 1232 * and the index in the stack otherwise. 1233 */ 1234static int 1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1236{ 1237 if (ctxt->options & XML_PARSE_NSCLEAN) { 1238 int i; 1239 for (i = 0;i < ctxt->nsNr;i += 2) { 1240 if (ctxt->nsTab[i] == prefix) { 1241 /* in scope */ 1242 if (ctxt->nsTab[i + 1] == URL) 1243 return(-2); 1244 /* out of scope keep it */ 1245 break; 1246 } 1247 } 1248 } 1249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1250 ctxt->nsMax = 10; 1251 ctxt->nsNr = 0; 1252 ctxt->nsTab = (const xmlChar **) 1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1254 if (ctxt->nsTab == NULL) { 1255 xmlErrMemory(ctxt, NULL); 1256 ctxt->nsMax = 0; 1257 return (-1); 1258 } 1259 } else if (ctxt->nsNr >= ctxt->nsMax) { 1260 ctxt->nsMax *= 2; 1261 ctxt->nsTab = (const xmlChar **) 1262 xmlRealloc((char *) ctxt->nsTab, 1263 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1264 if (ctxt->nsTab == NULL) { 1265 xmlErrMemory(ctxt, NULL); 1266 ctxt->nsMax /= 2; 1267 return (-1); 1268 } 1269 } 1270 ctxt->nsTab[ctxt->nsNr++] = prefix; 1271 ctxt->nsTab[ctxt->nsNr++] = URL; 1272 return (ctxt->nsNr); 1273} 1274/** 1275 * nsPop: 1276 * @ctxt: an XML parser context 1277 * @nr: the number to pop 1278 * 1279 * Pops the top @nr parser prefix/namespace from the ns stack 1280 * 1281 * Returns the number of namespaces removed 1282 */ 1283static int 1284nsPop(xmlParserCtxtPtr ctxt, int nr) 1285{ 1286 int i; 1287 1288 if (ctxt->nsTab == NULL) return(0); 1289 if (ctxt->nsNr < nr) { 1290 
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1291 nr = ctxt->nsNr; 1292 } 1293 if (ctxt->nsNr <= 0) 1294 return (0); 1295 1296 for (i = 0;i < nr;i++) { 1297 ctxt->nsNr--; 1298 ctxt->nsTab[ctxt->nsNr] = NULL; 1299 } 1300 return(nr); 1301} 1302#endif 1303 1304static int 1305xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1306 const xmlChar **atts; 1307 int *attallocs; 1308 int maxatts; 1309 1310 if (ctxt->atts == NULL) { 1311 maxatts = 55; /* allow for 10 attrs by default */ 1312 atts = (const xmlChar **) 1313 xmlMalloc(maxatts * sizeof(xmlChar *)); 1314 if (atts == NULL) goto mem_error; 1315 ctxt->atts = atts; 1316 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1317 if (attallocs == NULL) goto mem_error; 1318 ctxt->attallocs = attallocs; 1319 ctxt->maxatts = maxatts; 1320 } else if (nr + 5 > ctxt->maxatts) { 1321 maxatts = (nr + 5) * 2; 1322 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1323 maxatts * sizeof(const xmlChar *)); 1324 if (atts == NULL) goto mem_error; 1325 ctxt->atts = atts; 1326 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1327 (maxatts / 5) * sizeof(int)); 1328 if (attallocs == NULL) goto mem_error; 1329 ctxt->attallocs = attallocs; 1330 ctxt->maxatts = maxatts; 1331 } 1332 return(ctxt->maxatts); 1333mem_error: 1334 xmlErrMemory(ctxt, NULL); 1335 return(-1); 1336} 1337 1338/** 1339 * inputPush: 1340 * @ctxt: an XML parser context 1341 * @value: the parser input 1342 * 1343 * Pushes a new parser input on top of the input stack 1344 * 1345 * Returns 0 in case of error, the index in the stack otherwise 1346 */ 1347int 1348inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1349{ 1350 if ((ctxt == NULL) || (value == NULL)) 1351 return(0); 1352 if (ctxt->inputNr >= ctxt->inputMax) { 1353 ctxt->inputMax *= 2; 1354 ctxt->inputTab = 1355 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1356 ctxt->inputMax * 1357 sizeof(ctxt->inputTab[0])); 1358 if (ctxt->inputTab == NULL) { 1359 
xmlErrMemory(ctxt, NULL); 1360 return (0); 1361 } 1362 } 1363 ctxt->inputTab[ctxt->inputNr] = value; 1364 ctxt->input = value; 1365 return (ctxt->inputNr++); 1366} 1367/** 1368 * inputPop: 1369 * @ctxt: an XML parser context 1370 * 1371 * Pops the top parser input from the input stack 1372 * 1373 * Returns the input just removed 1374 */ 1375xmlParserInputPtr 1376inputPop(xmlParserCtxtPtr ctxt) 1377{ 1378 xmlParserInputPtr ret; 1379 1380 if (ctxt == NULL) 1381 return(NULL); 1382 if (ctxt->inputNr <= 0) 1383 return (NULL); 1384 ctxt->inputNr--; 1385 if (ctxt->inputNr > 0) 1386 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1387 else 1388 ctxt->input = NULL; 1389 ret = ctxt->inputTab[ctxt->inputNr]; 1390 ctxt->inputTab[ctxt->inputNr] = NULL; 1391 return (ret); 1392} 1393/** 1394 * nodePush: 1395 * @ctxt: an XML parser context 1396 * @value: the element node 1397 * 1398 * Pushes a new element node on top of the node stack 1399 * 1400 * Returns 0 in case of error, the index in the stack otherwise 1401 */ 1402int 1403nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1404{ 1405 if (ctxt == NULL) return(0); 1406 if (ctxt->nodeNr >= ctxt->nodeMax) { 1407 xmlNodePtr *tmp; 1408 1409 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1410 ctxt->nodeMax * 2 * 1411 sizeof(ctxt->nodeTab[0])); 1412 if (tmp == NULL) { 1413 xmlErrMemory(ctxt, NULL); 1414 return (0); 1415 } 1416 ctxt->nodeTab = tmp; 1417 ctxt->nodeMax *= 2; 1418 } 1419 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 1420 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1421 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 1422 xmlParserMaxDepth); 1423 ctxt->instate = XML_PARSER_EOF; 1424 return(0); 1425 } 1426 ctxt->nodeTab[ctxt->nodeNr] = value; 1427 ctxt->node = value; 1428 return (ctxt->nodeNr++); 1429} 1430/** 1431 * nodePop: 1432 * @ctxt: an XML parser context 1433 * 1434 * Pops the top element node from the node stack 1435 * 1436 * Returns the node just removed 1437 */ 1438xmlNodePtr 
1439nodePop(xmlParserCtxtPtr ctxt) 1440{ 1441 xmlNodePtr ret; 1442 1443 if (ctxt == NULL) return(NULL); 1444 if (ctxt->nodeNr <= 0) 1445 return (NULL); 1446 ctxt->nodeNr--; 1447 if (ctxt->nodeNr > 0) 1448 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1449 else 1450 ctxt->node = NULL; 1451 ret = ctxt->nodeTab[ctxt->nodeNr]; 1452 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1453 return (ret); 1454} 1455 1456#ifdef LIBXML_PUSH_ENABLED 1457/** 1458 * nameNsPush: 1459 * @ctxt: an XML parser context 1460 * @value: the element name 1461 * @prefix: the element prefix 1462 * @URI: the element namespace name 1463 * 1464 * Pushes a new element name/prefix/URL on top of the name stack 1465 * 1466 * Returns -1 in case of error, the index in the stack otherwise 1467 */ 1468static int 1469nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1470 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1471{ 1472 if (ctxt->nameNr >= ctxt->nameMax) { 1473 const xmlChar * *tmp; 1474 void **tmp2; 1475 ctxt->nameMax *= 2; 1476 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1477 ctxt->nameMax * 1478 sizeof(ctxt->nameTab[0])); 1479 if (tmp == NULL) { 1480 ctxt->nameMax /= 2; 1481 goto mem_error; 1482 } 1483 ctxt->nameTab = tmp; 1484 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1485 ctxt->nameMax * 3 * 1486 sizeof(ctxt->pushTab[0])); 1487 if (tmp2 == NULL) { 1488 ctxt->nameMax /= 2; 1489 goto mem_error; 1490 } 1491 ctxt->pushTab = tmp2; 1492 } 1493 ctxt->nameTab[ctxt->nameNr] = value; 1494 ctxt->name = value; 1495 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1496 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1497 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1498 return (ctxt->nameNr++); 1499mem_error: 1500 xmlErrMemory(ctxt, NULL); 1501 return (-1); 1502} 1503/** 1504 * nameNsPop: 1505 * @ctxt: an XML parser context 1506 * 1507 * Pops the top element/prefix/URI name from the name stack 1508 * 1509 * Returns the name just removed 1510 */ 
1511static const xmlChar * 1512nameNsPop(xmlParserCtxtPtr ctxt) 1513{ 1514 const xmlChar *ret; 1515 1516 if (ctxt->nameNr <= 0) 1517 return (NULL); 1518 ctxt->nameNr--; 1519 if (ctxt->nameNr > 0) 1520 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1521 else 1522 ctxt->name = NULL; 1523 ret = ctxt->nameTab[ctxt->nameNr]; 1524 ctxt->nameTab[ctxt->nameNr] = NULL; 1525 return (ret); 1526} 1527#endif /* LIBXML_PUSH_ENABLED */ 1528 1529/** 1530 * namePush: 1531 * @ctxt: an XML parser context 1532 * @value: the element name 1533 * 1534 * Pushes a new element name on top of the name stack 1535 * 1536 * Returns -1 in case of error, the index in the stack otherwise 1537 */ 1538int 1539namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1540{ 1541 if (ctxt == NULL) return (-1); 1542 1543 if (ctxt->nameNr >= ctxt->nameMax) { 1544 const xmlChar * *tmp; 1545 ctxt->nameMax *= 2; 1546 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1547 ctxt->nameMax * 1548 sizeof(ctxt->nameTab[0])); 1549 if (tmp == NULL) { 1550 ctxt->nameMax /= 2; 1551 goto mem_error; 1552 } 1553 ctxt->nameTab = tmp; 1554 } 1555 ctxt->nameTab[ctxt->nameNr] = value; 1556 ctxt->name = value; 1557 return (ctxt->nameNr++); 1558mem_error: 1559 xmlErrMemory(ctxt, NULL); 1560 return (-1); 1561} 1562/** 1563 * namePop: 1564 * @ctxt: an XML parser context 1565 * 1566 * Pops the top element name from the name stack 1567 * 1568 * Returns the name just removed 1569 */ 1570const xmlChar * 1571namePop(xmlParserCtxtPtr ctxt) 1572{ 1573 const xmlChar *ret; 1574 1575 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1576 return (NULL); 1577 ctxt->nameNr--; 1578 if (ctxt->nameNr > 0) 1579 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1580 else 1581 ctxt->name = NULL; 1582 ret = ctxt->nameTab[ctxt->nameNr]; 1583 ctxt->nameTab[ctxt->nameNr] = NULL; 1584 return (ret); 1585} 1586 1587static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1588 if (ctxt->spaceNr >= ctxt->spaceMax) { 1589 ctxt->spaceMax *= 2; 1590 
ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1591 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1592 if (ctxt->spaceTab == NULL) { 1593 xmlErrMemory(ctxt, NULL); 1594 return(0); 1595 } 1596 } 1597 ctxt->spaceTab[ctxt->spaceNr] = val; 1598 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1599 return(ctxt->spaceNr++); 1600} 1601 1602static int spacePop(xmlParserCtxtPtr ctxt) { 1603 int ret; 1604 if (ctxt->spaceNr <= 0) return(0); 1605 ctxt->spaceNr--; 1606 if (ctxt->spaceNr > 0) 1607 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1608 else 1609 ctxt->space = &ctxt->spaceTab[0]; 1610 ret = ctxt->spaceTab[ctxt->spaceNr]; 1611 ctxt->spaceTab[ctxt->spaceNr] = -1; 1612 return(ret); 1613} 1614 1615/* 1616 * Macros for accessing the content. Those should be used only by the parser, 1617 * and not exported. 1618 * 1619 * Dirty macros, i.e. one often need to make assumption on the context to 1620 * use them 1621 * 1622 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1623 * To be used with extreme caution since operations consuming 1624 * characters may move the input buffer to a different location ! 1625 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1626 * This should be used internally by the parser 1627 * only to compare to ASCII values otherwise it would break when 1628 * running with UTF-8 encoding. 1629 * RAW same as CUR but in the input buffer, bypass any token 1630 * extraction that may have been done 1631 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1632 * to compare on ASCII based substring. 1633 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1634 * strings without newlines within the parser. 1635 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1636 * defined char within the parser. 
1637 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1638 * 1639 * NEXT Skip to the next character, this does the proper decoding 1640 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1641 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1642 * CUR_CHAR(l) returns the current unicode character (int), set l 1643 * to the number of xmlChars used for the encoding [0-5]. 1644 * CUR_SCHAR same but operate on a string instead of the context 1645 * COPY_BUF copy the current unicode char to the target buffer, increment 1646 * the index 1647 * GROW, SHRINK handling of input buffers 1648 */ 1649 1650#define RAW (*ctxt->input->cur) 1651#define CUR (*ctxt->input->cur) 1652#define NXT(val) ctxt->input->cur[(val)] 1653#define CUR_PTR ctxt->input->cur 1654 1655#define CMP4( s, c1, c2, c3, c4 ) \ 1656 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1657 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1658#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1659 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1660#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1661 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1662#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1663 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1664#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1665 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1666#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1667 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1668 ((unsigned char *) s)[ 8 ] == c9 ) 1669#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1670 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1671 ((unsigned char *) s)[ 9 ] == c10 ) 1672 1673#define SKIP(val) do { \ 1674 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1675 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1676 if ((*ctxt->input->cur == 0) && \ 1677 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1678 xmlPopInput(ctxt); \ 1679 } while (0) 1680 1681#define SKIPL(val) do { \ 1682 int skipl; \ 1683 for(skipl=0; skipl<val; skipl++) { \ 1684 if (*(ctxt->input->cur) == '\n') { \ 1685 ctxt->input->line++; ctxt->input->col = 1; \ 1686 } else ctxt->input->col++; \ 1687 ctxt->nbChars++; \ 1688 ctxt->input->cur++; \ 1689 } \ 1690 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1691 if ((*ctxt->input->cur == 0) && \ 1692 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1693 xmlPopInput(ctxt); \ 1694 } while (0) 1695 1696#define SHRINK if ((ctxt->progressive == 0) && \ 1697 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1698 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1699 xmlSHRINK (ctxt); 1700 1701static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1702 xmlParserInputShrink(ctxt->input); 1703 if ((*ctxt->input->cur == 0) && 1704 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1705 xmlPopInput(ctxt); 1706 } 1707 1708#define GROW if ((ctxt->progressive == 0) && \ 1709 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1710 xmlGROW (ctxt); 1711 1712static void xmlGROW (xmlParserCtxtPtr ctxt) { 1713 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1714 if ((*ctxt->input->cur == 0) && 1715 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1716 xmlPopInput(ctxt); 1717} 1718 1719#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1720 1721#define NEXT xmlNextChar(ctxt) 1722 1723#define NEXT1 { \ 1724 ctxt->input->col++; \ 1725 ctxt->input->cur++; \ 1726 ctxt->nbChars++; \ 1727 if (*ctxt->input->cur == 0) \ 1728 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1729 } 1730 1731#define NEXTL(l) do { \ 1732 if (*(ctxt->input->cur) == '\n') { \ 1733 ctxt->input->line++; ctxt->input->col = 1; \ 1734 } else ctxt->input->col++; \ 1735 ctxt->input->cur += l; \ 1736 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1737 } while (0) 1738 1739#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1740#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1741 1742#define COPY_BUF(l,b,i,v) \ 1743 if (l == 1) b[i++] = (xmlChar) v; \ 1744 else i += xmlCopyCharMultiByte(&b[i],v) 1745 1746/** 1747 * xmlSkipBlankChars: 1748 * @ctxt: the XML parser context 1749 * 1750 * skip all blanks character found at that point in the input streams. 1751 * It pops up finished entities in the process if allowable at that point. 1752 * 1753 * Returns the number of space chars skipped 1754 */ 1755 1756int 1757xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1758 int res = 0; 1759 1760 /* 1761 * It's Okay to use CUR/NEXT here since all the blanks are on 1762 * the ASCII range. 1763 */ 1764 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1765 const xmlChar *cur; 1766 /* 1767 * if we are in the document content, go really fast 1768 */ 1769 cur = ctxt->input->cur; 1770 while (IS_BLANK_CH(*cur)) { 1771 if (*cur == '\n') { 1772 ctxt->input->line++; ctxt->input->col = 1; 1773 } 1774 cur++; 1775 res++; 1776 if (*cur == 0) { 1777 ctxt->input->cur = cur; 1778 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1779 cur = ctxt->input->cur; 1780 } 1781 } 1782 ctxt->input->cur = cur; 1783 } else { 1784 int cur; 1785 do { 1786 cur = CUR; 1787 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1788 NEXT; 1789 cur = CUR; 1790 res++; 1791 } 1792 while ((cur == 0) && (ctxt->inputNr > 1) && 1793 (ctxt->instate != XML_PARSER_COMMENT)) { 1794 xmlPopInput(ctxt); 1795 cur = CUR; 1796 } 1797 /* 1798 * Need to handle support of entities branching here 1799 */ 1800 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1801 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1802 } 1803 return(res); 1804} 1805 1806/************************************************************************ 1807 * * 1808 * Commodity functions to handle entities * 1809 * * 1810 
************************************************************************/ 1811 1812/** 1813 * xmlPopInput: 1814 * @ctxt: an XML parser context 1815 * 1816 * xmlPopInput: the current input pointed by ctxt->input came to an end 1817 * pop it and return the next char. 1818 * 1819 * Returns the current xmlChar in the parser context 1820 */ 1821xmlChar 1822xmlPopInput(xmlParserCtxtPtr ctxt) { 1823 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1824 if (xmlParserDebugEntities) 1825 xmlGenericError(xmlGenericErrorContext, 1826 "Popping input %d\n", ctxt->inputNr); 1827 xmlFreeInputStream(inputPop(ctxt)); 1828 if ((*ctxt->input->cur == 0) && 1829 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1830 return(xmlPopInput(ctxt)); 1831 return(CUR); 1832} 1833 1834/** 1835 * xmlPushInput: 1836 * @ctxt: an XML parser context 1837 * @input: an XML parser input fragment (entity, XML fragment ...). 1838 * 1839 * xmlPushInput: switch to a new input stream which is stacked on top 1840 * of the previous one(s). 1841 */ 1842void 1843xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1844 if (input == NULL) return; 1845 1846 if (xmlParserDebugEntities) { 1847 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1848 xmlGenericError(xmlGenericErrorContext, 1849 "%s(%d): ", ctxt->input->filename, 1850 ctxt->input->line); 1851 xmlGenericError(xmlGenericErrorContext, 1852 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1853 } 1854 inputPush(ctxt, input); 1855 GROW; 1856} 1857 1858/** 1859 * xmlParseCharRef: 1860 * @ctxt: an XML parser context 1861 * 1862 * parse Reference declarations 1863 * 1864 * [66] CharRef ::= '&#' [0-9]+ ';' | 1865 * '&#x' [0-9a-fA-F]+ ';' 1866 * 1867 * [ WFC: Legal Character ] 1868 * Characters referred to using character references must match the 1869 * production for Char. 
1870 * 1871 * Returns the value parsed (as an int), 0 in case of error 1872 */ 1873int 1874xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1875 unsigned int val = 0; 1876 int count = 0; 1877 unsigned int outofrange = 0; 1878 1879 /* 1880 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1881 */ 1882 if ((RAW == '&') && (NXT(1) == '#') && 1883 (NXT(2) == 'x')) { 1884 SKIP(3); 1885 GROW; 1886 while (RAW != ';') { /* loop blocked by count */ 1887 if (count++ > 20) { 1888 count = 0; 1889 GROW; 1890 } 1891 if ((RAW >= '0') && (RAW <= '9')) 1892 val = val * 16 + (CUR - '0'); 1893 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1894 val = val * 16 + (CUR - 'a') + 10; 1895 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1896 val = val * 16 + (CUR - 'A') + 10; 1897 else { 1898 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1899 val = 0; 1900 break; 1901 } 1902 if (val > 0x10FFFF) 1903 outofrange = val; 1904 1905 NEXT; 1906 count++; 1907 } 1908 if (RAW == ';') { 1909 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1910 ctxt->input->col++; 1911 ctxt->nbChars ++; 1912 ctxt->input->cur++; 1913 } 1914 } else if ((RAW == '&') && (NXT(1) == '#')) { 1915 SKIP(2); 1916 GROW; 1917 while (RAW != ';') { /* loop blocked by count */ 1918 if (count++ > 20) { 1919 count = 0; 1920 GROW; 1921 } 1922 if ((RAW >= '0') && (RAW <= '9')) 1923 val = val * 10 + (CUR - '0'); 1924 else { 1925 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1926 val = 0; 1927 break; 1928 } 1929 if (val > 0x10FFFF) 1930 outofrange = val; 1931 1932 NEXT; 1933 count++; 1934 } 1935 if (RAW == ';') { 1936 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1937 ctxt->input->col++; 1938 ctxt->nbChars ++; 1939 ctxt->input->cur++; 1940 } 1941 } else { 1942 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1943 } 1944 1945 /* 1946 * [ WFC: Legal Character ] 1947 * Characters referred to using character references must match the 1948 * production for Char. 
1949 */ 1950 if ((IS_CHAR(val) && (outofrange == 0))) { 1951 return(val); 1952 } else { 1953 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1954 "xmlParseCharRef: invalid xmlChar value %d\n", 1955 val); 1956 } 1957 return(0); 1958} 1959 1960/** 1961 * xmlParseStringCharRef: 1962 * @ctxt: an XML parser context 1963 * @str: a pointer to an index in the string 1964 * 1965 * parse Reference declarations, variant parsing from a string rather 1966 * than an an input flow. 1967 * 1968 * [66] CharRef ::= '&#' [0-9]+ ';' | 1969 * '&#x' [0-9a-fA-F]+ ';' 1970 * 1971 * [ WFC: Legal Character ] 1972 * Characters referred to using character references must match the 1973 * production for Char. 1974 * 1975 * Returns the value parsed (as an int), 0 in case of error, str will be 1976 * updated to the current value of the index 1977 */ 1978static int 1979xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1980 const xmlChar *ptr; 1981 xmlChar cur; 1982 unsigned int val = 0; 1983 unsigned int outofrange = 0; 1984 1985 if ((str == NULL) || (*str == NULL)) return(0); 1986 ptr = *str; 1987 cur = *ptr; 1988 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1989 ptr += 3; 1990 cur = *ptr; 1991 while (cur != ';') { /* Non input consuming loop */ 1992 if ((cur >= '0') && (cur <= '9')) 1993 val = val * 16 + (cur - '0'); 1994 else if ((cur >= 'a') && (cur <= 'f')) 1995 val = val * 16 + (cur - 'a') + 10; 1996 else if ((cur >= 'A') && (cur <= 'F')) 1997 val = val * 16 + (cur - 'A') + 10; 1998 else { 1999 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2000 val = 0; 2001 break; 2002 } 2003 if (val > 0x10FFFF) 2004 outofrange = val; 2005 2006 ptr++; 2007 cur = *ptr; 2008 } 2009 if (cur == ';') 2010 ptr++; 2011 } else if ((cur == '&') && (ptr[1] == '#')){ 2012 ptr += 2; 2013 cur = *ptr; 2014 while (cur != ';') { /* Non input consuming loops */ 2015 if ((cur >= '0') && (cur <= '9')) 2016 val = val * 10 + (cur - '0'); 2017 else { 2018 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 2019 val = 0; 2020 break; 2021 } 2022 if (val > 0x10FFFF) 2023 outofrange = val; 2024 2025 ptr++; 2026 cur = *ptr; 2027 } 2028 if (cur == ';') 2029 ptr++; 2030 } else { 2031 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2032 return(0); 2033 } 2034 *str = ptr; 2035 2036 /* 2037 * [ WFC: Legal Character ] 2038 * Characters referred to using character references must match the 2039 * production for Char. 2040 */ 2041 if ((IS_CHAR(val) && (outofrange == 0))) { 2042 return(val); 2043 } else { 2044 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2045 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2046 val); 2047 } 2048 return(0); 2049} 2050 2051/** 2052 * xmlNewBlanksWrapperInputStream: 2053 * @ctxt: an XML parser context 2054 * @entity: an Entity pointer 2055 * 2056 * Create a new input stream for wrapping 2057 * blanks around a PEReference 2058 * 2059 * Returns the new input stream or NULL 2060 */ 2061 2062static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2063 2064static xmlParserInputPtr 2065xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2066 xmlParserInputPtr input; 2067 xmlChar *buffer; 2068 size_t length; 2069 if (entity == NULL) { 2070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2071 "xmlNewBlanksWrapperInputStream entity\n"); 2072 return(NULL); 2073 } 2074 if (xmlParserDebugEntities) 2075 xmlGenericError(xmlGenericErrorContext, 2076 "new blanks wrapper for entity: %s\n", entity->name); 2077 input = xmlNewInputStream(ctxt); 2078 if (input == NULL) { 2079 return(NULL); 2080 } 2081 length = xmlStrlen(entity->name) + 5; 2082 buffer = xmlMallocAtomic(length); 2083 if (buffer == NULL) { 2084 xmlErrMemory(ctxt, NULL); 2085 xmlFree(input); 2086 return(NULL); 2087 } 2088 buffer [0] = ' '; 2089 buffer [1] = '%'; 2090 buffer [length-3] = ';'; 2091 buffer [length-2] = ' '; 2092 buffer [length-1] = 0; 2093 memcpy(buffer + 2, entity->name, length - 5); 2094 input->free = deallocblankswrapper; 
2095 input->base = buffer; 2096 input->cur = buffer; 2097 input->length = length; 2098 input->end = &buffer[length]; 2099 return(input); 2100} 2101 2102/** 2103 * xmlParserHandlePEReference: 2104 * @ctxt: the parser context 2105 * 2106 * [69] PEReference ::= '%' Name ';' 2107 * 2108 * [ WFC: No Recursion ] 2109 * A parsed entity must not contain a recursive 2110 * reference to itself, either directly or indirectly. 2111 * 2112 * [ WFC: Entity Declared ] 2113 * In a document without any DTD, a document with only an internal DTD 2114 * subset which contains no parameter entity references, or a document 2115 * with "standalone='yes'", ... ... The declaration of a parameter 2116 * entity must precede any reference to it... 2117 * 2118 * [ VC: Entity Declared ] 2119 * In a document with an external subset or external parameter entities 2120 * with "standalone='no'", ... ... The declaration of a parameter entity 2121 * must precede any reference to it... 2122 * 2123 * [ WFC: In DTD ] 2124 * Parameter-entity references may only appear in the DTD. 2125 * NOTE: misleading but this is handled. 2126 * 2127 * A PEReference may have been detected in the current input stream 2128 * the handling is done accordingly to 2129 * http://www.w3.org/TR/REC-xml#entproc 2130 * i.e. 
2131 * - Included in literal in entity values 2132 * - Included as Parameter Entity reference within DTDs 2133 */ 2134void 2135xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2136 const xmlChar *name; 2137 xmlEntityPtr entity = NULL; 2138 xmlParserInputPtr input; 2139 2140 if (RAW != '%') return; 2141 switch(ctxt->instate) { 2142 case XML_PARSER_CDATA_SECTION: 2143 return; 2144 case XML_PARSER_COMMENT: 2145 return; 2146 case XML_PARSER_START_TAG: 2147 return; 2148 case XML_PARSER_END_TAG: 2149 return; 2150 case XML_PARSER_EOF: 2151 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2152 return; 2153 case XML_PARSER_PROLOG: 2154 case XML_PARSER_START: 2155 case XML_PARSER_MISC: 2156 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2157 return; 2158 case XML_PARSER_ENTITY_DECL: 2159 case XML_PARSER_CONTENT: 2160 case XML_PARSER_ATTRIBUTE_VALUE: 2161 case XML_PARSER_PI: 2162 case XML_PARSER_SYSTEM_LITERAL: 2163 case XML_PARSER_PUBLIC_LITERAL: 2164 /* we just ignore it there */ 2165 return; 2166 case XML_PARSER_EPILOG: 2167 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2168 return; 2169 case XML_PARSER_ENTITY_VALUE: 2170 /* 2171 * NOTE: in the case of entity values, we don't do the 2172 * substitution here since we need the literal 2173 * entity value to be able to save the internal 2174 * subset of the document. 2175 * This will be handled by xmlStringDecodeEntities 2176 */ 2177 return; 2178 case XML_PARSER_DTD: 2179 /* 2180 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2181 * In the internal DTD subset, parameter-entity references 2182 * can occur only where markup declarations can occur, not 2183 * within markup declarations. 
2184 * In that case this is handled in xmlParseMarkupDecl 2185 */ 2186 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2187 return; 2188 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2189 return; 2190 break; 2191 case XML_PARSER_IGNORE: 2192 return; 2193 } 2194 2195 NEXT; 2196 name = xmlParseName(ctxt); 2197 if (xmlParserDebugEntities) 2198 xmlGenericError(xmlGenericErrorContext, 2199 "PEReference: %s\n", name); 2200 if (name == NULL) { 2201 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2202 } else { 2203 if (RAW == ';') { 2204 NEXT; 2205 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2206 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2207 if (entity == NULL) { 2208 2209 /* 2210 * [ WFC: Entity Declared ] 2211 * In a document without any DTD, a document with only an 2212 * internal DTD subset which contains no parameter entity 2213 * references, or a document with "standalone='yes'", ... 2214 * ... The declaration of a parameter entity must precede 2215 * any reference to it... 2216 */ 2217 if ((ctxt->standalone == 1) || 2218 ((ctxt->hasExternalSubset == 0) && 2219 (ctxt->hasPErefs == 0))) { 2220 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2221 "PEReference: %%%s; not found\n", name); 2222 } else { 2223 /* 2224 * [ VC: Entity Declared ] 2225 * In a document with an external subset or external 2226 * parameter entities with "standalone='no'", ... 2227 * ... The declaration of a parameter entity must precede 2228 * any reference to it... 
2229 */ 2230 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2231 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2232 "PEReference: %%%s; not found\n", 2233 name); 2234 } else 2235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2236 "PEReference: %%%s; not found\n", 2237 name, NULL); 2238 ctxt->valid = 0; 2239 } 2240 } else if (ctxt->input->free != deallocblankswrapper) { 2241 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2242 xmlPushInput(ctxt, input); 2243 } else { 2244 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2245 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2246 xmlChar start[4]; 2247 xmlCharEncoding enc; 2248 2249 /* 2250 * handle the extra spaces added before and after 2251 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2252 * this is done independently. 2253 */ 2254 input = xmlNewEntityInputStream(ctxt, entity); 2255 xmlPushInput(ctxt, input); 2256 2257 /* 2258 * Get the 4 first bytes and decode the charset 2259 * if enc != XML_CHAR_ENCODING_NONE 2260 * plug some encoding conversion routines. 2261 * Note that, since we may have some non-UTF8 2262 * encoding (like UTF16, bug 135229), the 'length' 2263 * is not known, but we can calculate based upon 2264 * the amount of data in the buffer. 
2265 */ 2266 GROW 2267 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2268 start[0] = RAW; 2269 start[1] = NXT(1); 2270 start[2] = NXT(2); 2271 start[3] = NXT(3); 2272 enc = xmlDetectCharEncoding(start, 4); 2273 if (enc != XML_CHAR_ENCODING_NONE) { 2274 xmlSwitchEncoding(ctxt, enc); 2275 } 2276 } 2277 2278 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2279 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2280 (IS_BLANK_CH(NXT(5)))) { 2281 xmlParseTextDecl(ctxt); 2282 } 2283 } else { 2284 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2285 "PEReference: %s is not a parameter entity\n", 2286 name); 2287 } 2288 } 2289 } else { 2290 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2291 } 2292 } 2293} 2294 2295/* 2296 * Macro used to grow the current buffer. 2297 */ 2298#define growBuffer(buffer) { \ 2299 xmlChar *tmp; \ 2300 buffer##_size *= 2; \ 2301 tmp = (xmlChar *) \ 2302 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2303 if (tmp == NULL) goto mem_error; \ 2304 buffer = tmp; \ 2305} 2306 2307/** 2308 * xmlStringLenDecodeEntities: 2309 * @ctxt: the parser context 2310 * @str: the input string 2311 * @len: the string length 2312 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2313 * @end: an end marker xmlChar, 0 if none 2314 * @end2: an end marker xmlChar, 0 if none 2315 * @end3: an end marker xmlChar, 0 if none 2316 * 2317 * Takes a entity string content and process to do the adequate substitutions. 2318 * 2319 * [67] Reference ::= EntityRef | CharRef 2320 * 2321 * [69] PEReference ::= '%' Name ';' 2322 * 2323 * Returns A newly allocated string with the substitution done. The caller 2324 * must deallocate it ! 
2325 */ 2326xmlChar * 2327xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2328 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2329 xmlChar *buffer = NULL; 2330 int buffer_size = 0; 2331 2332 xmlChar *current = NULL; 2333 xmlChar *rep = NULL; 2334 const xmlChar *last; 2335 xmlEntityPtr ent; 2336 int c,l; 2337 int nbchars = 0; 2338 2339 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2340 return(NULL); 2341 last = str + len; 2342 2343 if (ctxt->depth > 40) { 2344 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2345 return(NULL); 2346 } 2347 2348 /* 2349 * allocate a translation buffer. 2350 */ 2351 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2352 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2353 if (buffer == NULL) goto mem_error; 2354 2355 /* 2356 * OK loop until we reach one of the ending char or a size limit. 2357 * we are operating on already parsed values. 2358 */ 2359 if (str < last) 2360 c = CUR_SCHAR(str, l); 2361 else 2362 c = 0; 2363 while ((c != 0) && (c != end) && /* non input consuming loop */ 2364 (c != end2) && (c != end3)) { 2365 2366 if (c == 0) break; 2367 if ((c == '&') && (str[1] == '#')) { 2368 int val = xmlParseStringCharRef(ctxt, &str); 2369 if (val != 0) { 2370 COPY_BUF(0,buffer,nbchars,val); 2371 } 2372 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2373 growBuffer(buffer); 2374 } 2375 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2376 if (xmlParserDebugEntities) 2377 xmlGenericError(xmlGenericErrorContext, 2378 "String decoding Entity Reference: %.30s\n", 2379 str); 2380 ent = xmlParseStringEntityRef(ctxt, &str); 2381 if ((ent != NULL) && 2382 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2383 if (ent->content != NULL) { 2384 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2385 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2386 growBuffer(buffer); 2387 } 2388 } else { 2389 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2390 "predefined entity has no 
content\n"); 2391 } 2392 } else if ((ent != NULL) && (ent->content != NULL)) { 2393 ctxt->depth++; 2394 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2395 0, 0, 0); 2396 ctxt->depth--; 2397 if (rep != NULL) { 2398 current = rep; 2399 while (*current != 0) { /* non input consuming loop */ 2400 buffer[nbchars++] = *current++; 2401 if (nbchars > 2402 buffer_size - XML_PARSER_BUFFER_SIZE) { 2403 growBuffer(buffer); 2404 } 2405 } 2406 xmlFree(rep); 2407 rep = NULL; 2408 } 2409 } else if (ent != NULL) { 2410 int i = xmlStrlen(ent->name); 2411 const xmlChar *cur = ent->name; 2412 2413 buffer[nbchars++] = '&'; 2414 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2415 growBuffer(buffer); 2416 } 2417 for (;i > 0;i--) 2418 buffer[nbchars++] = *cur++; 2419 buffer[nbchars++] = ';'; 2420 } 2421 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2422 if (xmlParserDebugEntities) 2423 xmlGenericError(xmlGenericErrorContext, 2424 "String decoding PE Reference: %.30s\n", str); 2425 ent = xmlParseStringPEReference(ctxt, &str); 2426 if (ent != NULL) { 2427 if (ent->content == NULL) { 2428 if (xmlLoadEntityContent(ctxt, ent) < 0) { 2429 } 2430 } 2431 ctxt->depth++; 2432 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2433 0, 0, 0); 2434 ctxt->depth--; 2435 if (rep != NULL) { 2436 current = rep; 2437 while (*current != 0) { /* non input consuming loop */ 2438 buffer[nbchars++] = *current++; 2439 if (nbchars > 2440 buffer_size - XML_PARSER_BUFFER_SIZE) { 2441 growBuffer(buffer); 2442 } 2443 } 2444 xmlFree(rep); 2445 rep = NULL; 2446 } 2447 } 2448 } else { 2449 COPY_BUF(l,buffer,nbchars,c); 2450 str += l; 2451 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2452 growBuffer(buffer); 2453 } 2454 } 2455 if (str < last) 2456 c = CUR_SCHAR(str, l); 2457 else 2458 c = 0; 2459 } 2460 buffer[nbchars++] = 0; 2461 return(buffer); 2462 2463mem_error: 2464 xmlErrMemory(ctxt, NULL); 2465 if (rep != NULL) 2466 xmlFree(rep); 2467 if (buffer != NULL) 2468 
xmlFree(buffer); 2469 return(NULL); 2470} 2471 2472/** 2473 * xmlStringDecodeEntities: 2474 * @ctxt: the parser context 2475 * @str: the input string 2476 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2477 * @end: an end marker xmlChar, 0 if none 2478 * @end2: an end marker xmlChar, 0 if none 2479 * @end3: an end marker xmlChar, 0 if none 2480 * 2481 * Takes a entity string content and process to do the adequate substitutions. 2482 * 2483 * [67] Reference ::= EntityRef | CharRef 2484 * 2485 * [69] PEReference ::= '%' Name ';' 2486 * 2487 * Returns A newly allocated string with the substitution done. The caller 2488 * must deallocate it ! 2489 */ 2490xmlChar * 2491xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2492 xmlChar end, xmlChar end2, xmlChar end3) { 2493 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2494 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2495 end, end2, end3)); 2496} 2497 2498/************************************************************************ 2499 * * 2500 * Commodity functions, cleanup needed ? * 2501 * * 2502 ************************************************************************/ 2503 2504/** 2505 * areBlanks: 2506 * @ctxt: an XML parser context 2507 * @str: a xmlChar * 2508 * @len: the size of @str 2509 * @blank_chars: we know the chars are blanks 2510 * 2511 * Is this a sequence of blank chars that one can ignore ? 2512 * 2513 * Returns 1 if ignorable 0 otherwise. 2514 */ 2515 2516static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2517 int blank_chars) { 2518 int i, ret; 2519 xmlNodePtr lastChild; 2520 2521 /* 2522 * Don't spend time trying to differentiate them, the same callback is 2523 * used ! 2524 */ 2525 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2526 return(0); 2527 2528 /* 2529 * Check for xml:space value. 
2530 */ 2531 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2532 (*(ctxt->space) == -2)) 2533 return(0); 2534 2535 /* 2536 * Check that the string is made of blanks 2537 */ 2538 if (blank_chars == 0) { 2539 for (i = 0;i < len;i++) 2540 if (!(IS_BLANK_CH(str[i]))) return(0); 2541 } 2542 2543 /* 2544 * Look if the element is mixed content in the DTD if available 2545 */ 2546 if (ctxt->node == NULL) return(0); 2547 if (ctxt->myDoc != NULL) { 2548 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2549 if (ret == 0) return(1); 2550 if (ret == 1) return(0); 2551 } 2552 2553 /* 2554 * Otherwise, heuristic :-\ 2555 */ 2556 if ((RAW != '<') && (RAW != 0xD)) return(0); 2557 if ((ctxt->node->children == NULL) && 2558 (RAW == '<') && (NXT(1) == '/')) return(0); 2559 2560 lastChild = xmlGetLastChild(ctxt->node); 2561 if (lastChild == NULL) { 2562 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2563 (ctxt->node->content != NULL)) return(0); 2564 } else if (xmlNodeIsText(lastChild)) 2565 return(0); 2566 else if ((ctxt->node->children != NULL) && 2567 (xmlNodeIsText(ctxt->node->children))) 2568 return(0); 2569 return(1); 2570} 2571 2572/************************************************************************ 2573 * * 2574 * Extra stuff for namespace support * 2575 * Relates to http://www.w3.org/TR/WD-xml-names * 2576 * * 2577 ************************************************************************/ 2578 2579/** 2580 * xmlSplitQName: 2581 * @ctxt: an XML parser context 2582 * @name: an XML parser context 2583 * @prefix: a xmlChar ** 2584 * 2585 * parse an UTF8 encoded XML qualified name string 2586 * 2587 * [NS 5] QName ::= (Prefix ':')? LocalPart 2588 * 2589 * [NS 6] Prefix ::= NCName 2590 * 2591 * [NS 7] LocalPart ::= NCName 2592 * 2593 * Returns the local part, and prefix is updated 2594 * to get the Prefix if any. 
2595 */ 2596 2597xmlChar * 2598xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2599 xmlChar buf[XML_MAX_NAMELEN + 5]; 2600 xmlChar *buffer = NULL; 2601 int len = 0; 2602 int max = XML_MAX_NAMELEN; 2603 xmlChar *ret = NULL; 2604 const xmlChar *cur = name; 2605 int c; 2606 2607 if (prefix == NULL) return(NULL); 2608 *prefix = NULL; 2609 2610 if (cur == NULL) return(NULL); 2611 2612#ifndef XML_XML_NAMESPACE 2613 /* xml: prefix is not really a namespace */ 2614 if ((cur[0] == 'x') && (cur[1] == 'm') && 2615 (cur[2] == 'l') && (cur[3] == ':')) 2616 return(xmlStrdup(name)); 2617#endif 2618 2619 /* nasty but well=formed */ 2620 if (cur[0] == ':') 2621 return(xmlStrdup(name)); 2622 2623 c = *cur++; 2624 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2625 buf[len++] = c; 2626 c = *cur++; 2627 } 2628 if (len >= max) { 2629 /* 2630 * Okay someone managed to make a huge name, so he's ready to pay 2631 * for the processing speed. 2632 */ 2633 max = len * 2; 2634 2635 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2636 if (buffer == NULL) { 2637 xmlErrMemory(ctxt, NULL); 2638 return(NULL); 2639 } 2640 memcpy(buffer, buf, len); 2641 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2642 if (len + 10 > max) { 2643 xmlChar *tmp; 2644 2645 max *= 2; 2646 tmp = (xmlChar *) xmlRealloc(buffer, 2647 max * sizeof(xmlChar)); 2648 if (tmp == NULL) { 2649 xmlFree(buffer); 2650 xmlErrMemory(ctxt, NULL); 2651 return(NULL); 2652 } 2653 buffer = tmp; 2654 } 2655 buffer[len++] = c; 2656 c = *cur++; 2657 } 2658 buffer[len] = 0; 2659 } 2660 2661 if ((c == ':') && (*cur == 0)) { 2662 if (buffer != NULL) 2663 xmlFree(buffer); 2664 *prefix = NULL; 2665 return(xmlStrdup(name)); 2666 } 2667 2668 if (buffer == NULL) 2669 ret = xmlStrndup(buf, len); 2670 else { 2671 ret = buffer; 2672 buffer = NULL; 2673 max = XML_MAX_NAMELEN; 2674 } 2675 2676 2677 if (c == ':') { 2678 c = *cur; 2679 *prefix = ret; 2680 if (c == 0) { 2681 
return(xmlStrndup(BAD_CAST "", 0)); 2682 } 2683 len = 0; 2684 2685 /* 2686 * Check that the first character is proper to start 2687 * a new name 2688 */ 2689 if (!(((c >= 0x61) && (c <= 0x7A)) || 2690 ((c >= 0x41) && (c <= 0x5A)) || 2691 (c == '_') || (c == ':'))) { 2692 int l; 2693 int first = CUR_SCHAR(cur, l); 2694 2695 if (!IS_LETTER(first) && (first != '_')) { 2696 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2697 "Name %s is not XML Namespace compliant\n", 2698 name); 2699 } 2700 } 2701 cur++; 2702 2703 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2704 buf[len++] = c; 2705 c = *cur++; 2706 } 2707 if (len >= max) { 2708 /* 2709 * Okay someone managed to make a huge name, so he's ready to pay 2710 * for the processing speed. 2711 */ 2712 max = len * 2; 2713 2714 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2715 if (buffer == NULL) { 2716 xmlErrMemory(ctxt, NULL); 2717 return(NULL); 2718 } 2719 memcpy(buffer, buf, len); 2720 while (c != 0) { /* tested bigname2.xml */ 2721 if (len + 10 > max) { 2722 xmlChar *tmp; 2723 2724 max *= 2; 2725 tmp = (xmlChar *) xmlRealloc(buffer, 2726 max * sizeof(xmlChar)); 2727 if (tmp == NULL) { 2728 xmlErrMemory(ctxt, NULL); 2729 xmlFree(buffer); 2730 return(NULL); 2731 } 2732 buffer = tmp; 2733 } 2734 buffer[len++] = c; 2735 c = *cur++; 2736 } 2737 buffer[len] = 0; 2738 } 2739 2740 if (buffer == NULL) 2741 ret = xmlStrndup(buf, len); 2742 else { 2743 ret = buffer; 2744 } 2745 } 2746 2747 return(ret); 2748} 2749 2750/************************************************************************ 2751 * * 2752 * The parser itself * 2753 * Relates to http://www.w3.org/TR/REC-xml * 2754 * * 2755 ************************************************************************/ 2756 2757static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2758static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2759 int *len, int *alloc, int normalize); 2760 2761/** 2762 * xmlParseName: 2763 * @ctxt: an XML parser 
context 2764 * 2765 * parse an XML name. 2766 * 2767 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2768 * CombiningChar | Extender 2769 * 2770 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2771 * 2772 * [6] Names ::= Name (#x20 Name)* 2773 * 2774 * Returns the Name parsed or NULL 2775 */ 2776 2777const xmlChar * 2778xmlParseName(xmlParserCtxtPtr ctxt) { 2779 const xmlChar *in; 2780 const xmlChar *ret; 2781 int count = 0; 2782 2783 GROW; 2784 2785 /* 2786 * Accelerator for simple ASCII names 2787 */ 2788 in = ctxt->input->cur; 2789 if (((*in >= 0x61) && (*in <= 0x7A)) || 2790 ((*in >= 0x41) && (*in <= 0x5A)) || 2791 (*in == '_') || (*in == ':')) { 2792 in++; 2793 while (((*in >= 0x61) && (*in <= 0x7A)) || 2794 ((*in >= 0x41) && (*in <= 0x5A)) || 2795 ((*in >= 0x30) && (*in <= 0x39)) || 2796 (*in == '_') || (*in == '-') || 2797 (*in == ':') || (*in == '.')) 2798 in++; 2799 if ((*in > 0) && (*in < 0x80)) { 2800 count = in - ctxt->input->cur; 2801 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2802 ctxt->input->cur = in; 2803 ctxt->nbChars += count; 2804 ctxt->input->col += count; 2805 if (ret == NULL) 2806 xmlErrMemory(ctxt, NULL); 2807 return(ret); 2808 } 2809 } 2810 return(xmlParseNameComplex(ctxt)); 2811} 2812 2813/** 2814 * xmlParseNameAndCompare: 2815 * @ctxt: an XML parser context 2816 * 2817 * parse an XML name and compares for match 2818 * (specialized for endtag parsing) 2819 * 2820 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2821 * and the name for mismatch 2822 */ 2823 2824static const xmlChar * 2825xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2826 register const xmlChar *cmp = other; 2827 register const xmlChar *in; 2828 const xmlChar *ret; 2829 2830 GROW; 2831 2832 in = ctxt->input->cur; 2833 while (*in != 0 && *in == *cmp) { 2834 ++in; 2835 ++cmp; 2836 ctxt->input->col++; 2837 } 2838 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2839 /* success */ 2840 ctxt->input->cur = in; 
2841 return (const xmlChar*) 1; 2842 } 2843 /* failure (or end of input buffer), check with full function */ 2844 ret = xmlParseName (ctxt); 2845 /* strings coming from the dictionnary direct compare possible */ 2846 if (ret == other) { 2847 return (const xmlChar*) 1; 2848 } 2849 return ret; 2850} 2851 2852static const xmlChar * 2853xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2854 int len = 0, l; 2855 int c; 2856 int count = 0; 2857 2858 /* 2859 * Handler for more complex cases 2860 */ 2861 GROW; 2862 c = CUR_CHAR(l); 2863 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2864 (!IS_LETTER(c) && (c != '_') && 2865 (c != ':'))) { 2866 return(NULL); 2867 } 2868 2869 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2870 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2871 (c == '.') || (c == '-') || 2872 (c == '_') || (c == ':') || 2873 (IS_COMBINING(c)) || 2874 (IS_EXTENDER(c)))) { 2875 if (count++ > 100) { 2876 count = 0; 2877 GROW; 2878 } 2879 len += l; 2880 NEXTL(l); 2881 c = CUR_CHAR(l); 2882 } 2883 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 2884 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 2885 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2886} 2887 2888/** 2889 * xmlParseStringName: 2890 * @ctxt: an XML parser context 2891 * @str: a pointer to the string pointer (IN/OUT) 2892 * 2893 * parse an XML name. 2894 * 2895 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2896 * CombiningChar | Extender 2897 * 2898 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2899 * 2900 * [6] Names ::= Name (#x20 Name)* 2901 * 2902 * Returns the Name parsed or NULL. The @str pointer 2903 * is updated to the current location in the string. 
2904 */ 2905 2906static xmlChar * 2907xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2908 xmlChar buf[XML_MAX_NAMELEN + 5]; 2909 const xmlChar *cur = *str; 2910 int len = 0, l; 2911 int c; 2912 2913 c = CUR_SCHAR(cur, l); 2914 if (!IS_LETTER(c) && (c != '_') && 2915 (c != ':')) { 2916 return(NULL); 2917 } 2918 2919 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2920 (c == '.') || (c == '-') || 2921 (c == '_') || (c == ':') || 2922 (IS_COMBINING(c)) || 2923 (IS_EXTENDER(c))) { 2924 COPY_BUF(l,buf,len,c); 2925 cur += l; 2926 c = CUR_SCHAR(cur, l); 2927 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2928 /* 2929 * Okay someone managed to make a huge name, so he's ready to pay 2930 * for the processing speed. 2931 */ 2932 xmlChar *buffer; 2933 int max = len * 2; 2934 2935 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2936 if (buffer == NULL) { 2937 xmlErrMemory(ctxt, NULL); 2938 return(NULL); 2939 } 2940 memcpy(buffer, buf, len); 2941 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2942 /* test bigentname.xml */ 2943 (c == '.') || (c == '-') || 2944 (c == '_') || (c == ':') || 2945 (IS_COMBINING(c)) || 2946 (IS_EXTENDER(c))) { 2947 if (len + 10 > max) { 2948 xmlChar *tmp; 2949 max *= 2; 2950 tmp = (xmlChar *) xmlRealloc(buffer, 2951 max * sizeof(xmlChar)); 2952 if (tmp == NULL) { 2953 xmlErrMemory(ctxt, NULL); 2954 xmlFree(buffer); 2955 return(NULL); 2956 } 2957 buffer = tmp; 2958 } 2959 COPY_BUF(l,buffer,len,c); 2960 cur += l; 2961 c = CUR_SCHAR(cur, l); 2962 } 2963 buffer[len] = 0; 2964 *str = cur; 2965 return(buffer); 2966 } 2967 } 2968 *str = cur; 2969 return(xmlStrndup(buf, len)); 2970} 2971 2972/** 2973 * xmlParseNmtoken: 2974 * @ctxt: an XML parser context 2975 * 2976 * parse an XML Nmtoken. 
2977 * 2978 * [7] Nmtoken ::= (NameChar)+ 2979 * 2980 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 2981 * 2982 * Returns the Nmtoken parsed or NULL 2983 */ 2984 2985xmlChar * 2986xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2987 xmlChar buf[XML_MAX_NAMELEN + 5]; 2988 int len = 0, l; 2989 int c; 2990 int count = 0; 2991 2992 GROW; 2993 c = CUR_CHAR(l); 2994 2995 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2996 (c == '.') || (c == '-') || 2997 (c == '_') || (c == ':') || 2998 (IS_COMBINING(c)) || 2999 (IS_EXTENDER(c))) { 3000 if (count++ > 100) { 3001 count = 0; 3002 GROW; 3003 } 3004 COPY_BUF(l,buf,len,c); 3005 NEXTL(l); 3006 c = CUR_CHAR(l); 3007 if (len >= XML_MAX_NAMELEN) { 3008 /* 3009 * Okay someone managed to make a huge token, so he's ready to pay 3010 * for the processing speed. 3011 */ 3012 xmlChar *buffer; 3013 int max = len * 2; 3014 3015 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3016 if (buffer == NULL) { 3017 xmlErrMemory(ctxt, NULL); 3018 return(NULL); 3019 } 3020 memcpy(buffer, buf, len); 3021 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 3022 (c == '.') || (c == '-') || 3023 (c == '_') || (c == ':') || 3024 (IS_COMBINING(c)) || 3025 (IS_EXTENDER(c))) { 3026 if (count++ > 100) { 3027 count = 0; 3028 GROW; 3029 } 3030 if (len + 10 > max) { 3031 xmlChar *tmp; 3032 3033 max *= 2; 3034 tmp = (xmlChar *) xmlRealloc(buffer, 3035 max * sizeof(xmlChar)); 3036 if (tmp == NULL) { 3037 xmlErrMemory(ctxt, NULL); 3038 xmlFree(buffer); 3039 return(NULL); 3040 } 3041 buffer = tmp; 3042 } 3043 COPY_BUF(l,buffer,len,c); 3044 NEXTL(l); 3045 c = CUR_CHAR(l); 3046 } 3047 buffer[len] = 0; 3048 return(buffer); 3049 } 3050 } 3051 if (len == 0) 3052 return(NULL); 3053 return(xmlStrndup(buf, len)); 3054} 3055 3056/** 3057 * xmlParseEntityValue: 3058 * @ctxt: an XML parser context 3059 * @orig: if non-NULL store a copy of the original entity value 3060 * 3061 * parse a value for ENTITY declarations 3062 * 3063 * [9] 
EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3064 * "'" ([^%&'] | PEReference | Reference)* "'" 3065 * 3066 * Returns the EntityValue parsed with reference substituted or NULL 3067 */ 3068 3069xmlChar * 3070xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3071 xmlChar *buf = NULL; 3072 int len = 0; 3073 int size = XML_PARSER_BUFFER_SIZE; 3074 int c, l; 3075 xmlChar stop; 3076 xmlChar *ret = NULL; 3077 const xmlChar *cur = NULL; 3078 xmlParserInputPtr input; 3079 3080 if (RAW == '"') stop = '"'; 3081 else if (RAW == '\'') stop = '\''; 3082 else { 3083 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3084 return(NULL); 3085 } 3086 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3087 if (buf == NULL) { 3088 xmlErrMemory(ctxt, NULL); 3089 return(NULL); 3090 } 3091 3092 /* 3093 * The content of the entity definition is copied in a buffer. 3094 */ 3095 3096 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3097 input = ctxt->input; 3098 GROW; 3099 NEXT; 3100 c = CUR_CHAR(l); 3101 /* 3102 * NOTE: 4.4.5 Included in Literal 3103 * When a parameter entity reference appears in a literal entity 3104 * value, ... a single or double quote character in the replacement 3105 * text is always treated as a normal data character and will not 3106 * terminate the literal. 3107 * In practice it means we stop the loop only when back at parsing 3108 * the initial entity and the quote is found 3109 */ 3110 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3111 (ctxt->input != input))) { 3112 if (len + 5 >= size) { 3113 xmlChar *tmp; 3114 3115 size *= 2; 3116 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3117 if (tmp == NULL) { 3118 xmlErrMemory(ctxt, NULL); 3119 xmlFree(buf); 3120 return(NULL); 3121 } 3122 buf = tmp; 3123 } 3124 COPY_BUF(l,buf,len,c); 3125 NEXTL(l); 3126 /* 3127 * Pop-up of finished entities. 
3128 */ 3129 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3130 xmlPopInput(ctxt); 3131 3132 GROW; 3133 c = CUR_CHAR(l); 3134 if (c == 0) { 3135 GROW; 3136 c = CUR_CHAR(l); 3137 } 3138 } 3139 buf[len] = 0; 3140 3141 /* 3142 * Raise problem w.r.t. '&' and '%' being used in non-entities 3143 * reference constructs. Note Charref will be handled in 3144 * xmlStringDecodeEntities() 3145 */ 3146 cur = buf; 3147 while (*cur != 0) { /* non input consuming */ 3148 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3149 xmlChar *name; 3150 xmlChar tmp = *cur; 3151 3152 cur++; 3153 name = xmlParseStringName(ctxt, &cur); 3154 if ((name == NULL) || (*cur != ';')) { 3155 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3156 "EntityValue: '%c' forbidden except for entities references\n", 3157 tmp); 3158 } 3159 if ((tmp == '%') && (ctxt->inSubset == 1) && 3160 (ctxt->inputNr == 1)) { 3161 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3162 } 3163 if (name != NULL) 3164 xmlFree(name); 3165 if (*cur == 0) 3166 break; 3167 } 3168 cur++; 3169 } 3170 3171 /* 3172 * Then PEReference entities are substituted. 3173 */ 3174 if (c != stop) { 3175 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3176 xmlFree(buf); 3177 } else { 3178 NEXT; 3179 /* 3180 * NOTE: 4.4.7 Bypassed 3181 * When a general entity reference appears in the EntityValue in 3182 * an entity declaration, it is bypassed and left as is. 3183 * so XML_SUBSTITUTE_REF is not set here. 
3184 */ 3185 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3186 0, 0, 0); 3187 if (orig != NULL) 3188 *orig = buf; 3189 else 3190 xmlFree(buf); 3191 } 3192 3193 return(ret); 3194} 3195 3196/** 3197 * xmlParseAttValueComplex: 3198 * @ctxt: an XML parser context 3199 * @len: the resulting attribute len 3200 * @normalize: wether to apply the inner normalization 3201 * 3202 * parse a value for an attribute, this is the fallback function 3203 * of xmlParseAttValue() when the attribute parsing requires handling 3204 * of non-ASCII characters, or normalization compaction. 3205 * 3206 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3207 */ 3208static xmlChar * 3209xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3210 xmlChar limit = 0; 3211 xmlChar *buf = NULL; 3212 xmlChar *rep = NULL; 3213 int len = 0; 3214 int buf_size = 0; 3215 int c, l, in_space = 0; 3216 xmlChar *current = NULL; 3217 xmlEntityPtr ent; 3218 3219 if (NXT(0) == '"') { 3220 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3221 limit = '"'; 3222 NEXT; 3223 } else if (NXT(0) == '\'') { 3224 limit = '\''; 3225 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3226 NEXT; 3227 } else { 3228 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3229 return(NULL); 3230 } 3231 3232 /* 3233 * allocate a translation buffer. 3234 */ 3235 buf_size = XML_PARSER_BUFFER_SIZE; 3236 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3237 if (buf == NULL) goto mem_error; 3238 3239 /* 3240 * OK loop until we reach one of the ending char or a size limit. 
3241 */ 3242 c = CUR_CHAR(l); 3243 while ((NXT(0) != limit) && /* checked */ 3244 (IS_CHAR(c)) && (c != '<')) { 3245 if (c == 0) break; 3246 if (c == '&') { 3247 in_space = 0; 3248 if (NXT(1) == '#') { 3249 int val = xmlParseCharRef(ctxt); 3250 3251 if (val == '&') { 3252 if (ctxt->replaceEntities) { 3253 if (len > buf_size - 10) { 3254 growBuffer(buf); 3255 } 3256 buf[len++] = '&'; 3257 } else { 3258 /* 3259 * The reparsing will be done in xmlStringGetNodeList() 3260 * called by the attribute() function in SAX.c 3261 */ 3262 if (len > buf_size - 10) { 3263 growBuffer(buf); 3264 } 3265 buf[len++] = '&'; 3266 buf[len++] = '#'; 3267 buf[len++] = '3'; 3268 buf[len++] = '8'; 3269 buf[len++] = ';'; 3270 } 3271 } else if (val != 0) { 3272 if (len > buf_size - 10) { 3273 growBuffer(buf); 3274 } 3275 len += xmlCopyChar(0, &buf[len], val); 3276 } 3277 } else { 3278 ent = xmlParseEntityRef(ctxt); 3279 if ((ent != NULL) && 3280 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3281 if (len > buf_size - 10) { 3282 growBuffer(buf); 3283 } 3284 if ((ctxt->replaceEntities == 0) && 3285 (ent->content[0] == '&')) { 3286 buf[len++] = '&'; 3287 buf[len++] = '#'; 3288 buf[len++] = '3'; 3289 buf[len++] = '8'; 3290 buf[len++] = ';'; 3291 } else { 3292 buf[len++] = ent->content[0]; 3293 } 3294 } else if ((ent != NULL) && 3295 (ctxt->replaceEntities != 0)) { 3296 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3297 rep = xmlStringDecodeEntities(ctxt, ent->content, 3298 XML_SUBSTITUTE_REF, 3299 0, 0, 0); 3300 if (rep != NULL) { 3301 current = rep; 3302 while (*current != 0) { /* non input consuming */ 3303 buf[len++] = *current++; 3304 if (len > buf_size - 10) { 3305 growBuffer(buf); 3306 } 3307 } 3308 xmlFree(rep); 3309 rep = NULL; 3310 } 3311 } else { 3312 if (len > buf_size - 10) { 3313 growBuffer(buf); 3314 } 3315 if (ent->content != NULL) 3316 buf[len++] = ent->content[0]; 3317 } 3318 } else if (ent != NULL) { 3319 int i = xmlStrlen(ent->name); 3320 const xmlChar *cur = ent->name; 
3321 3322 /* 3323 * This may look absurd but is needed to detect 3324 * entities problems 3325 */ 3326 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3327 (ent->content != NULL)) { 3328 xmlChar *rep; 3329 rep = xmlStringDecodeEntities(ctxt, ent->content, 3330 XML_SUBSTITUTE_REF, 0, 0, 0); 3331 if (rep != NULL) { 3332 xmlFree(rep); 3333 rep = NULL; 3334 } 3335 } 3336 3337 /* 3338 * Just output the reference 3339 */ 3340 buf[len++] = '&'; 3341 if (len > buf_size - i - 10) { 3342 growBuffer(buf); 3343 } 3344 for (;i > 0;i--) 3345 buf[len++] = *cur++; 3346 buf[len++] = ';'; 3347 } 3348 } 3349 } else { 3350 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3351 if ((len != 0) || (!normalize)) { 3352 if ((!normalize) || (!in_space)) { 3353 COPY_BUF(l,buf,len,0x20); 3354 if (len > buf_size - 10) { 3355 growBuffer(buf); 3356 } 3357 } 3358 in_space = 1; 3359 } 3360 } else { 3361 in_space = 0; 3362 COPY_BUF(l,buf,len,c); 3363 if (len > buf_size - 10) { 3364 growBuffer(buf); 3365 } 3366 } 3367 NEXTL(l); 3368 } 3369 GROW; 3370 c = CUR_CHAR(l); 3371 } 3372 if ((in_space) && (normalize)) { 3373 while (buf[len - 1] == 0x20) len--; 3374 } 3375 buf[len] = 0; 3376 if (RAW == '<') { 3377 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3378 } else if (RAW != limit) { 3379 if ((c != 0) && (!IS_CHAR(c))) { 3380 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3381 "invalid character in attribute value\n"); 3382 } else { 3383 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3384 "AttValue: ' expected\n"); 3385 } 3386 } else 3387 NEXT; 3388 if (attlen != NULL) *attlen = len; 3389 return(buf); 3390 3391mem_error: 3392 xmlErrMemory(ctxt, NULL); 3393 if (buf != NULL) 3394 xmlFree(buf); 3395 if (rep != NULL) 3396 xmlFree(rep); 3397 return(NULL); 3398} 3399 3400/** 3401 * xmlParseAttValue: 3402 * @ctxt: an XML parser context 3403 * 3404 * parse a value for an attribute 3405 * Note: the parser won't do substitution of entities here, this 3406 * will be handled later in 
 * xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3431 */ 3432 3433 3434xmlChar * 3435xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3436 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3437 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3438} 3439 3440/** 3441 * xmlParseSystemLiteral: 3442 * @ctxt: an XML parser context 3443 * 3444 * parse an XML Literal 3445 * 3446 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3447 * 3448 * Returns the SystemLiteral parsed or NULL 3449 */ 3450 3451xmlChar * 3452xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3453 xmlChar *buf = NULL; 3454 int len = 0; 3455 int size = XML_PARSER_BUFFER_SIZE; 3456 int cur, l; 3457 xmlChar stop; 3458 int state = ctxt->instate; 3459 int count = 0; 3460 3461 SHRINK; 3462 if (RAW == '"') { 3463 NEXT; 3464 stop = '"'; 3465 } else if (RAW == '\'') { 3466 NEXT; 3467 stop = '\''; 3468 } else { 3469 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3470 return(NULL); 3471 } 3472 3473 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3474 if (buf == NULL) { 3475 xmlErrMemory(ctxt, NULL); 3476 return(NULL); 3477 } 3478 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3479 cur = CUR_CHAR(l); 3480 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3481 if (len + 5 >= size) { 3482 xmlChar *tmp; 3483 3484 size *= 2; 3485 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3486 if (tmp == NULL) { 3487 xmlFree(buf); 3488 xmlErrMemory(ctxt, NULL); 3489 ctxt->instate = (xmlParserInputState) state; 3490 return(NULL); 3491 } 3492 buf = tmp; 3493 } 3494 count++; 3495 if (count > 50) { 3496 GROW; 3497 count = 0; 3498 } 3499 COPY_BUF(l,buf,len,cur); 3500 NEXTL(l); 3501 cur = CUR_CHAR(l); 3502 if (cur == 0) { 3503 GROW; 3504 SHRINK; 3505 cur = CUR_CHAR(l); 3506 } 3507 } 3508 buf[len] = 0; 3509 ctxt->instate = (xmlParserInputState) state; 3510 if (!IS_CHAR(cur)) { 3511 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3512 } else { 3513 NEXT; 3514 } 3515 return(buf); 3516} 3517 3518/** 3519 * xmlParsePubidLiteral: 3520 * 
@ctxt: an XML parser context 3521 * 3522 * parse an XML public literal 3523 * 3524 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3525 * 3526 * Returns the PubidLiteral parsed or NULL. 3527 */ 3528 3529xmlChar * 3530xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3531 xmlChar *buf = NULL; 3532 int len = 0; 3533 int size = XML_PARSER_BUFFER_SIZE; 3534 xmlChar cur; 3535 xmlChar stop; 3536 int count = 0; 3537 xmlParserInputState oldstate = ctxt->instate; 3538 3539 SHRINK; 3540 if (RAW == '"') { 3541 NEXT; 3542 stop = '"'; 3543 } else if (RAW == '\'') { 3544 NEXT; 3545 stop = '\''; 3546 } else { 3547 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3548 return(NULL); 3549 } 3550 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3551 if (buf == NULL) { 3552 xmlErrMemory(ctxt, NULL); 3553 return(NULL); 3554 } 3555 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3556 cur = CUR; 3557 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3558 if (len + 1 >= size) { 3559 xmlChar *tmp; 3560 3561 size *= 2; 3562 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3563 if (tmp == NULL) { 3564 xmlErrMemory(ctxt, NULL); 3565 xmlFree(buf); 3566 return(NULL); 3567 } 3568 buf = tmp; 3569 } 3570 buf[len++] = cur; 3571 count++; 3572 if (count > 50) { 3573 GROW; 3574 count = 0; 3575 } 3576 NEXT; 3577 cur = CUR; 3578 if (cur == 0) { 3579 GROW; 3580 SHRINK; 3581 cur = CUR; 3582 } 3583 } 3584 buf[len] = 0; 3585 if (cur != stop) { 3586 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3587 } else { 3588 NEXT; 3589 } 3590 ctxt->instate = oldstate; 3591 return(buf); 3592} 3593 3594void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3595 3596/* 3597 * used for the test in the inner loop of the char data testing 3598 */ 3599static const unsigned char test_char_data[256] = { 3600 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3601 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 3602 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 3603 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3604 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 3605 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 3606 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 3607 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 3608 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 3609 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 3610 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 3611 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 3612 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 3613 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 3614 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 3615 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 3616 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 3617 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3618 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3619 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3620 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3621 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3622 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3623 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3624 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3625 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3626 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3627 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3628 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3629 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3630 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3631 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 3632}; 3633 3634/** 3635 * xmlParseCharData: 3636 * @ctxt: an XML parser context 3637 * @cdata: int indicating whether we are within a CDATA section 3638 * 3639 * parse a CharData section. 3640 * if we are within a CDATA section ']]>' marks an end of section. 
3641 * 3642 * The right angle bracket (>) may be represented using the string ">", 3643 * and must, for compatibility, be escaped using ">" or a character 3644 * reference when it appears in the string "]]>" in content, when that 3645 * string is not marking the end of a CDATA section. 3646 * 3647 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 3648 */ 3649 3650void 3651xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 3652 const xmlChar *in; 3653 int nbchar = 0; 3654 int line = ctxt->input->line; 3655 int col = ctxt->input->col; 3656 int ccol; 3657 3658 SHRINK; 3659 GROW; 3660 /* 3661 * Accelerated common case where input don't need to be 3662 * modified before passing it to the handler. 3663 */ 3664 if (!cdata) { 3665 in = ctxt->input->cur; 3666 do { 3667get_more_space: 3668 while (*in == 0x20) { in++; ctxt->input->col++; } 3669 if (*in == 0xA) { 3670 do { 3671 ctxt->input->line++; ctxt->input->col = 1; 3672 in++; 3673 } while (*in == 0xA); 3674 goto get_more_space; 3675 } 3676 if (*in == '<') { 3677 nbchar = in - ctxt->input->cur; 3678 if (nbchar > 0) { 3679 const xmlChar *tmp = ctxt->input->cur; 3680 ctxt->input->cur = in; 3681 3682 if ((ctxt->sax != NULL) && 3683 (ctxt->sax->ignorableWhitespace != 3684 ctxt->sax->characters)) { 3685 if (areBlanks(ctxt, tmp, nbchar, 1)) { 3686 if (ctxt->sax->ignorableWhitespace != NULL) 3687 ctxt->sax->ignorableWhitespace(ctxt->userData, 3688 tmp, nbchar); 3689 } else { 3690 if (ctxt->sax->characters != NULL) 3691 ctxt->sax->characters(ctxt->userData, 3692 tmp, nbchar); 3693 if (*ctxt->space == -1) 3694 *ctxt->space = -2; 3695 } 3696 } else if ((ctxt->sax != NULL) && 3697 (ctxt->sax->characters != NULL)) { 3698 ctxt->sax->characters(ctxt->userData, 3699 tmp, nbchar); 3700 } 3701 } 3702 return; 3703 } 3704 3705get_more: 3706 ccol = ctxt->input->col; 3707 while (test_char_data[*in]) { 3708 in++; 3709 ccol++; 3710 } 3711 ctxt->input->col = ccol; 3712 if (*in == 0xA) { 3713 do { 3714 ctxt->input->line++; ctxt->input->col = 1; 
3715 in++; 3716 } while (*in == 0xA); 3717 goto get_more; 3718 } 3719 if (*in == ']') { 3720 if ((in[1] == ']') && (in[2] == '>')) { 3721 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3722 ctxt->input->cur = in; 3723 return; 3724 } 3725 in++; 3726 ctxt->input->col++; 3727 goto get_more; 3728 } 3729 nbchar = in - ctxt->input->cur; 3730 if (nbchar > 0) { 3731 if ((ctxt->sax != NULL) && 3732 (ctxt->sax->ignorableWhitespace != 3733 ctxt->sax->characters) && 3734 (IS_BLANK_CH(*ctxt->input->cur))) { 3735 const xmlChar *tmp = ctxt->input->cur; 3736 ctxt->input->cur = in; 3737 3738 if (areBlanks(ctxt, tmp, nbchar, 0)) { 3739 if (ctxt->sax->ignorableWhitespace != NULL) 3740 ctxt->sax->ignorableWhitespace(ctxt->userData, 3741 tmp, nbchar); 3742 } else { 3743 if (ctxt->sax->characters != NULL) 3744 ctxt->sax->characters(ctxt->userData, 3745 tmp, nbchar); 3746 if (*ctxt->space == -1) 3747 *ctxt->space = -2; 3748 } 3749 line = ctxt->input->line; 3750 col = ctxt->input->col; 3751 } else if (ctxt->sax != NULL) { 3752 if (ctxt->sax->characters != NULL) 3753 ctxt->sax->characters(ctxt->userData, 3754 ctxt->input->cur, nbchar); 3755 line = ctxt->input->line; 3756 col = ctxt->input->col; 3757 } 3758 } 3759 ctxt->input->cur = in; 3760 if (*in == 0xD) { 3761 in++; 3762 if (*in == 0xA) { 3763 ctxt->input->cur = in; 3764 in++; 3765 ctxt->input->line++; ctxt->input->col = 1; 3766 continue; /* while */ 3767 } 3768 in--; 3769 } 3770 if (*in == '<') { 3771 return; 3772 } 3773 if (*in == '&') { 3774 return; 3775 } 3776 SHRINK; 3777 GROW; 3778 in = ctxt->input->cur; 3779 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 3780 nbchar = 0; 3781 } 3782 ctxt->input->line = line; 3783 ctxt->input->col = col; 3784 xmlParseCharDataComplex(ctxt, cdata); 3785} 3786 3787/** 3788 * xmlParseCharDataComplex: 3789 * @ctxt: an XML parser context 3790 * @cdata: int indicating whether we are within a CDATA section 3791 * 3792 * parse a CharData section.this is the fallback function 3793 * of 
xmlParseCharData() when the parsing requires handling 3794 * of non-ASCII characters. 3795 */ 3796void 3797xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3798 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3799 int nbchar = 0; 3800 int cur, l; 3801 int count = 0; 3802 3803 SHRINK; 3804 GROW; 3805 cur = CUR_CHAR(l); 3806 while ((cur != '<') && /* checked */ 3807 (cur != '&') && 3808 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3809 if ((cur == ']') && (NXT(1) == ']') && 3810 (NXT(2) == '>')) { 3811 if (cdata) break; 3812 else { 3813 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3814 } 3815 } 3816 COPY_BUF(l,buf,nbchar,cur); 3817 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3818 buf[nbchar] = 0; 3819 3820 /* 3821 * OK the segment is to be consumed as chars. 3822 */ 3823 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3824 if (areBlanks(ctxt, buf, nbchar, 0)) { 3825 if (ctxt->sax->ignorableWhitespace != NULL) 3826 ctxt->sax->ignorableWhitespace(ctxt->userData, 3827 buf, nbchar); 3828 } else { 3829 if (ctxt->sax->characters != NULL) 3830 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3831 if ((ctxt->sax->characters != 3832 ctxt->sax->ignorableWhitespace) && 3833 (*ctxt->space == -1)) 3834 *ctxt->space = -2; 3835 } 3836 } 3837 nbchar = 0; 3838 } 3839 count++; 3840 if (count > 50) { 3841 GROW; 3842 count = 0; 3843 } 3844 NEXTL(l); 3845 cur = CUR_CHAR(l); 3846 } 3847 if (nbchar != 0) { 3848 buf[nbchar] = 0; 3849 /* 3850 * OK the segment is to be consumed as chars. 
3851 */ 3852 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3853 if (areBlanks(ctxt, buf, nbchar, 0)) { 3854 if (ctxt->sax->ignorableWhitespace != NULL) 3855 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3856 } else { 3857 if (ctxt->sax->characters != NULL) 3858 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3859 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 3860 (*ctxt->space == -1)) 3861 *ctxt->space = -2; 3862 } 3863 } 3864 } 3865 if ((cur != 0) && (!IS_CHAR(cur))) { 3866 /* Generate the error and skip the offending character */ 3867 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 3868 "PCDATA invalid Char value %d\n", 3869 cur); 3870 NEXTL(l); 3871 } 3872} 3873 3874/** 3875 * xmlParseExternalID: 3876 * @ctxt: an XML parser context 3877 * @publicID: a xmlChar** receiving PubidLiteral 3878 * @strict: indicate whether we should restrict parsing to only 3879 * production [75], see NOTE below 3880 * 3881 * Parse an External ID or a Public ID 3882 * 3883 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3884 * 'PUBLIC' S PubidLiteral S SystemLiteral 3885 * 3886 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3887 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3888 * 3889 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3890 * 3891 * Returns the function returns SystemLiteral and in the second 3892 * case publicID receives PubidLiteral, is strict is off 3893 * it is possible to return NULL and have publicID set. 
3894 */ 3895 3896xmlChar * 3897xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3898 xmlChar *URI = NULL; 3899 3900 SHRINK; 3901 3902 *publicID = NULL; 3903 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3904 SKIP(6); 3905 if (!IS_BLANK_CH(CUR)) { 3906 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3907 "Space required after 'SYSTEM'\n"); 3908 } 3909 SKIP_BLANKS; 3910 URI = xmlParseSystemLiteral(ctxt); 3911 if (URI == NULL) { 3912 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3913 } 3914 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3915 SKIP(6); 3916 if (!IS_BLANK_CH(CUR)) { 3917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3918 "Space required after 'PUBLIC'\n"); 3919 } 3920 SKIP_BLANKS; 3921 *publicID = xmlParsePubidLiteral(ctxt); 3922 if (*publicID == NULL) { 3923 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3924 } 3925 if (strict) { 3926 /* 3927 * We don't handle [83] so "S SystemLiteral" is required. 3928 */ 3929 if (!IS_BLANK_CH(CUR)) { 3930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3931 "Space required after the Public Identifier\n"); 3932 } 3933 } else { 3934 /* 3935 * We handle [83] so we return immediately, if 3936 * "S SystemLiteral" is not detected. From a purely parsing 3937 * point of view that's a nice mess. 3938 */ 3939 const xmlChar *ptr; 3940 GROW; 3941 3942 ptr = CUR_PTR; 3943 if (!IS_BLANK_CH(*ptr)) return(NULL); 3944 3945 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3946 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3947 } 3948 SKIP_BLANKS; 3949 URI = xmlParseSystemLiteral(ctxt); 3950 if (URI == NULL) { 3951 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3952 } 3953 } 3954 return(URI); 3955} 3956 3957/** 3958 * xmlParseCommentComplex: 3959 * @ctxt: an XML parser context 3960 * @buf: the already parsed part of the buffer 3961 * @len: number of bytes filles in the buffer 3962 * @size: allocated size of the buffer 3963 * 3964 * Skip an XML (SGML) comment <!-- .... 
--> 3965 * The spec says that "For compatibility, the string "--" (double-hyphen) 3966 * must not occur within comments. " 3967 * This is the slow routine in case the accelerator for ascii didn't work 3968 * 3969 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3970 */ 3971static void 3972xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 3973 int q, ql; 3974 int r, rl; 3975 int cur, l; 3976 xmlParserInputPtr input = ctxt->input; 3977 int count = 0; 3978 3979 if (buf == NULL) { 3980 len = 0; 3981 size = XML_PARSER_BUFFER_SIZE; 3982 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3983 if (buf == NULL) { 3984 xmlErrMemory(ctxt, NULL); 3985 return; 3986 } 3987 } 3988 GROW; /* Assure there's enough input data */ 3989 q = CUR_CHAR(ql); 3990 if (q == 0) 3991 goto not_terminated; 3992 if (!IS_CHAR(q)) { 3993 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 3994 "xmlParseComment: invalid xmlChar value %d\n", 3995 q); 3996 xmlFree (buf); 3997 return; 3998 } 3999 NEXTL(ql); 4000 r = CUR_CHAR(rl); 4001 if (r == 0) 4002 goto not_terminated; 4003 if (!IS_CHAR(r)) { 4004 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4005 "xmlParseComment: invalid xmlChar value %d\n", 4006 q); 4007 xmlFree (buf); 4008 return; 4009 } 4010 NEXTL(rl); 4011 cur = CUR_CHAR(l); 4012 if (cur == 0) 4013 goto not_terminated; 4014 while (IS_CHAR(cur) && /* checked */ 4015 ((cur != '>') || 4016 (r != '-') || (q != '-'))) { 4017 if ((r == '-') && (q == '-')) { 4018 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4019 } 4020 if (len + 5 >= size) { 4021 xmlChar *new_buf; 4022 size *= 2; 4023 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4024 if (new_buf == NULL) { 4025 xmlFree (buf); 4026 xmlErrMemory(ctxt, NULL); 4027 return; 4028 } 4029 buf = new_buf; 4030 } 4031 COPY_BUF(ql,buf,len,q); 4032 q = r; 4033 ql = rl; 4034 r = cur; 4035 rl = l; 4036 4037 count++; 4038 if (count > 50) { 4039 GROW; 4040 count = 0; 4041 } 4042 NEXTL(l); 
4043 cur = CUR_CHAR(l); 4044 if (cur == 0) { 4045 SHRINK; 4046 GROW; 4047 cur = CUR_CHAR(l); 4048 } 4049 } 4050 buf[len] = 0; 4051 if (cur == 0) { 4052 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4053 "Comment not terminated \n<!--%.50s\n", buf); 4054 } else if (!IS_CHAR(cur)) { 4055 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4056 "xmlParseComment: invalid xmlChar value %d\n", 4057 cur); 4058 } else { 4059 if (input != ctxt->input) { 4060 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4061 "Comment doesn't start and stop in the same entity\n"); 4062 } 4063 NEXT; 4064 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4065 (!ctxt->disableSAX)) 4066 ctxt->sax->comment(ctxt->userData, buf); 4067 } 4068 xmlFree(buf); 4069 return; 4070not_terminated: 4071 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4072 "Comment not terminated\n", NULL); 4073 xmlFree(buf); 4074 return; 4075} 4076 4077/** 4078 * xmlParseComment: 4079 * @ctxt: an XML parser context 4080 * 4081 * Skip an XML (SGML) comment <!-- .... --> 4082 * The spec says that "For compatibility, the string "--" (double-hyphen) 4083 * must not occur within comments. " 4084 * 4085 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4086 */ 4087void 4088xmlParseComment(xmlParserCtxtPtr ctxt) { 4089 xmlChar *buf = NULL; 4090 int size = XML_PARSER_BUFFER_SIZE; 4091 int len = 0; 4092 xmlParserInputState state; 4093 const xmlChar *in; 4094 int nbchar = 0, ccol; 4095 4096 /* 4097 * Check that there is a comment right here. 4098 */ 4099 if ((RAW != '<') || (NXT(1) != '!') || 4100 (NXT(2) != '-') || (NXT(3) != '-')) return; 4101 4102 state = ctxt->instate; 4103 ctxt->instate = XML_PARSER_COMMENT; 4104 SKIP(4); 4105 SHRINK; 4106 GROW; 4107 4108 /* 4109 * Accelerated common case where input don't need to be 4110 * modified before passing it to the handler. 
4111 */ 4112 in = ctxt->input->cur; 4113 do { 4114 if (*in == 0xA) { 4115 do { 4116 ctxt->input->line++; ctxt->input->col = 1; 4117 in++; 4118 } while (*in == 0xA); 4119 } 4120get_more: 4121 ccol = ctxt->input->col; 4122 while (((*in > '-') && (*in <= 0x7F)) || 4123 ((*in >= 0x20) && (*in < '-')) || 4124 (*in == 0x09)) { 4125 in++; 4126 ccol++; 4127 } 4128 ctxt->input->col = ccol; 4129 if (*in == 0xA) { 4130 do { 4131 ctxt->input->line++; ctxt->input->col = 1; 4132 in++; 4133 } while (*in == 0xA); 4134 goto get_more; 4135 } 4136 nbchar = in - ctxt->input->cur; 4137 /* 4138 * save current set of data 4139 */ 4140 if (nbchar > 0) { 4141 if ((ctxt->sax != NULL) && 4142 (ctxt->sax->comment != NULL)) { 4143 if (buf == NULL) { 4144 if ((*in == '-') && (in[1] == '-')) 4145 size = nbchar + 1; 4146 else 4147 size = XML_PARSER_BUFFER_SIZE + nbchar; 4148 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4149 if (buf == NULL) { 4150 xmlErrMemory(ctxt, NULL); 4151 ctxt->instate = state; 4152 return; 4153 } 4154 len = 0; 4155 } else if (len + nbchar + 1 >= size) { 4156 xmlChar *new_buf; 4157 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4158 new_buf = (xmlChar *) xmlRealloc(buf, 4159 size * sizeof(xmlChar)); 4160 if (new_buf == NULL) { 4161 xmlFree (buf); 4162 xmlErrMemory(ctxt, NULL); 4163 ctxt->instate = state; 4164 return; 4165 } 4166 buf = new_buf; 4167 } 4168 memcpy(&buf[len], ctxt->input->cur, nbchar); 4169 len += nbchar; 4170 buf[len] = 0; 4171 } 4172 } 4173 ctxt->input->cur = in; 4174 if (*in == 0xA) { 4175 in++; 4176 ctxt->input->line++; ctxt->input->col = 1; 4177 } 4178 if (*in == 0xD) { 4179 in++; 4180 if (*in == 0xA) { 4181 ctxt->input->cur = in; 4182 in++; 4183 ctxt->input->line++; ctxt->input->col = 1; 4184 continue; /* while */ 4185 } 4186 in--; 4187 } 4188 SHRINK; 4189 GROW; 4190 in = ctxt->input->cur; 4191 if (*in == '-') { 4192 if (in[1] == '-') { 4193 if (in[2] == '>') { 4194 SKIP(3); 4195 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 
4196 (!ctxt->disableSAX)) { 4197 if (buf != NULL) 4198 ctxt->sax->comment(ctxt->userData, buf); 4199 else 4200 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4201 } 4202 if (buf != NULL) 4203 xmlFree(buf); 4204 ctxt->instate = state; 4205 return; 4206 } 4207 if (buf != NULL) 4208 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4209 "Comment not terminated \n<!--%.50s\n", 4210 buf); 4211 else 4212 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4213 "Comment not terminated \n", NULL); 4214 in++; 4215 ctxt->input->col++; 4216 } 4217 in++; 4218 ctxt->input->col++; 4219 goto get_more; 4220 } 4221 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4222 xmlParseCommentComplex(ctxt, buf, len, size); 4223 ctxt->instate = state; 4224 return; 4225} 4226 4227 4228/** 4229 * xmlParsePITarget: 4230 * @ctxt: an XML parser context 4231 * 4232 * parse the name of a PI 4233 * 4234 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4235 * 4236 * Returns the PITarget name or NULL 4237 */ 4238 4239const xmlChar * 4240xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4241 const xmlChar *name; 4242 4243 name = xmlParseName(ctxt); 4244 if ((name != NULL) && 4245 ((name[0] == 'x') || (name[0] == 'X')) && 4246 ((name[1] == 'm') || (name[1] == 'M')) && 4247 ((name[2] == 'l') || (name[2] == 'L'))) { 4248 int i; 4249 if ((name[0] == 'x') && (name[1] == 'm') && 4250 (name[2] == 'l') && (name[3] == 0)) { 4251 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4252 "XML declaration allowed only at the start of the document\n"); 4253 return(name); 4254 } else if (name[3] == 0) { 4255 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4256 return(name); 4257 } 4258 for (i = 0;;i++) { 4259 if (xmlW3CPIs[i] == NULL) break; 4260 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4261 return(name); 4262 } 4263 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4264 "xmlParsePITarget: invalid name prefix 'xml'\n", 4265 NULL, NULL); 4266 } 4267 return(name); 4268} 4269 
4270#ifdef LIBXML_CATALOG_ENABLED 4271/** 4272 * xmlParseCatalogPI: 4273 * @ctxt: an XML parser context 4274 * @catalog: the PI value string 4275 * 4276 * parse an XML Catalog Processing Instruction. 4277 * 4278 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4279 * 4280 * Occurs only if allowed by the user and if happening in the Misc 4281 * part of the document before any doctype informations 4282 * This will add the given catalog to the parsing context in order 4283 * to be used if there is a resolution need further down in the document 4284 */ 4285 4286static void 4287xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4288 xmlChar *URL = NULL; 4289 const xmlChar *tmp, *base; 4290 xmlChar marker; 4291 4292 tmp = catalog; 4293 while (IS_BLANK_CH(*tmp)) tmp++; 4294 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4295 goto error; 4296 tmp += 7; 4297 while (IS_BLANK_CH(*tmp)) tmp++; 4298 if (*tmp != '=') { 4299 return; 4300 } 4301 tmp++; 4302 while (IS_BLANK_CH(*tmp)) tmp++; 4303 marker = *tmp; 4304 if ((marker != '\'') && (marker != '"')) 4305 goto error; 4306 tmp++; 4307 base = tmp; 4308 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4309 if (*tmp == 0) 4310 goto error; 4311 URL = xmlStrndup(base, tmp - base); 4312 tmp++; 4313 while (IS_BLANK_CH(*tmp)) tmp++; 4314 if (*tmp != 0) 4315 goto error; 4316 4317 if (URL != NULL) { 4318 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4319 xmlFree(URL); 4320 } 4321 return; 4322 4323error: 4324 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4325 "Catalog PI syntax error: %s\n", 4326 catalog, NULL); 4327 if (URL != NULL) 4328 xmlFree(URL); 4329} 4330#endif 4331 4332/** 4333 * xmlParsePI: 4334 * @ctxt: an XML parser context 4335 * 4336 * parse an XML Processing Instruction. 4337 * 4338 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4339 * 4340 * The processing is transfered to SAX once parsed. 
4341 */ 4342 4343void 4344xmlParsePI(xmlParserCtxtPtr ctxt) { 4345 xmlChar *buf = NULL; 4346 int len = 0; 4347 int size = XML_PARSER_BUFFER_SIZE; 4348 int cur, l; 4349 const xmlChar *target; 4350 xmlParserInputState state; 4351 int count = 0; 4352 4353 if ((RAW == '<') && (NXT(1) == '?')) { 4354 xmlParserInputPtr input = ctxt->input; 4355 state = ctxt->instate; 4356 ctxt->instate = XML_PARSER_PI; 4357 /* 4358 * this is a Processing Instruction. 4359 */ 4360 SKIP(2); 4361 SHRINK; 4362 4363 /* 4364 * Parse the target name and check for special support like 4365 * namespace. 4366 */ 4367 target = xmlParsePITarget(ctxt); 4368 if (target != NULL) { 4369 if ((RAW == '?') && (NXT(1) == '>')) { 4370 if (input != ctxt->input) { 4371 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4372 "PI declaration doesn't start and stop in the same entity\n"); 4373 } 4374 SKIP(2); 4375 4376 /* 4377 * SAX: PI detected. 4378 */ 4379 if ((ctxt->sax) && (!ctxt->disableSAX) && 4380 (ctxt->sax->processingInstruction != NULL)) 4381 ctxt->sax->processingInstruction(ctxt->userData, 4382 target, NULL); 4383 ctxt->instate = state; 4384 return; 4385 } 4386 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4387 if (buf == NULL) { 4388 xmlErrMemory(ctxt, NULL); 4389 ctxt->instate = state; 4390 return; 4391 } 4392 cur = CUR; 4393 if (!IS_BLANK(cur)) { 4394 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4395 "ParsePI: PI %s space expected\n", target); 4396 } 4397 SKIP_BLANKS; 4398 cur = CUR_CHAR(l); 4399 while (IS_CHAR(cur) && /* checked */ 4400 ((cur != '?') || (NXT(1) != '>'))) { 4401 if (len + 5 >= size) { 4402 xmlChar *tmp; 4403 4404 size *= 2; 4405 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4406 if (tmp == NULL) { 4407 xmlErrMemory(ctxt, NULL); 4408 xmlFree(buf); 4409 ctxt->instate = state; 4410 return; 4411 } 4412 buf = tmp; 4413 } 4414 count++; 4415 if (count > 50) { 4416 GROW; 4417 count = 0; 4418 } 4419 COPY_BUF(l,buf,len,cur); 4420 NEXTL(l); 4421 cur = CUR_CHAR(l); 4422 
if (cur == 0) { 4423 SHRINK; 4424 GROW; 4425 cur = CUR_CHAR(l); 4426 } 4427 } 4428 buf[len] = 0; 4429 if (cur != '?') { 4430 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4431 "ParsePI: PI %s never end ...\n", target); 4432 } else { 4433 if (input != ctxt->input) { 4434 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4435 "PI declaration doesn't start and stop in the same entity\n"); 4436 } 4437 SKIP(2); 4438 4439#ifdef LIBXML_CATALOG_ENABLED 4440 if (((state == XML_PARSER_MISC) || 4441 (state == XML_PARSER_START)) && 4442 (xmlStrEqual(target, XML_CATALOG_PI))) { 4443 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4444 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4445 (allow == XML_CATA_ALLOW_ALL)) 4446 xmlParseCatalogPI(ctxt, buf); 4447 } 4448#endif 4449 4450 4451 /* 4452 * SAX: PI detected. 4453 */ 4454 if ((ctxt->sax) && (!ctxt->disableSAX) && 4455 (ctxt->sax->processingInstruction != NULL)) 4456 ctxt->sax->processingInstruction(ctxt->userData, 4457 target, buf); 4458 } 4459 xmlFree(buf); 4460 } else { 4461 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4462 } 4463 ctxt->instate = state; 4464 } 4465} 4466 4467/** 4468 * xmlParseNotationDecl: 4469 * @ctxt: an XML parser context 4470 * 4471 * parse a notation declaration 4472 * 4473 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4474 * 4475 * Hence there is actually 3 choices: 4476 * 'PUBLIC' S PubidLiteral 4477 * 'PUBLIC' S PubidLiteral S SystemLiteral 4478 * and 'SYSTEM' S SystemLiteral 4479 * 4480 * See the NOTE on xmlParseExternalID(). 
4481 */ 4482 4483void 4484xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4485 const xmlChar *name; 4486 xmlChar *Pubid; 4487 xmlChar *Systemid; 4488 4489 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4490 xmlParserInputPtr input = ctxt->input; 4491 SHRINK; 4492 SKIP(10); 4493 if (!IS_BLANK_CH(CUR)) { 4494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4495 "Space required after '<!NOTATION'\n"); 4496 return; 4497 } 4498 SKIP_BLANKS; 4499 4500 name = xmlParseName(ctxt); 4501 if (name == NULL) { 4502 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4503 return; 4504 } 4505 if (!IS_BLANK_CH(CUR)) { 4506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4507 "Space required after the NOTATION name'\n"); 4508 return; 4509 } 4510 SKIP_BLANKS; 4511 4512 /* 4513 * Parse the IDs. 4514 */ 4515 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4516 SKIP_BLANKS; 4517 4518 if (RAW == '>') { 4519 if (input != ctxt->input) { 4520 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4521 "Notation declaration doesn't start and stop in the same entity\n"); 4522 } 4523 NEXT; 4524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4525 (ctxt->sax->notationDecl != NULL)) 4526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4527 } else { 4528 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4529 } 4530 if (Systemid != NULL) xmlFree(Systemid); 4531 if (Pubid != NULL) xmlFree(Pubid); 4532 } 4533} 4534 4535/** 4536 * xmlParseEntityDecl: 4537 * @ctxt: an XML parser context 4538 * 4539 * parse <!ENTITY declarations 4540 * 4541 * [70] EntityDecl ::= GEDecl | PEDecl 4542 * 4543 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4544 * 4545 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4546 * 4547 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
4548 * 4549 * [74] PEDef ::= EntityValue | ExternalID 4550 * 4551 * [76] NDataDecl ::= S 'NDATA' S Name 4552 * 4553 * [ VC: Notation Declared ] 4554 * The Name must match the declared name of a notation. 4555 */ 4556 4557void 4558xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4559 const xmlChar *name = NULL; 4560 xmlChar *value = NULL; 4561 xmlChar *URI = NULL, *literal = NULL; 4562 const xmlChar *ndata = NULL; 4563 int isParameter = 0; 4564 xmlChar *orig = NULL; 4565 int skipped; 4566 4567 /* GROW; done in the caller */ 4568 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 4569 xmlParserInputPtr input = ctxt->input; 4570 SHRINK; 4571 SKIP(8); 4572 skipped = SKIP_BLANKS; 4573 if (skipped == 0) { 4574 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4575 "Space required after '<!ENTITY'\n"); 4576 } 4577 4578 if (RAW == '%') { 4579 NEXT; 4580 skipped = SKIP_BLANKS; 4581 if (skipped == 0) { 4582 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4583 "Space required after '%'\n"); 4584 } 4585 isParameter = 1; 4586 } 4587 4588 name = xmlParseName(ctxt); 4589 if (name == NULL) { 4590 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4591 "xmlParseEntityDecl: no name\n"); 4592 return; 4593 } 4594 skipped = SKIP_BLANKS; 4595 if (skipped == 0) { 4596 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4597 "Space required after the entity name\n"); 4598 } 4599 4600 ctxt->instate = XML_PARSER_ENTITY_DECL; 4601 /* 4602 * handle the various case of definitions... 
4603 */ 4604 if (isParameter) { 4605 if ((RAW == '"') || (RAW == '\'')) { 4606 value = xmlParseEntityValue(ctxt, &orig); 4607 if (value) { 4608 if ((ctxt->sax != NULL) && 4609 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4610 ctxt->sax->entityDecl(ctxt->userData, name, 4611 XML_INTERNAL_PARAMETER_ENTITY, 4612 NULL, NULL, value); 4613 } 4614 } else { 4615 URI = xmlParseExternalID(ctxt, &literal, 1); 4616 if ((URI == NULL) && (literal == NULL)) { 4617 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4618 } 4619 if (URI) { 4620 xmlURIPtr uri; 4621 4622 uri = xmlParseURI((const char *) URI); 4623 if (uri == NULL) { 4624 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4625 "Invalid URI: %s\n", URI); 4626 /* 4627 * This really ought to be a well formedness error 4628 * but the XML Core WG decided otherwise c.f. issue 4629 * E26 of the XML erratas. 4630 */ 4631 } else { 4632 if (uri->fragment != NULL) { 4633 /* 4634 * Okay this is foolish to block those but not 4635 * invalid URIs. 4636 */ 4637 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4638 } else { 4639 if ((ctxt->sax != NULL) && 4640 (!ctxt->disableSAX) && 4641 (ctxt->sax->entityDecl != NULL)) 4642 ctxt->sax->entityDecl(ctxt->userData, name, 4643 XML_EXTERNAL_PARAMETER_ENTITY, 4644 literal, URI, NULL); 4645 } 4646 xmlFreeURI(uri); 4647 } 4648 } 4649 } 4650 } else { 4651 if ((RAW == '"') || (RAW == '\'')) { 4652 value = xmlParseEntityValue(ctxt, &orig); 4653 if ((ctxt->sax != NULL) && 4654 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4655 ctxt->sax->entityDecl(ctxt->userData, name, 4656 XML_INTERNAL_GENERAL_ENTITY, 4657 NULL, NULL, value); 4658 /* 4659 * For expat compatibility in SAX mode. 
4660 */ 4661 if ((ctxt->myDoc == NULL) || 4662 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4663 if (ctxt->myDoc == NULL) { 4664 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4665 if (ctxt->myDoc == NULL) { 4666 xmlErrMemory(ctxt, "New Doc failed"); 4667 return; 4668 } 4669 } 4670 if (ctxt->myDoc->intSubset == NULL) 4671 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4672 BAD_CAST "fake", NULL, NULL); 4673 4674 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4675 NULL, NULL, value); 4676 } 4677 } else { 4678 URI = xmlParseExternalID(ctxt, &literal, 1); 4679 if ((URI == NULL) && (literal == NULL)) { 4680 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4681 } 4682 if (URI) { 4683 xmlURIPtr uri; 4684 4685 uri = xmlParseURI((const char *)URI); 4686 if (uri == NULL) { 4687 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4688 "Invalid URI: %s\n", URI); 4689 /* 4690 * This really ought to be a well formedness error 4691 * but the XML Core WG decided otherwise c.f. issue 4692 * E26 of the XML erratas. 4693 */ 4694 } else { 4695 if (uri->fragment != NULL) { 4696 /* 4697 * Okay this is foolish to block those but not 4698 * invalid URIs. 
4699 */ 4700 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4701 } 4702 xmlFreeURI(uri); 4703 } 4704 } 4705 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 4706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4707 "Space required before 'NDATA'\n"); 4708 } 4709 SKIP_BLANKS; 4710 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 4711 SKIP(5); 4712 if (!IS_BLANK_CH(CUR)) { 4713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4714 "Space required after 'NDATA'\n"); 4715 } 4716 SKIP_BLANKS; 4717 ndata = xmlParseName(ctxt); 4718 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4719 (ctxt->sax->unparsedEntityDecl != NULL)) 4720 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4721 literal, URI, ndata); 4722 } else { 4723 if ((ctxt->sax != NULL) && 4724 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4725 ctxt->sax->entityDecl(ctxt->userData, name, 4726 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4727 literal, URI, NULL); 4728 /* 4729 * For expat compatibility in SAX mode. 4730 * assuming the entity repalcement was asked for 4731 */ 4732 if ((ctxt->replaceEntities != 0) && 4733 ((ctxt->myDoc == NULL) || 4734 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4735 if (ctxt->myDoc == NULL) { 4736 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4737 if (ctxt->myDoc == NULL) { 4738 xmlErrMemory(ctxt, "New Doc failed"); 4739 return; 4740 } 4741 } 4742 4743 if (ctxt->myDoc->intSubset == NULL) 4744 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4745 BAD_CAST "fake", NULL, NULL); 4746 xmlSAX2EntityDecl(ctxt, name, 4747 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4748 literal, URI, NULL); 4749 } 4750 } 4751 } 4752 } 4753 SKIP_BLANKS; 4754 if (RAW != '>') { 4755 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4756 "xmlParseEntityDecl: entity %s not terminated\n", name); 4757 } else { 4758 if (input != ctxt->input) { 4759 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4760 "Entity declaration doesn't start and stop in the same entity\n"); 4761 } 4762 NEXT; 4763 } 4764 if (orig != NULL) { 4765 /* 
4766 * Ugly mechanism to save the raw entity value. 4767 */ 4768 xmlEntityPtr cur = NULL; 4769 4770 if (isParameter) { 4771 if ((ctxt->sax != NULL) && 4772 (ctxt->sax->getParameterEntity != NULL)) 4773 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4774 } else { 4775 if ((ctxt->sax != NULL) && 4776 (ctxt->sax->getEntity != NULL)) 4777 cur = ctxt->sax->getEntity(ctxt->userData, name); 4778 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4779 cur = xmlSAX2GetEntity(ctxt, name); 4780 } 4781 } 4782 if (cur != NULL) { 4783 if (cur->orig != NULL) 4784 xmlFree(orig); 4785 else 4786 cur->orig = orig; 4787 } else 4788 xmlFree(orig); 4789 } 4790 if (value != NULL) xmlFree(value); 4791 if (URI != NULL) xmlFree(URI); 4792 if (literal != NULL) xmlFree(literal); 4793 } 4794} 4795 4796/** 4797 * xmlParseDefaultDecl: 4798 * @ctxt: an XML parser context 4799 * @value: Receive a possible fixed default value for the attribute 4800 * 4801 * Parse an attribute default declaration 4802 * 4803 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4804 * 4805 * [ VC: Required Attribute ] 4806 * if the default declaration is the keyword #REQUIRED, then the 4807 * attribute must be specified for all elements of the type in the 4808 * attribute-list declaration. 4809 * 4810 * [ VC: Attribute Default Legal ] 4811 * The declared default value must meet the lexical constraints of 4812 * the declared attribute type c.f. xmlValidateAttributeDecl() 4813 * 4814 * [ VC: Fixed Attribute Default ] 4815 * if an attribute has a default value declared with the #FIXED 4816 * keyword, instances of that attribute must match the default value. 4817 * 4818 * [ WFC: No < in Attribute Values ] 4819 * handled in xmlParseAttValue() 4820 * 4821 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4822 * or XML_ATTRIBUTE_FIXED. 
4823 */ 4824 4825int 4826xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4827 int val; 4828 xmlChar *ret; 4829 4830 *value = NULL; 4831 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4832 SKIP(9); 4833 return(XML_ATTRIBUTE_REQUIRED); 4834 } 4835 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4836 SKIP(8); 4837 return(XML_ATTRIBUTE_IMPLIED); 4838 } 4839 val = XML_ATTRIBUTE_NONE; 4840 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4841 SKIP(6); 4842 val = XML_ATTRIBUTE_FIXED; 4843 if (!IS_BLANK_CH(CUR)) { 4844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4845 "Space required after '#FIXED'\n"); 4846 } 4847 SKIP_BLANKS; 4848 } 4849 ret = xmlParseAttValue(ctxt); 4850 ctxt->instate = XML_PARSER_DTD; 4851 if (ret == NULL) { 4852 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4853 "Attribute default value declaration error\n"); 4854 } else 4855 *value = ret; 4856 return(val); 4857} 4858 4859/** 4860 * xmlParseNotationType: 4861 * @ctxt: an XML parser context 4862 * 4863 * parse an Notation attribute type. 4864 * 4865 * Note: the leading 'NOTATION' S part has already being parsed... 4866 * 4867 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4868 * 4869 * [ VC: Notation Attributes ] 4870 * Values of this type must match one of the notation names included 4871 * in the declaration; all notation names in the declaration must be declared. 
4872 * 4873 * Returns: the notation attribute tree built while parsing 4874 */ 4875 4876xmlEnumerationPtr 4877xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4878 const xmlChar *name; 4879 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4880 4881 if (RAW != '(') { 4882 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4883 return(NULL); 4884 } 4885 SHRINK; 4886 do { 4887 NEXT; 4888 SKIP_BLANKS; 4889 name = xmlParseName(ctxt); 4890 if (name == NULL) { 4891 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4892 "Name expected in NOTATION declaration\n"); 4893 return(ret); 4894 } 4895 cur = xmlCreateEnumeration(name); 4896 if (cur == NULL) return(ret); 4897 if (last == NULL) ret = last = cur; 4898 else { 4899 last->next = cur; 4900 last = cur; 4901 } 4902 SKIP_BLANKS; 4903 } while (RAW == '|'); 4904 if (RAW != ')') { 4905 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4906 if ((last != NULL) && (last != ret)) 4907 xmlFreeEnumeration(last); 4908 return(ret); 4909 } 4910 NEXT; 4911 return(ret); 4912} 4913 4914/** 4915 * xmlParseEnumerationType: 4916 * @ctxt: an XML parser context 4917 * 4918 * parse an Enumeration attribute type. 4919 * 4920 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 4921 * 4922 * [ VC: Enumeration ] 4923 * Values of this type must match one of the Nmtoken tokens in 4924 * the declaration 4925 * 4926 * Returns: the enumeration attribute tree built while parsing 4927 */ 4928 4929xmlEnumerationPtr 4930xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 4931 xmlChar *name; 4932 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4933 4934 if (RAW != '(') { 4935 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 4936 return(NULL); 4937 } 4938 SHRINK; 4939 do { 4940 NEXT; 4941 SKIP_BLANKS; 4942 name = xmlParseNmtoken(ctxt); 4943 if (name == NULL) { 4944 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 4945 return(ret); 4946 } 4947 cur = xmlCreateEnumeration(name); 4948 xmlFree(name); 4949 if (cur == NULL) return(ret); 4950 if (last == NULL) ret = last = cur; 4951 else { 4952 last->next = cur; 4953 last = cur; 4954 } 4955 SKIP_BLANKS; 4956 } while (RAW == '|'); 4957 if (RAW != ')') { 4958 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 4959 return(ret); 4960 } 4961 NEXT; 4962 return(ret); 4963} 4964 4965/** 4966 * xmlParseEnumeratedType: 4967 * @ctxt: an XML parser context 4968 * @tree: the enumeration tree built while parsing 4969 * 4970 * parse an Enumerated attribute type. 4971 * 4972 * [57] EnumeratedType ::= NotationType | Enumeration 4973 * 4974 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 4975 * 4976 * 4977 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4978 */ 4979 4980int 4981xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4982 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4983 SKIP(8); 4984 if (!IS_BLANK_CH(CUR)) { 4985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4986 "Space required after 'NOTATION'\n"); 4987 return(0); 4988 } 4989 SKIP_BLANKS; 4990 *tree = xmlParseNotationType(ctxt); 4991 if (*tree == NULL) return(0); 4992 return(XML_ATTRIBUTE_NOTATION); 4993 } 4994 *tree = xmlParseEnumerationType(ctxt); 4995 if (*tree == NULL) return(0); 4996 return(XML_ATTRIBUTE_ENUMERATION); 4997} 4998 4999/** 5000 * xmlParseAttributeType: 5001 * @ctxt: an XML parser context 5002 * @tree: the enumeration tree built while parsing 5003 * 5004 * parse the Attribute list def for an element 5005 * 5006 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5007 * 5008 * [55] StringType ::= 'CDATA' 5009 * 5010 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5011 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5012 * 5013 * Validity constraints for attribute values syntax are checked in 5014 * xmlValidateAttributeValue() 5015 * 5016 * [ VC: ID ] 5017 * Values of type ID must match the Name production. A name must not 5018 * appear more than once in an XML document as a value of this type; 5019 * i.e., ID values must uniquely identify the elements which bear them. 5020 * 5021 * [ VC: One ID per Element Type ] 5022 * No element type may have more than one ID attribute specified. 5023 * 5024 * [ VC: ID Attribute Default ] 5025 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5026 * 5027 * [ VC: IDREF ] 5028 * Values of type IDREF must match the Name production, and values 5029 * of type IDREFS must match Names; each IDREF Name must match the value 5030 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5031 * values must match the value of some ID attribute. 5032 * 5033 * [ VC: Entity Name ] 5034 * Values of type ENTITY must match the Name production, values 5035 * of type ENTITIES must match Names; each Entity Name must match the 5036 * name of an unparsed entity declared in the DTD. 5037 * 5038 * [ VC: Name Token ] 5039 * Values of type NMTOKEN must match the Nmtoken production; values 5040 * of type NMTOKENS must match Nmtokens. 5041 * 5042 * Returns the attribute type 5043 */ 5044int 5045xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5046 SHRINK; 5047 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5048 SKIP(5); 5049 return(XML_ATTRIBUTE_CDATA); 5050 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5051 SKIP(6); 5052 return(XML_ATTRIBUTE_IDREFS); 5053 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5054 SKIP(5); 5055 return(XML_ATTRIBUTE_IDREF); 5056 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5057 SKIP(2); 5058 return(XML_ATTRIBUTE_ID); 5059 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5060 SKIP(6); 5061 return(XML_ATTRIBUTE_ENTITY); 5062 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5063 SKIP(8); 5064 return(XML_ATTRIBUTE_ENTITIES); 5065 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5066 SKIP(8); 5067 return(XML_ATTRIBUTE_NMTOKENS); 5068 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5069 SKIP(7); 5070 return(XML_ATTRIBUTE_NMTOKEN); 5071 } 5072 return(xmlParseEnumeratedType(ctxt, tree)); 5073} 5074 5075/** 5076 * xmlParseAttributeListDecl: 5077 * @ctxt: an XML parser context 5078 * 5079 * : parse the Attribute list def for an element 5080 * 5081 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5082 * 5083 * [53] AttDef ::= S Name S AttType S DefaultDecl 5084 * 5085 */ 5086void 5087xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5088 const xmlChar *elemName; 5089 const xmlChar *attrName; 5090 xmlEnumerationPtr tree; 5091 5092 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5093 xmlParserInputPtr input = ctxt->input; 5094 5095 SKIP(9); 5096 if (!IS_BLANK_CH(CUR)) { 5097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5098 "Space required after '<!ATTLIST'\n"); 5099 } 5100 SKIP_BLANKS; 5101 elemName = xmlParseName(ctxt); 5102 if (elemName == NULL) { 5103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5104 "ATTLIST: no name for Element\n"); 5105 return; 5106 } 5107 SKIP_BLANKS; 5108 GROW; 5109 while (RAW != '>') { 5110 const xmlChar *check = CUR_PTR; 5111 int type; 5112 int def; 5113 xmlChar *defaultValue = NULL; 5114 5115 GROW; 5116 tree = NULL; 5117 attrName = xmlParseName(ctxt); 5118 if (attrName == NULL) { 5119 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5120 "ATTLIST: no name for Attribute\n"); 5121 break; 5122 } 5123 GROW; 5124 if (!IS_BLANK_CH(CUR)) { 5125 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5126 "Space required after the attribute name\n"); 5127 break; 5128 } 5129 SKIP_BLANKS; 5130 5131 type = xmlParseAttributeType(ctxt, &tree); 5132 if (type <= 0) { 5133 break; 5134 } 5135 5136 GROW; 5137 if (!IS_BLANK_CH(CUR)) { 5138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5139 "Space required after the attribute type\n"); 5140 if (tree != NULL) 5141 xmlFreeEnumeration(tree); 5142 break; 5143 } 5144 SKIP_BLANKS; 5145 5146 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5147 if (def <= 0) { 5148 if (defaultValue != NULL) 5149 xmlFree(defaultValue); 5150 if (tree != NULL) 5151 xmlFreeEnumeration(tree); 5152 break; 5153 } 5154 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5155 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5156 5157 GROW; 5158 if (RAW != '>') { 5159 if (!IS_BLANK_CH(CUR)) { 5160 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, 5161 "Space required after the attribute default value\n"); 5162 if (defaultValue != NULL) 5163 xmlFree(defaultValue); 5164 if (tree != NULL) 5165 xmlFreeEnumeration(tree); 5166 break; 5167 } 5168 SKIP_BLANKS; 5169 } 5170 if (check == CUR_PTR) { 5171 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 5172 "in xmlParseAttributeListDecl\n"); 5173 if (defaultValue != NULL) 5174 xmlFree(defaultValue); 5175 if (tree != NULL) 5176 xmlFreeEnumeration(tree); 5177 break; 5178 } 5179 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5180 (ctxt->sax->attributeDecl != NULL)) 5181 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5182 type, def, defaultValue, tree); 5183 else if (tree != NULL) 5184 xmlFreeEnumeration(tree); 5185 5186 if ((ctxt->sax2) && (defaultValue != NULL) && 5187 (def != XML_ATTRIBUTE_IMPLIED) && 5188 (def != XML_ATTRIBUTE_REQUIRED)) { 5189 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5190 } 5191 if (ctxt->sax2) { 5192 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5193 } 5194 if (defaultValue != NULL) 5195 xmlFree(defaultValue); 5196 GROW; 5197 } 5198 if (RAW == '>') { 5199 if (input != ctxt->input) { 5200 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5201 "Attribute list declaration doesn't start and stop in the same entity\n"); 5202 } 5203 NEXT; 5204 } 5205 } 5206} 5207 5208/** 5209 * xmlParseElementMixedContentDecl: 5210 * @ctxt: an XML parser context 5211 * @inputchk: the input used for the current entity, needed for boundary checks 5212 * 5213 * parse the declaration for a Mixed Element content 5214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5215 * 5216 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5217 * '(' S? '#PCDATA' S? ')' 5218 * 5219 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 5220 * 5221 * [ VC: No Duplicate Types ] 5222 * The same name must not appear more than once in a single 5223 * mixed-content declaration. 
5224 * 5225 * returns: the list of the xmlElementContentPtr describing the element choices 5226 */ 5227xmlElementContentPtr 5228xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5229 xmlElementContentPtr ret = NULL, cur = NULL, n; 5230 const xmlChar *elem = NULL; 5231 5232 GROW; 5233 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5234 SKIP(7); 5235 SKIP_BLANKS; 5236 SHRINK; 5237 if (RAW == ')') { 5238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5240"Element content declaration doesn't start and stop in the same entity\n", 5241 NULL); 5242 } 5243 NEXT; 5244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5245 if (ret == NULL) 5246 return(NULL); 5247 if (RAW == '*') { 5248 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5249 NEXT; 5250 } 5251 return(ret); 5252 } 5253 if ((RAW == '(') || (RAW == '|')) { 5254 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5255 if (ret == NULL) return(NULL); 5256 } 5257 while (RAW == '|') { 5258 NEXT; 5259 if (elem == NULL) { 5260 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5261 if (ret == NULL) return(NULL); 5262 ret->c1 = cur; 5263 if (cur != NULL) 5264 cur->parent = ret; 5265 cur = ret; 5266 } else { 5267 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5268 if (n == NULL) return(NULL); 5269 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5270 if (n->c1 != NULL) 5271 n->c1->parent = n; 5272 cur->c2 = n; 5273 if (n != NULL) 5274 n->parent = cur; 5275 cur = n; 5276 } 5277 SKIP_BLANKS; 5278 elem = xmlParseName(ctxt); 5279 if (elem == NULL) { 5280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5281 "xmlParseElementMixedContentDecl : Name expected\n"); 5282 xmlFreeDocElementContent(ctxt->myDoc, cur); 5283 return(NULL); 5284 } 5285 SKIP_BLANKS; 5286 GROW; 5287 } 5288 if ((RAW == ')') && (NXT(1) == '*')) { 
5289 if (elem != NULL) { 5290 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 5291 XML_ELEMENT_CONTENT_ELEMENT); 5292 if (cur->c2 != NULL) 5293 cur->c2->parent = cur; 5294 } 5295 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5296 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5297 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5298"Element content declaration doesn't start and stop in the same entity\n", 5299 NULL); 5300 } 5301 SKIP(2); 5302 } else { 5303 xmlFreeDocElementContent(ctxt->myDoc, ret); 5304 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5305 return(NULL); 5306 } 5307 5308 } else { 5309 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5310 } 5311 return(ret); 5312} 5313 5314/** 5315 * xmlParseElementChildrenContentDecl: 5316 * @ctxt: an XML parser context 5317 * @inputchk: the input used for the current entity, needed for boundary checks 5318 * 5319 * parse the declaration for a Mixed Element content 5320 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5321 * 5322 * 5323 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5324 * 5325 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5326 * 5327 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5328 * 5329 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5330 * 5331 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5332 * TODO Parameter-entity replacement text must be properly nested 5333 * with parenthesized groups. That is to say, if either of the 5334 * opening or closing parentheses in a choice, seq, or Mixed 5335 * construct is contained in the replacement text for a parameter 5336 * entity, both must be contained in the same replacement text. For 5337 * interoperability, if a parameter-entity reference appears in a 5338 * choice, seq, or Mixed construct, its replacement text should not 5339 * be empty, and neither the first nor last non-blank character of 5340 * the replacement text should be a connector (| or ,). 
5341 * 5342 * Returns the tree of xmlElementContentPtr describing the element 5343 * hierarchy. 5344 */ 5345xmlElementContentPtr 5346xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 5347 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5348 const xmlChar *elem; 5349 xmlChar type = 0; 5350 5351 SKIP_BLANKS; 5352 GROW; 5353 if (RAW == '(') { 5354 int inputid = ctxt->input->id; 5355 5356 /* Recurse on first child */ 5357 NEXT; 5358 SKIP_BLANKS; 5359 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 5360 SKIP_BLANKS; 5361 GROW; 5362 } else { 5363 elem = xmlParseName(ctxt); 5364 if (elem == NULL) { 5365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5366 return(NULL); 5367 } 5368 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5369 if (cur == NULL) { 5370 xmlErrMemory(ctxt, NULL); 5371 return(NULL); 5372 } 5373 GROW; 5374 if (RAW == '?') { 5375 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5376 NEXT; 5377 } else if (RAW == '*') { 5378 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5379 NEXT; 5380 } else if (RAW == '+') { 5381 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5382 NEXT; 5383 } else { 5384 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5385 } 5386 GROW; 5387 } 5388 SKIP_BLANKS; 5389 SHRINK; 5390 while (RAW != ')') { 5391 /* 5392 * Each loop we parse one separator and one element. 
5393 */ 5394 if (RAW == ',') { 5395 if (type == 0) type = CUR; 5396 5397 /* 5398 * Detect "Name | Name , Name" error 5399 */ 5400 else if (type != CUR) { 5401 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5402 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5403 type); 5404 if ((last != NULL) && (last != ret)) 5405 xmlFreeDocElementContent(ctxt->myDoc, last); 5406 if (ret != NULL) 5407 xmlFreeDocElementContent(ctxt->myDoc, ret); 5408 return(NULL); 5409 } 5410 NEXT; 5411 5412 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 5413 if (op == NULL) { 5414 if ((last != NULL) && (last != ret)) 5415 xmlFreeDocElementContent(ctxt->myDoc, last); 5416 xmlFreeDocElementContent(ctxt->myDoc, ret); 5417 return(NULL); 5418 } 5419 if (last == NULL) { 5420 op->c1 = ret; 5421 if (ret != NULL) 5422 ret->parent = op; 5423 ret = cur = op; 5424 } else { 5425 cur->c2 = op; 5426 if (op != NULL) 5427 op->parent = cur; 5428 op->c1 = last; 5429 if (last != NULL) 5430 last->parent = op; 5431 cur =op; 5432 last = NULL; 5433 } 5434 } else if (RAW == '|') { 5435 if (type == 0) type = CUR; 5436 5437 /* 5438 * Detect "Name , Name | Name" error 5439 */ 5440 else if (type != CUR) { 5441 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5442 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5443 type); 5444 if ((last != NULL) && (last != ret)) 5445 xmlFreeDocElementContent(ctxt->myDoc, last); 5446 if (ret != NULL) 5447 xmlFreeDocElementContent(ctxt->myDoc, ret); 5448 return(NULL); 5449 } 5450 NEXT; 5451 5452 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5453 if (op == NULL) { 5454 if ((last != NULL) && (last != ret)) 5455 xmlFreeDocElementContent(ctxt->myDoc, last); 5456 if (ret != NULL) 5457 xmlFreeDocElementContent(ctxt->myDoc, ret); 5458 return(NULL); 5459 } 5460 if (last == NULL) { 5461 op->c1 = ret; 5462 if (ret != NULL) 5463 ret->parent = op; 5464 ret = cur = op; 5465 } else { 5466 cur->c2 = op; 5467 if (op != NULL) 5468 
op->parent = cur; 5469 op->c1 = last; 5470 if (last != NULL) 5471 last->parent = op; 5472 cur =op; 5473 last = NULL; 5474 } 5475 } else { 5476 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5477 if ((last != NULL) && (last != ret)) 5478 xmlFreeDocElementContent(ctxt->myDoc, last); 5479 if (ret != NULL) 5480 xmlFreeDocElementContent(ctxt->myDoc, ret); 5481 return(NULL); 5482 } 5483 GROW; 5484 SKIP_BLANKS; 5485 GROW; 5486 if (RAW == '(') { 5487 int inputid = ctxt->input->id; 5488 /* Recurse on second child */ 5489 NEXT; 5490 SKIP_BLANKS; 5491 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5492 SKIP_BLANKS; 5493 } else { 5494 elem = xmlParseName(ctxt); 5495 if (elem == NULL) { 5496 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5497 if (ret != NULL) 5498 xmlFreeDocElementContent(ctxt->myDoc, ret); 5499 return(NULL); 5500 } 5501 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5502 if (last == NULL) { 5503 if (ret != NULL) 5504 xmlFreeDocElementContent(ctxt->myDoc, ret); 5505 return(NULL); 5506 } 5507 if (RAW == '?') { 5508 last->ocur = XML_ELEMENT_CONTENT_OPT; 5509 NEXT; 5510 } else if (RAW == '*') { 5511 last->ocur = XML_ELEMENT_CONTENT_MULT; 5512 NEXT; 5513 } else if (RAW == '+') { 5514 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5515 NEXT; 5516 } else { 5517 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5518 } 5519 } 5520 SKIP_BLANKS; 5521 GROW; 5522 } 5523 if ((cur != NULL) && (last != NULL)) { 5524 cur->c2 = last; 5525 if (last != NULL) 5526 last->parent = cur; 5527 } 5528 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5529 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5530"Element content declaration doesn't start and stop in the same entity\n", 5531 NULL); 5532 } 5533 NEXT; 5534 if (RAW == '?') { 5535 if (ret != NULL) { 5536 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 5537 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5538 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5539 else 5540 ret->ocur = 
XML_ELEMENT_CONTENT_OPT; 5541 } 5542 NEXT; 5543 } else if (RAW == '*') { 5544 if (ret != NULL) { 5545 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5546 cur = ret; 5547 /* 5548 * Some normalization: 5549 * (a | b* | c?)* == (a | b | c)* 5550 */ 5551 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5552 if ((cur->c1 != NULL) && 5553 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5554 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5555 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5556 if ((cur->c2 != NULL) && 5557 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5558 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 5559 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5560 cur = cur->c2; 5561 } 5562 } 5563 NEXT; 5564 } else if (RAW == '+') { 5565 if (ret != NULL) { 5566 int found = 0; 5567 5568 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 5569 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5570 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5571 else 5572 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 5573 /* 5574 * Some normalization: 5575 * (a | b*)+ == (a | b)* 5576 * (a | b?)+ == (a | b)* 5577 */ 5578 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5579 if ((cur->c1 != NULL) && 5580 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5581 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 5582 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5583 found = 1; 5584 } 5585 if ((cur->c2 != NULL) && 5586 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5587 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 5588 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5589 found = 1; 5590 } 5591 cur = cur->c2; 5592 } 5593 if (found) 5594 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5595 } 5596 NEXT; 5597 } 5598 return(ret); 5599} 5600 5601/** 5602 * xmlParseElementContentDecl: 5603 * @ctxt: an XML parser context 5604 * @name: the name of the element being defined. 
5605 * @result: the Element Content pointer will be stored here if any 5606 * 5607 * parse the declaration for an Element content either Mixed or Children, 5608 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 5609 * 5610 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 5611 * 5612 * returns: the type of element content XML_ELEMENT_TYPE_xxx 5613 */ 5614 5615int 5616xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 5617 xmlElementContentPtr *result) { 5618 5619 xmlElementContentPtr tree = NULL; 5620 int inputid = ctxt->input->id; 5621 int res; 5622 5623 *result = NULL; 5624 5625 if (RAW != '(') { 5626 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5627 "xmlParseElementContentDecl : %s '(' expected\n", name); 5628 return(-1); 5629 } 5630 NEXT; 5631 GROW; 5632 SKIP_BLANKS; 5633 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5634 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 5635 res = XML_ELEMENT_TYPE_MIXED; 5636 } else { 5637 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 5638 res = XML_ELEMENT_TYPE_ELEMENT; 5639 } 5640 SKIP_BLANKS; 5641 *result = tree; 5642 return(res); 5643} 5644 5645/** 5646 * xmlParseElementDecl: 5647 * @ctxt: an XML parser context 5648 * 5649 * parse an Element declaration. 5650 * 5651 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 5652 * 5653 * [ VC: Unique Element Type Declaration ] 5654 * No element type may be declared more than once 5655 * 5656 * Returns the type of the element, or -1 in case of error 5657 */ 5658int 5659xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 5660 const xmlChar *name; 5661 int ret = -1; 5662 xmlElementContentPtr content = NULL; 5663 5664 /* GROW; done in the caller */ 5665 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 5666 xmlParserInputPtr input = ctxt->input; 5667 5668 SKIP(9); 5669 if (!IS_BLANK_CH(CUR)) { 5670 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5671 "Space required after 'ELEMENT'\n"); 5672 } 5673 SKIP_BLANKS; 5674 name = xmlParseName(ctxt); 5675 if (name == NULL) { 5676 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5677 "xmlParseElementDecl: no name for Element\n"); 5678 return(-1); 5679 } 5680 while ((RAW == 0) && (ctxt->inputNr > 1)) 5681 xmlPopInput(ctxt); 5682 if (!IS_BLANK_CH(CUR)) { 5683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5684 "Space required after the element name\n"); 5685 } 5686 SKIP_BLANKS; 5687 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 5688 SKIP(5); 5689 /* 5690 * Element must always be empty. 5691 */ 5692 ret = XML_ELEMENT_TYPE_EMPTY; 5693 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5694 (NXT(2) == 'Y')) { 5695 SKIP(3); 5696 /* 5697 * Element is a generic container. 5698 */ 5699 ret = XML_ELEMENT_TYPE_ANY; 5700 } else if (RAW == '(') { 5701 ret = xmlParseElementContentDecl(ctxt, name, &content); 5702 } else { 5703 /* 5704 * [ WFC: PEs in Internal Subset ] error handling. 
5705 */ 5706 if ((RAW == '%') && (ctxt->external == 0) && 5707 (ctxt->inputNr == 1)) { 5708 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 5709 "PEReference: forbidden within markup decl in internal subset\n"); 5710 } else { 5711 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5712 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5713 } 5714 return(-1); 5715 } 5716 5717 SKIP_BLANKS; 5718 /* 5719 * Pop-up of finished entities. 5720 */ 5721 while ((RAW == 0) && (ctxt->inputNr > 1)) 5722 xmlPopInput(ctxt); 5723 SKIP_BLANKS; 5724 5725 if (RAW != '>') { 5726 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5727 if (content != NULL) { 5728 xmlFreeDocElementContent(ctxt->myDoc, content); 5729 } 5730 } else { 5731 if (input != ctxt->input) { 5732 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5733 "Element declaration doesn't start and stop in the same entity\n"); 5734 } 5735 5736 NEXT; 5737 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5738 (ctxt->sax->elementDecl != NULL)) { 5739 if (content != NULL) 5740 content->parent = NULL; 5741 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5742 content); 5743 if ((content != NULL) && (content->parent == NULL)) { 5744 /* 5745 * this is a trick: if xmlAddElementDecl is called, 5746 * instead of copying the full tree it is plugged directly 5747 * if called from the parser. Avoid duplicating the 5748 * interfaces or change the API/ABI 5749 */ 5750 xmlFreeDocElementContent(ctxt->myDoc, content); 5751 } 5752 } else if (content != NULL) { 5753 xmlFreeDocElementContent(ctxt->myDoc, content); 5754 } 5755 } 5756 } 5757 return(ret); 5758} 5759 5760/** 5761 * xmlParseConditionalSections 5762 * @ctxt: an XML parser context 5763 * 5764 * [61] conditionalSect ::= includeSect | ignoreSect 5765 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5766 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 5767 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5768 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5769 */ 5770 5771static void 5772xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5773 SKIP(3); 5774 SKIP_BLANKS; 5775 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 5776 SKIP(7); 5777 SKIP_BLANKS; 5778 if (RAW != '[') { 5779 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5780 } else { 5781 NEXT; 5782 } 5783 if (xmlParserDebugEntities) { 5784 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5785 xmlGenericError(xmlGenericErrorContext, 5786 "%s(%d): ", ctxt->input->filename, 5787 ctxt->input->line); 5788 xmlGenericError(xmlGenericErrorContext, 5789 "Entering INCLUDE Conditional Section\n"); 5790 } 5791 5792 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5793 (NXT(2) != '>'))) { 5794 const xmlChar *check = CUR_PTR; 5795 unsigned int cons = ctxt->input->consumed; 5796 5797 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5798 xmlParseConditionalSections(ctxt); 5799 } else if (IS_BLANK_CH(CUR)) { 5800 NEXT; 5801 } else if (RAW == '%') { 5802 xmlParsePEReference(ctxt); 5803 } else 5804 xmlParseMarkupDecl(ctxt); 5805 5806 /* 5807 * Pop-up of finished entities. 
5808 */ 5809 while ((RAW == 0) && (ctxt->inputNr > 1)) 5810 xmlPopInput(ctxt); 5811 5812 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5813 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5814 break; 5815 } 5816 } 5817 if (xmlParserDebugEntities) { 5818 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5819 xmlGenericError(xmlGenericErrorContext, 5820 "%s(%d): ", ctxt->input->filename, 5821 ctxt->input->line); 5822 xmlGenericError(xmlGenericErrorContext, 5823 "Leaving INCLUDE Conditional Section\n"); 5824 } 5825 5826 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5827 int state; 5828 xmlParserInputState instate; 5829 int depth = 0; 5830 5831 SKIP(6); 5832 SKIP_BLANKS; 5833 if (RAW != '[') { 5834 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5835 } else { 5836 NEXT; 5837 } 5838 if (xmlParserDebugEntities) { 5839 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5840 xmlGenericError(xmlGenericErrorContext, 5841 "%s(%d): ", ctxt->input->filename, 5842 ctxt->input->line); 5843 xmlGenericError(xmlGenericErrorContext, 5844 "Entering IGNORE Conditional Section\n"); 5845 } 5846 5847 /* 5848 * Parse up to the end of the conditional section 5849 * But disable SAX event generating DTD building in the meantime 5850 */ 5851 state = ctxt->disableSAX; 5852 instate = ctxt->instate; 5853 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5854 ctxt->instate = XML_PARSER_IGNORE; 5855 5856 while ((depth >= 0) && (RAW != 0)) { 5857 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5858 depth++; 5859 SKIP(3); 5860 continue; 5861 } 5862 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5863 if (--depth >= 0) SKIP(3); 5864 continue; 5865 } 5866 NEXT; 5867 continue; 5868 } 5869 5870 ctxt->disableSAX = state; 5871 ctxt->instate = instate; 5872 5873 if (xmlParserDebugEntities) { 5874 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5875 xmlGenericError(xmlGenericErrorContext, 5876 "%s(%d): ", ctxt->input->filename, 5877 
ctxt->input->line); 5878 xmlGenericError(xmlGenericErrorContext, 5879 "Leaving IGNORE Conditional Section\n"); 5880 } 5881 5882 } else { 5883 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5884 } 5885 5886 if (RAW == 0) 5887 SHRINK; 5888 5889 if (RAW == 0) { 5890 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5891 } else { 5892 SKIP(3); 5893 } 5894} 5895 5896/** 5897 * xmlParseMarkupDecl: 5898 * @ctxt: an XML parser context 5899 * 5900 * parse Markup declarations 5901 * 5902 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5903 * NotationDecl | PI | Comment 5904 * 5905 * [ VC: Proper Declaration/PE Nesting ] 5906 * Parameter-entity replacement text must be properly nested with 5907 * markup declarations. That is to say, if either the first character 5908 * or the last character of a markup declaration (markupdecl above) is 5909 * contained in the replacement text for a parameter-entity reference, 5910 * both must be contained in the same replacement text. 5911 * 5912 * [ WFC: PEs in Internal Subset ] 5913 * In the internal DTD subset, parameter-entity references can occur 5914 * only where markup declarations can occur, not within markup declarations. 5915 * (This does not apply to references that occur in external parameter 5916 * entities or to the external subset.) 
5917 */ 5918void 5919xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5920 GROW; 5921 if (CUR == '<') { 5922 if (NXT(1) == '!') { 5923 switch (NXT(2)) { 5924 case 'E': 5925 if (NXT(3) == 'L') 5926 xmlParseElementDecl(ctxt); 5927 else if (NXT(3) == 'N') 5928 xmlParseEntityDecl(ctxt); 5929 break; 5930 case 'A': 5931 xmlParseAttributeListDecl(ctxt); 5932 break; 5933 case 'N': 5934 xmlParseNotationDecl(ctxt); 5935 break; 5936 case '-': 5937 xmlParseComment(ctxt); 5938 break; 5939 default: 5940 /* there is an error but it will be detected later */ 5941 break; 5942 } 5943 } else if (NXT(1) == '?') { 5944 xmlParsePI(ctxt); 5945 } 5946 } 5947 /* 5948 * This is only for internal subset. On external entities, 5949 * the replacement is done before parsing stage 5950 */ 5951 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5952 xmlParsePEReference(ctxt); 5953 5954 /* 5955 * Conditional sections are allowed from entities included 5956 * by PE References in the internal subset. 5957 */ 5958 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5959 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5960 xmlParseConditionalSections(ctxt); 5961 } 5962 } 5963 5964 ctxt->instate = XML_PARSER_DTD; 5965} 5966 5967/** 5968 * xmlParseTextDecl: 5969 * @ctxt: an XML parser context 5970 * 5971 * parse an XML declaration header for external entities 5972 * 5973 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5974 * 5975 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5976 */ 5977 5978void 5979xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5980 xmlChar *version; 5981 const xmlChar *encoding; 5982 5983 /* 5984 * We know that '<?xml' is here. 
5985 */ 5986 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5987 SKIP(5); 5988 } else { 5989 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5990 return; 5991 } 5992 5993 if (!IS_BLANK_CH(CUR)) { 5994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5995 "Space needed after '<?xml'\n"); 5996 } 5997 SKIP_BLANKS; 5998 5999 /* 6000 * We may have the VersionInfo here. 6001 */ 6002 version = xmlParseVersionInfo(ctxt); 6003 if (version == NULL) 6004 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6005 else { 6006 if (!IS_BLANK_CH(CUR)) { 6007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6008 "Space needed here\n"); 6009 } 6010 } 6011 ctxt->input->version = version; 6012 6013 /* 6014 * We must have the encoding declaration 6015 */ 6016 encoding = xmlParseEncodingDecl(ctxt); 6017 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6018 /* 6019 * The XML REC instructs us to stop parsing right here 6020 */ 6021 return; 6022 } 6023 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6024 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6025 "Missing encoding in text declaration\n"); 6026 } 6027 6028 SKIP_BLANKS; 6029 if ((RAW == '?') && (NXT(1) == '>')) { 6030 SKIP(2); 6031 } else if (RAW == '>') { 6032 /* Deprecated old WD ... */ 6033 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6034 NEXT; 6035 } else { 6036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6037 MOVETO_ENDTAG(CUR_PTR); 6038 NEXT; 6039 } 6040} 6041 6042/** 6043 * xmlParseExternalSubset: 6044 * @ctxt: an XML parser context 6045 * @ExternalID: the external identifier 6046 * @SystemID: the system identifier (or URL) 6047 * 6048 * parse Markup declarations from an external subset 6049 * 6050 * [30] extSubset ::= textDecl? 
extSubsetDecl 6051 * 6052 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6053 */ 6054void 6055xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6056 const xmlChar *SystemID) { 6057 xmlDetectSAX2(ctxt); 6058 GROW; 6059 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6060 xmlParseTextDecl(ctxt); 6061 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6062 /* 6063 * The XML REC instructs us to stop parsing right here 6064 */ 6065 ctxt->instate = XML_PARSER_EOF; 6066 return; 6067 } 6068 } 6069 if (ctxt->myDoc == NULL) { 6070 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6071 } 6072 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6073 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6074 6075 ctxt->instate = XML_PARSER_DTD; 6076 ctxt->external = 1; 6077 while (((RAW == '<') && (NXT(1) == '?')) || 6078 ((RAW == '<') && (NXT(1) == '!')) || 6079 (RAW == '%') || IS_BLANK_CH(CUR)) { 6080 const xmlChar *check = CUR_PTR; 6081 unsigned int cons = ctxt->input->consumed; 6082 6083 GROW; 6084 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6085 xmlParseConditionalSections(ctxt); 6086 } else if (IS_BLANK_CH(CUR)) { 6087 NEXT; 6088 } else if (RAW == '%') { 6089 xmlParsePEReference(ctxt); 6090 } else 6091 xmlParseMarkupDecl(ctxt); 6092 6093 /* 6094 * Pop-up of finished entities. 
6095 */ 6096 while ((RAW == 0) && (ctxt->inputNr > 1)) 6097 xmlPopInput(ctxt); 6098 6099 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6100 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6101 break; 6102 } 6103 } 6104 6105 if (RAW != 0) { 6106 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6107 } 6108 6109} 6110 6111/** 6112 * xmlParseReference: 6113 * @ctxt: an XML parser context 6114 * 6115 * parse and handle entity references in content, depending on the SAX 6116 * interface, this may end-up in a call to character() if this is a 6117 * CharRef, a predefined entity, if there is no reference() callback. 6118 * or if the parser was asked to switch to that mode. 6119 * 6120 * [67] Reference ::= EntityRef | CharRef 6121 */ 6122void 6123xmlParseReference(xmlParserCtxtPtr ctxt) { 6124 xmlEntityPtr ent; 6125 xmlChar *val; 6126 if (RAW != '&') return; 6127 6128 if (NXT(1) == '#') { 6129 int i = 0; 6130 xmlChar out[10]; 6131 int hex = NXT(2); 6132 int value = xmlParseCharRef(ctxt); 6133 6134 if (value == 0) 6135 return; 6136 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6137 /* 6138 * So we are using non-UTF-8 buffers 6139 * Check that the char fit on 8bits, if not 6140 * generate a CharRef. 
6141 */ 6142 if (value <= 0xFF) { 6143 out[0] = value; 6144 out[1] = 0; 6145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6146 (!ctxt->disableSAX)) 6147 ctxt->sax->characters(ctxt->userData, out, 1); 6148 } else { 6149 if ((hex == 'x') || (hex == 'X')) 6150 snprintf((char *)out, sizeof(out), "#x%X", value); 6151 else 6152 snprintf((char *)out, sizeof(out), "#%d", value); 6153 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6154 (!ctxt->disableSAX)) 6155 ctxt->sax->reference(ctxt->userData, out); 6156 } 6157 } else { 6158 /* 6159 * Just encode the value in UTF-8 6160 */ 6161 COPY_BUF(0 ,out, i, value); 6162 out[i] = 0; 6163 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6164 (!ctxt->disableSAX)) 6165 ctxt->sax->characters(ctxt->userData, out, i); 6166 } 6167 } else { 6168 int was_checked; 6169 6170 ent = xmlParseEntityRef(ctxt); 6171 if (ent == NULL) return; 6172 if (!ctxt->wellFormed) 6173 return; 6174 was_checked = ent->checked; 6175 if ((ent->name != NULL) && 6176 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 6177 xmlNodePtr list = NULL; 6178 xmlParserErrors ret = XML_ERR_OK; 6179 6180 6181 /* 6182 * The first reference to the entity trigger a parsing phase 6183 * where the ent->children is filled with the result from 6184 * the parsing. 6185 */ 6186 if (ent->checked == 0) { 6187 xmlChar *value; 6188 6189 value = ent->content; 6190 6191 /* 6192 * Check that this entity is well formed 6193 */ 6194 if ((value != NULL) && (value[0] != 0) && 6195 (value[1] == 0) && (value[0] == '<') && 6196 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 6197 /* 6198 * DONE: get definite answer on this !!! 6199 * Lots of entity decls are used to declare a single 6200 * char 6201 * <!ENTITY lt "<"> 6202 * Which seems to be valid since 6203 * 2.4: The ampersand character (&) and the left angle 6204 * bracket (<) may appear in their literal form only 6205 * when used ... 
They are also legal within the literal 6206 * entity value of an internal entity declaration;i 6207 * see "4.3.2 Well-Formed Parsed Entities". 6208 * IMHO 2.4 and 4.3.2 are directly in contradiction. 6209 * Looking at the OASIS test suite and James Clark 6210 * tests, this is broken. However the XML REC uses 6211 * it. Is the XML REC not well-formed ???? 6212 * This is a hack to avoid this problem 6213 * 6214 * ANSWER: since lt gt amp .. are already defined, 6215 * this is a redefinition and hence the fact that the 6216 * content is not well balanced is not a Wf error, this 6217 * is lousy but acceptable. 6218 */ 6219 list = xmlNewDocText(ctxt->myDoc, value); 6220 if (list != NULL) { 6221 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 6222 (ent->children == NULL)) { 6223 ent->children = list; 6224 ent->last = list; 6225 ent->owner = 1; 6226 list->parent = (xmlNodePtr) ent; 6227 } else { 6228 xmlFreeNodeList(list); 6229 } 6230 } else if (list != NULL) { 6231 xmlFreeNodeList(list); 6232 } 6233 } else { 6234 /* 6235 * 4.3.2: An internal general parsed entity is well-formed 6236 * if its replacement text matches the production labeled 6237 * content. 6238 */ 6239 6240 void *user_data; 6241 /* 6242 * This is a bit hackish but this seems the best 6243 * way to make sure both SAX and DOM entity support 6244 * behaves okay. 
6245 */ 6246 if (ctxt->userData == ctxt) 6247 user_data = NULL; 6248 else 6249 user_data = ctxt->userData; 6250 6251 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6252 ctxt->depth++; 6253 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6254 value, user_data, &list); 6255 ctxt->depth--; 6256 } else if (ent->etype == 6257 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6258 ctxt->depth++; 6259 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6260 ctxt->sax, user_data, ctxt->depth, 6261 ent->URI, ent->ExternalID, &list); 6262 ctxt->depth--; 6263 } else { 6264 ret = XML_ERR_ENTITY_PE_INTERNAL; 6265 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6266 "invalid entity type found\n", NULL); 6267 } 6268 if (ret == XML_ERR_ENTITY_LOOP) { 6269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6270 return; 6271 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 6272 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6273 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6274 (ent->children == NULL)) { 6275 ent->children = list; 6276 if (ctxt->replaceEntities) { 6277 /* 6278 * Prune it directly in the generated document 6279 * except for single text nodes. 
6280 */ 6281 if (((list->type == XML_TEXT_NODE) && 6282 (list->next == NULL)) || 6283 (ctxt->parseMode == XML_PARSE_READER)) { 6284 list->parent = (xmlNodePtr) ent; 6285 list = NULL; 6286 ent->owner = 1; 6287 } else { 6288 ent->owner = 0; 6289 while (list != NULL) { 6290 list->parent = (xmlNodePtr) ctxt->node; 6291 list->doc = ctxt->myDoc; 6292 if (list->next == NULL) 6293 ent->last = list; 6294 list = list->next; 6295 } 6296 list = ent->children; 6297#ifdef LIBXML_LEGACY_ENABLED 6298 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6299 xmlAddEntityReference(ent, list, NULL); 6300#endif /* LIBXML_LEGACY_ENABLED */ 6301 } 6302 } else { 6303 ent->owner = 1; 6304 while (list != NULL) { 6305 list->parent = (xmlNodePtr) ent; 6306 if (list->next == NULL) 6307 ent->last = list; 6308 list = list->next; 6309 } 6310 } 6311 } else { 6312 xmlFreeNodeList(list); 6313 list = NULL; 6314 } 6315 } else if ((ret != XML_ERR_OK) && 6316 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6317 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6318 "Entity '%s' failed to parse\n", ent->name); 6319 } else if (list != NULL) { 6320 xmlFreeNodeList(list); 6321 list = NULL; 6322 } 6323 } 6324 ent->checked = 1; 6325 } 6326 6327 if (ent->children == NULL) { 6328 /* 6329 * Probably running in SAX mode and the callbacks don't 6330 * build the entity content. So unless we already went 6331 * though parsing for first checking go though the entity 6332 * content to generate callbacks associated to the entity 6333 */ 6334 if (was_checked == 1) { 6335 void *user_data; 6336 /* 6337 * This is a bit hackish but this seems the best 6338 * way to make sure both SAX and DOM entity support 6339 * behaves okay. 
6340 */ 6341 if (ctxt->userData == ctxt) 6342 user_data = NULL; 6343 else 6344 user_data = ctxt->userData; 6345 6346 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6347 ctxt->depth++; 6348 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6349 ent->content, user_data, NULL); 6350 ctxt->depth--; 6351 } else if (ent->etype == 6352 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6353 ctxt->depth++; 6354 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6355 ctxt->sax, user_data, ctxt->depth, 6356 ent->URI, ent->ExternalID, NULL); 6357 ctxt->depth--; 6358 } else { 6359 ret = XML_ERR_ENTITY_PE_INTERNAL; 6360 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6361 "invalid entity type found\n", NULL); 6362 } 6363 if (ret == XML_ERR_ENTITY_LOOP) { 6364 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6365 return; 6366 } 6367 } 6368 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6369 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6370 /* 6371 * Entity reference callback comes second, it's somewhat 6372 * superfluous but a compatibility to historical behaviour 6373 */ 6374 ctxt->sax->reference(ctxt->userData, ent->name); 6375 } 6376 return; 6377 } 6378 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6379 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6380 /* 6381 * Create a node. 6382 */ 6383 ctxt->sax->reference(ctxt->userData, ent->name); 6384 return; 6385 } 6386 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 6387 /* 6388 * There is a problem on the handling of _private for entities 6389 * (bug 155816): Should we copy the content of the field from 6390 * the entity (possibly overwriting some value set by the user 6391 * when a copy is created), should we leave it alone, or should 6392 * we try to take care of different situations? The problem 6393 * is exacerbated by the usage of this field by the xmlReader. 6394 * To fix this bug, we look at _private on the created node 6395 * and, if it's NULL, we copy in whatever was in the entity. 
6396 * If it's not NULL we leave it alone. This is somewhat of a 6397 * hack - maybe we should have further tests to determine 6398 * what to do. 6399 */ 6400 if ((ctxt->node != NULL) && (ent->children != NULL)) { 6401 /* 6402 * Seems we are generating the DOM content, do 6403 * a simple tree copy for all references except the first 6404 * In the first occurrence list contains the replacement. 6405 * progressive == 2 means we are operating on the Reader 6406 * and since nodes are discarded we must copy all the time. 6407 */ 6408 if (((list == NULL) && (ent->owner == 0)) || 6409 (ctxt->parseMode == XML_PARSE_READER)) { 6410 xmlNodePtr nw = NULL, cur, firstChild = NULL; 6411 6412 /* 6413 * when operating on a reader, the entities definitions 6414 * are always owning the entities subtree. 6415 if (ctxt->parseMode == XML_PARSE_READER) 6416 ent->owner = 1; 6417 */ 6418 6419 cur = ent->children; 6420 while (cur != NULL) { 6421 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6422 if (nw != NULL) { 6423 if (nw->_private == NULL) 6424 nw->_private = cur->_private; 6425 if (firstChild == NULL){ 6426 firstChild = nw; 6427 } 6428 nw = xmlAddChild(ctxt->node, nw); 6429 } 6430 if (cur == ent->last) { 6431 /* 6432 * needed to detect some strange empty 6433 * node cases in the reader tests 6434 */ 6435 if ((ctxt->parseMode == XML_PARSE_READER) && 6436 (nw != NULL) && 6437 (nw->type == XML_ELEMENT_NODE) && 6438 (nw->children == NULL)) 6439 nw->extra = 1; 6440 6441 break; 6442 } 6443 cur = cur->next; 6444 } 6445#ifdef LIBXML_LEGACY_ENABLED 6446 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6447 xmlAddEntityReference(ent, firstChild, nw); 6448#endif /* LIBXML_LEGACY_ENABLED */ 6449 } else if (list == NULL) { 6450 xmlNodePtr nw = NULL, cur, next, last, 6451 firstChild = NULL; 6452 /* 6453 * Copy the entity child list and make it the new 6454 * entity child list. 
The goal is to make sure any 6455 * ID or REF referenced will be the one from the 6456 * document content and not the entity copy. 6457 */ 6458 cur = ent->children; 6459 ent->children = NULL; 6460 last = ent->last; 6461 ent->last = NULL; 6462 while (cur != NULL) { 6463 next = cur->next; 6464 cur->next = NULL; 6465 cur->parent = NULL; 6466 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6467 if (nw != NULL) { 6468 if (nw->_private == NULL) 6469 nw->_private = cur->_private; 6470 if (firstChild == NULL){ 6471 firstChild = cur; 6472 } 6473 xmlAddChild((xmlNodePtr) ent, nw); 6474 xmlAddChild(ctxt->node, cur); 6475 } 6476 if (cur == last) 6477 break; 6478 cur = next; 6479 } 6480 ent->owner = 1; 6481#ifdef LIBXML_LEGACY_ENABLED 6482 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6483 xmlAddEntityReference(ent, firstChild, nw); 6484#endif /* LIBXML_LEGACY_ENABLED */ 6485 } else { 6486 const xmlChar *nbktext; 6487 6488 /* 6489 * the name change is to avoid coalescing of the 6490 * node with a possible previous text one which 6491 * would make ent->children a dangling pointer 6492 */ 6493 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 6494 -1); 6495 if (ent->children->type == XML_TEXT_NODE) 6496 ent->children->name = nbktext; 6497 if ((ent->last != ent->children) && 6498 (ent->last->type == XML_TEXT_NODE)) 6499 ent->last->name = nbktext; 6500 xmlAddChildList(ctxt->node, ent->children); 6501 } 6502 6503 /* 6504 * This is to avoid a nasty side effect, see 6505 * characters() in SAX.c 6506 */ 6507 ctxt->nodemem = 0; 6508 ctxt->nodelen = 0; 6509 return; 6510 } 6511 } 6512 } else { 6513 val = ent->content; 6514 if (val == NULL) return; 6515 /* 6516 * inline the entity. 
6517 */ 6518 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6519 (!ctxt->disableSAX)) 6520 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6521 } 6522 } 6523} 6524 6525/** 6526 * xmlParseEntityRef: 6527 * @ctxt: an XML parser context 6528 * 6529 * parse ENTITY references declarations 6530 * 6531 * [68] EntityRef ::= '&' Name ';' 6532 * 6533 * [ WFC: Entity Declared ] 6534 * In a document without any DTD, a document with only an internal DTD 6535 * subset which contains no parameter entity references, or a document 6536 * with "standalone='yes'", the Name given in the entity reference 6537 * must match that in an entity declaration, except that well-formed 6538 * documents need not declare any of the following entities: amp, lt, 6539 * gt, apos, quot. The declaration of a parameter entity must precede 6540 * any reference to it. Similarly, the declaration of a general entity 6541 * must precede any reference to it which appears in a default value in an 6542 * attribute-list declaration. Note that if entities are declared in the 6543 * external subset or in external parameter entities, a non-validating 6544 * processor is not obligated to read and process their declarations; 6545 * for such documents, the rule that an entity must be declared is a 6546 * well-formedness constraint only if standalone='yes'. 6547 * 6548 * [ WFC: Parsed Entity ] 6549 * An entity reference must not contain the name of an unparsed entity 6550 * 6551 * Returns the xmlEntityPtr if found, or NULL otherwise. 6552 */ 6553xmlEntityPtr 6554xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 6555 const xmlChar *name; 6556 xmlEntityPtr ent = NULL; 6557 6558 GROW; 6559 6560 if (RAW == '&') { 6561 NEXT; 6562 name = xmlParseName(ctxt); 6563 if (name == NULL) { 6564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6565 "xmlParseEntityRef: no name\n"); 6566 } else { 6567 if (RAW == ';') { 6568 NEXT; 6569 /* 6570 * Ask first SAX for entity resolution, otherwise try the 6571 * predefined set. 
6572 */ 6573 if (ctxt->sax != NULL) { 6574 if (ctxt->sax->getEntity != NULL) 6575 ent = ctxt->sax->getEntity(ctxt->userData, name); 6576 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 6577 ent = xmlGetPredefinedEntity(name); 6578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 6579 (ctxt->userData==ctxt)) { 6580 ent = xmlSAX2GetEntity(ctxt, name); 6581 } 6582 } 6583 /* 6584 * [ WFC: Entity Declared ] 6585 * In a document without any DTD, a document with only an 6586 * internal DTD subset which contains no parameter entity 6587 * references, or a document with "standalone='yes'", the 6588 * Name given in the entity reference must match that in an 6589 * entity declaration, except that well-formed documents 6590 * need not declare any of the following entities: amp, lt, 6591 * gt, apos, quot. 6592 * The declaration of a parameter entity must precede any 6593 * reference to it. 6594 * Similarly, the declaration of a general entity must 6595 * precede any reference to it which appears in a default 6596 * value in an attribute-list declaration. Note that if 6597 * entities are declared in the external subset or in 6598 * external parameter entities, a non-validating processor 6599 * is not obligated to read and process their declarations; 6600 * for such documents, the rule that an entity must be 6601 * declared is a well-formedness constraint only if 6602 * standalone='yes'. 
6603 */ 6604 if (ent == NULL) { 6605 if ((ctxt->standalone == 1) || 6606 ((ctxt->hasExternalSubset == 0) && 6607 (ctxt->hasPErefs == 0))) { 6608 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6609 "Entity '%s' not defined\n", name); 6610 } else { 6611 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6612 "Entity '%s' not defined\n", name); 6613 if ((ctxt->inSubset == 0) && 6614 (ctxt->sax != NULL) && 6615 (ctxt->sax->reference != NULL)) { 6616 ctxt->sax->reference(ctxt->userData, name); 6617 } 6618 } 6619 ctxt->valid = 0; 6620 } 6621 6622 /* 6623 * [ WFC: Parsed Entity ] 6624 * An entity reference must not contain the name of an 6625 * unparsed entity 6626 */ 6627 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6628 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6629 "Entity reference to unparsed entity %s\n", name); 6630 } 6631 6632 /* 6633 * [ WFC: No External Entity References ] 6634 * Attribute values cannot contain direct or indirect 6635 * entity references to external entities. 6636 */ 6637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6638 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6639 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6640 "Attribute references external entity '%s'\n", name); 6641 } 6642 /* 6643 * [ WFC: No < in Attribute Values ] 6644 * The replacement text of any entity referred to directly or 6645 * indirectly in an attribute value (other than "<") must 6646 * not contain a <. 6647 */ 6648 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6649 (ent != NULL) && 6650 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6651 (ent->content != NULL) && 6652 (xmlStrchr(ent->content, '<'))) { 6653 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6654 "'<' in entity '%s' is not allowed in attributes values\n", name); 6655 } 6656 6657 /* 6658 * Internal check, no parameter entities here ... 
6659 */ 6660 else { 6661 switch (ent->etype) { 6662 case XML_INTERNAL_PARAMETER_ENTITY: 6663 case XML_EXTERNAL_PARAMETER_ENTITY: 6664 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6665 "Attempt to reference the parameter entity '%s'\n", 6666 name); 6667 break; 6668 default: 6669 break; 6670 } 6671 } 6672 6673 /* 6674 * [ WFC: No Recursion ] 6675 * A parsed entity must not contain a recursive reference 6676 * to itself, either directly or indirectly. 6677 * Done somewhere else 6678 */ 6679 6680 } else { 6681 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6682 } 6683 } 6684 } 6685 return(ent); 6686} 6687 6688/** 6689 * xmlParseStringEntityRef: 6690 * @ctxt: an XML parser context 6691 * @str: a pointer to an index in the string 6692 * 6693 * parse ENTITY references declarations, but this version parses it from 6694 * a string value. 6695 * 6696 * [68] EntityRef ::= '&' Name ';' 6697 * 6698 * [ WFC: Entity Declared ] 6699 * In a document without any DTD, a document with only an internal DTD 6700 * subset which contains no parameter entity references, or a document 6701 * with "standalone='yes'", the Name given in the entity reference 6702 * must match that in an entity declaration, except that well-formed 6703 * documents need not declare any of the following entities: amp, lt, 6704 * gt, apos, quot. The declaration of a parameter entity must precede 6705 * any reference to it. Similarly, the declaration of a general entity 6706 * must precede any reference to it which appears in a default value in an 6707 * attribute-list declaration. Note that if entities are declared in the 6708 * external subset or in external parameter entities, a non-validating 6709 * processor is not obligated to read and process their declarations; 6710 * for such documents, the rule that an entity must be declared is a 6711 * well-formedness constraint only if standalone='yes'. 
6712 * 6713 * [ WFC: Parsed Entity ] 6714 * An entity reference must not contain the name of an unparsed entity 6715 * 6716 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 6717 * is updated to the current location in the string. 6718 */ 6719xmlEntityPtr 6720xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 6721 xmlChar *name; 6722 const xmlChar *ptr; 6723 xmlChar cur; 6724 xmlEntityPtr ent = NULL; 6725 6726 if ((str == NULL) || (*str == NULL)) 6727 return(NULL); 6728 ptr = *str; 6729 cur = *ptr; 6730 if (cur == '&') { 6731 ptr++; 6732 cur = *ptr; 6733 name = xmlParseStringName(ctxt, &ptr); 6734 if (name == NULL) { 6735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6736 "xmlParseStringEntityRef: no name\n"); 6737 } else { 6738 if (*ptr == ';') { 6739 ptr++; 6740 /* 6741 * Ask first SAX for entity resolution, otherwise try the 6742 * predefined set. 6743 */ 6744 if (ctxt->sax != NULL) { 6745 if (ctxt->sax->getEntity != NULL) 6746 ent = ctxt->sax->getEntity(ctxt->userData, name); 6747 if (ent == NULL) 6748 ent = xmlGetPredefinedEntity(name); 6749 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6750 ent = xmlSAX2GetEntity(ctxt, name); 6751 } 6752 } 6753 /* 6754 * [ WFC: Entity Declared ] 6755 * In a document without any DTD, a document with only an 6756 * internal DTD subset which contains no parameter entity 6757 * references, or a document with "standalone='yes'", the 6758 * Name given in the entity reference must match that in an 6759 * entity declaration, except that well-formed documents 6760 * need not declare any of the following entities: amp, lt, 6761 * gt, apos, quot. 6762 * The declaration of a parameter entity must precede any 6763 * reference to it. 6764 * Similarly, the declaration of a general entity must 6765 * precede any reference to it which appears in a default 6766 * value in an attribute-list declaration. 
Note that if 6767 * entities are declared in the external subset or in 6768 * external parameter entities, a non-validating processor 6769 * is not obligated to read and process their declarations; 6770 * for such documents, the rule that an entity must be 6771 * declared is a well-formedness constraint only if 6772 * standalone='yes'. 6773 */ 6774 if (ent == NULL) { 6775 if ((ctxt->standalone == 1) || 6776 ((ctxt->hasExternalSubset == 0) && 6777 (ctxt->hasPErefs == 0))) { 6778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6779 "Entity '%s' not defined\n", name); 6780 } else { 6781 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6782 "Entity '%s' not defined\n", 6783 name); 6784 } 6785 /* TODO ? check regressions ctxt->valid = 0; */ 6786 } 6787 6788 /* 6789 * [ WFC: Parsed Entity ] 6790 * An entity reference must not contain the name of an 6791 * unparsed entity 6792 */ 6793 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6794 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6795 "Entity reference to unparsed entity %s\n", name); 6796 } 6797 6798 /* 6799 * [ WFC: No External Entity References ] 6800 * Attribute values cannot contain direct or indirect 6801 * entity references to external entities. 6802 */ 6803 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6804 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6805 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6806 "Attribute references external entity '%s'\n", name); 6807 } 6808 /* 6809 * [ WFC: No < in Attribute Values ] 6810 * The replacement text of any entity referred to directly or 6811 * indirectly in an attribute value (other than "<") must 6812 * not contain a <. 
6813 */ 6814 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6815 (ent != NULL) && 6816 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6817 (ent->content != NULL) && 6818 (xmlStrchr(ent->content, '<'))) { 6819 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6820 "'<' in entity '%s' is not allowed in attributes values\n", 6821 name); 6822 } 6823 6824 /* 6825 * Internal check, no parameter entities here ... 6826 */ 6827 else { 6828 switch (ent->etype) { 6829 case XML_INTERNAL_PARAMETER_ENTITY: 6830 case XML_EXTERNAL_PARAMETER_ENTITY: 6831 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6832 "Attempt to reference the parameter entity '%s'\n", 6833 name); 6834 break; 6835 default: 6836 break; 6837 } 6838 } 6839 6840 /* 6841 * [ WFC: No Recursion ] 6842 * A parsed entity must not contain a recursive reference 6843 * to itself, either directly or indirectly. 6844 * Done somewhere else 6845 */ 6846 6847 } else { 6848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6849 } 6850 xmlFree(name); 6851 } 6852 } 6853 *str = ptr; 6854 return(ent); 6855} 6856 6857/** 6858 * xmlParsePEReference: 6859 * @ctxt: an XML parser context 6860 * 6861 * parse PEReference declarations 6862 * The entity content is handled directly by pushing it's content as 6863 * a new input stream. 6864 * 6865 * [69] PEReference ::= '%' Name ';' 6866 * 6867 * [ WFC: No Recursion ] 6868 * A parsed entity must not contain a recursive 6869 * reference to itself, either directly or indirectly. 6870 * 6871 * [ WFC: Entity Declared ] 6872 * In a document without any DTD, a document with only an internal DTD 6873 * subset which contains no parameter entity references, or a document 6874 * with "standalone='yes'", ... ... The declaration of a parameter 6875 * entity must precede any reference to it... 6876 * 6877 * [ VC: Entity Declared ] 6878 * In a document with an external subset or external parameter entities 6879 * with "standalone='no'", ... ... 
The declaration of a parameter entity 6880 * must precede any reference to it... 6881 * 6882 * [ WFC: In DTD ] 6883 * Parameter-entity references may only appear in the DTD. 6884 * NOTE: misleading but this is handled. 6885 */ 6886void 6887xmlParsePEReference(xmlParserCtxtPtr ctxt) 6888{ 6889 const xmlChar *name; 6890 xmlEntityPtr entity = NULL; 6891 xmlParserInputPtr input; 6892 6893 if (RAW == '%') { 6894 NEXT; 6895 name = xmlParseName(ctxt); 6896 if (name == NULL) { 6897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6898 "xmlParsePEReference: no name\n"); 6899 } else { 6900 if (RAW == ';') { 6901 NEXT; 6902 if ((ctxt->sax != NULL) && 6903 (ctxt->sax->getParameterEntity != NULL)) 6904 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6905 name); 6906 if (entity == NULL) { 6907 /* 6908 * [ WFC: Entity Declared ] 6909 * In a document without any DTD, a document with only an 6910 * internal DTD subset which contains no parameter entity 6911 * references, or a document with "standalone='yes'", ... 6912 * ... The declaration of a parameter entity must precede 6913 * any reference to it... 6914 */ 6915 if ((ctxt->standalone == 1) || 6916 ((ctxt->hasExternalSubset == 0) && 6917 (ctxt->hasPErefs == 0))) { 6918 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6919 "PEReference: %%%s; not found\n", 6920 name); 6921 } else { 6922 /* 6923 * [ VC: Entity Declared ] 6924 * In a document with an external subset or external 6925 * parameter entities with "standalone='no'", ... 6926 * ... The declaration of a parameter entity must 6927 * precede any reference to it... 
6928 */ 6929 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6930 "PEReference: %%%s; not found\n", 6931 name, NULL); 6932 ctxt->valid = 0; 6933 } 6934 } else { 6935 /* 6936 * Internal checking in case the entity quest barfed 6937 */ 6938 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6939 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6940 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6941 "Internal: %%%s; is not a parameter entity\n", 6942 name, NULL); 6943 } else if (ctxt->input->free != deallocblankswrapper) { 6944 input = 6945 xmlNewBlanksWrapperInputStream(ctxt, entity); 6946 xmlPushInput(ctxt, input); 6947 } else { 6948 /* 6949 * TODO !!! 6950 * handle the extra spaces added before and after 6951 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6952 */ 6953 input = xmlNewEntityInputStream(ctxt, entity); 6954 xmlPushInput(ctxt, input); 6955 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6956 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6957 (IS_BLANK_CH(NXT(5)))) { 6958 xmlParseTextDecl(ctxt); 6959 if (ctxt->errNo == 6960 XML_ERR_UNSUPPORTED_ENCODING) { 6961 /* 6962 * The XML REC instructs us to stop parsing 6963 * right here 6964 */ 6965 ctxt->instate = XML_PARSER_EOF; 6966 return; 6967 } 6968 } 6969 } 6970 } 6971 ctxt->hasPErefs = 1; 6972 } else { 6973 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6974 } 6975 } 6976 } 6977} 6978 6979/** 6980 * xmlLoadEntityContent: 6981 * @ctxt: an XML parser context 6982 * @entity: an unloaded system entity 6983 * 6984 * Load the original content of the given system entity from the 6985 * ExternalID/SystemID given. 
This is to be used for Included in Literal 6986 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 6987 * 6988 * Returns 0 in case of success and -1 in case of failure 6989 */ 6990static int 6991xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 6992 xmlParserInputPtr input; 6993 xmlBufferPtr buf; 6994 int l, c; 6995 int count = 0; 6996 6997 if ((ctxt == NULL) || (entity == NULL) || 6998 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 6999 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7000 (entity->content != NULL)) { 7001 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7002 "xmlLoadEntityContent parameter error"); 7003 return(-1); 7004 } 7005 7006 if (xmlParserDebugEntities) 7007 xmlGenericError(xmlGenericErrorContext, 7008 "Reading %s entity content input\n", entity->name); 7009 7010 buf = xmlBufferCreate(); 7011 if (buf == NULL) { 7012 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7013 "xmlLoadEntityContent parameter error"); 7014 return(-1); 7015 } 7016 7017 input = xmlNewEntityInputStream(ctxt, entity); 7018 if (input == NULL) { 7019 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7020 "xmlLoadEntityContent input error"); 7021 xmlBufferFree(buf); 7022 return(-1); 7023 } 7024 7025 /* 7026 * Push the entity as the current input, read char by char 7027 * saving to the buffer until the end of the entity or an error 7028 */ 7029 xmlPushInput(ctxt, input); 7030 GROW; 7031 c = CUR_CHAR(l); 7032 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7033 (IS_CHAR(c))) { 7034 xmlBufferAdd(buf, ctxt->input->cur, l); 7035 if (count++ > 100) { 7036 count = 0; 7037 GROW; 7038 } 7039 NEXTL(l); 7040 c = CUR_CHAR(l); 7041 } 7042 7043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7044 xmlPopInput(ctxt); 7045 } else if (!IS_CHAR(c)) { 7046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7047 "xmlLoadEntityContent: invalid char value %d\n", 7048 c); 7049 xmlBufferFree(buf); 7050 return(-1); 7051 
} 7052 entity->content = buf->content; 7053 buf->content = NULL; 7054 xmlBufferFree(buf); 7055 7056 return(0); 7057} 7058 7059/** 7060 * xmlParseStringPEReference: 7061 * @ctxt: an XML parser context 7062 * @str: a pointer to an index in the string 7063 * 7064 * parse PEReference declarations 7065 * 7066 * [69] PEReference ::= '%' Name ';' 7067 * 7068 * [ WFC: No Recursion ] 7069 * A parsed entity must not contain a recursive 7070 * reference to itself, either directly or indirectly. 7071 * 7072 * [ WFC: Entity Declared ] 7073 * In a document without any DTD, a document with only an internal DTD 7074 * subset which contains no parameter entity references, or a document 7075 * with "standalone='yes'", ... ... The declaration of a parameter 7076 * entity must precede any reference to it... 7077 * 7078 * [ VC: Entity Declared ] 7079 * In a document with an external subset or external parameter entities 7080 * with "standalone='no'", ... ... The declaration of a parameter entity 7081 * must precede any reference to it... 7082 * 7083 * [ WFC: In DTD ] 7084 * Parameter-entity references may only appear in the DTD. 7085 * NOTE: misleading but this is handled. 7086 * 7087 * Returns the string of the entity content. 
7088 * str is updated to the current value of the index 7089 */ 7090xmlEntityPtr 7091xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7092 const xmlChar *ptr; 7093 xmlChar cur; 7094 xmlChar *name; 7095 xmlEntityPtr entity = NULL; 7096 7097 if ((str == NULL) || (*str == NULL)) return(NULL); 7098 ptr = *str; 7099 cur = *ptr; 7100 if (cur == '%') { 7101 ptr++; 7102 cur = *ptr; 7103 name = xmlParseStringName(ctxt, &ptr); 7104 if (name == NULL) { 7105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7106 "xmlParseStringPEReference: no name\n"); 7107 } else { 7108 cur = *ptr; 7109 if (cur == ';') { 7110 ptr++; 7111 cur = *ptr; 7112 if ((ctxt->sax != NULL) && 7113 (ctxt->sax->getParameterEntity != NULL)) 7114 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7115 name); 7116 if (entity == NULL) { 7117 /* 7118 * [ WFC: Entity Declared ] 7119 * In a document without any DTD, a document with only an 7120 * internal DTD subset which contains no parameter entity 7121 * references, or a document with "standalone='yes'", ... 7122 * ... The declaration of a parameter entity must precede 7123 * any reference to it... 7124 */ 7125 if ((ctxt->standalone == 1) || 7126 ((ctxt->hasExternalSubset == 0) && 7127 (ctxt->hasPErefs == 0))) { 7128 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7129 "PEReference: %%%s; not found\n", name); 7130 } else { 7131 /* 7132 * [ VC: Entity Declared ] 7133 * In a document with an external subset or external 7134 * parameter entities with "standalone='no'", ... 7135 * ... The declaration of a parameter entity must 7136 * precede any reference to it... 
7137 */ 7138 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7139 "PEReference: %%%s; not found\n", 7140 name, NULL); 7141 ctxt->valid = 0; 7142 } 7143 } else { 7144 /* 7145 * Internal checking in case the entity quest barfed 7146 */ 7147 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7148 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7149 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7150 "%%%s; is not a parameter entity\n", 7151 name, NULL); 7152 } 7153 } 7154 ctxt->hasPErefs = 1; 7155 } else { 7156 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7157 } 7158 xmlFree(name); 7159 } 7160 } 7161 *str = ptr; 7162 return(entity); 7163} 7164 7165/** 7166 * xmlParseDocTypeDecl: 7167 * @ctxt: an XML parser context 7168 * 7169 * parse a DOCTYPE declaration 7170 * 7171 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7172 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7173 * 7174 * [ VC: Root Element Type ] 7175 * The Name in the document type declaration must match the element 7176 * type of the root element. 7177 */ 7178 7179void 7180xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7181 const xmlChar *name = NULL; 7182 xmlChar *ExternalID = NULL; 7183 xmlChar *URI = NULL; 7184 7185 /* 7186 * We know that '<!DOCTYPE' has been detected. 7187 */ 7188 SKIP(9); 7189 7190 SKIP_BLANKS; 7191 7192 /* 7193 * Parse the DOCTYPE name. 7194 */ 7195 name = xmlParseName(ctxt); 7196 if (name == NULL) { 7197 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7198 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7199 } 7200 ctxt->intSubName = name; 7201 7202 SKIP_BLANKS; 7203 7204 /* 7205 * Check for SystemID and ExternalID 7206 */ 7207 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7208 7209 if ((URI != NULL) || (ExternalID != NULL)) { 7210 ctxt->hasExternalSubset = 1; 7211 } 7212 ctxt->extSubURI = URI; 7213 ctxt->extSubSystem = ExternalID; 7214 7215 SKIP_BLANKS; 7216 7217 /* 7218 * Create and update the internal subset. 
7219 */ 7220 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7221 (!ctxt->disableSAX)) 7222 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7223 7224 /* 7225 * Is there any internal subset declarations ? 7226 * they are handled separately in xmlParseInternalSubset() 7227 */ 7228 if (RAW == '[') 7229 return; 7230 7231 /* 7232 * We should be at the end of the DOCTYPE declaration. 7233 */ 7234 if (RAW != '>') { 7235 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7236 } 7237 NEXT; 7238} 7239 7240/** 7241 * xmlParseInternalSubset: 7242 * @ctxt: an XML parser context 7243 * 7244 * parse the internal subset declaration 7245 * 7246 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7247 */ 7248 7249static void 7250xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 7251 /* 7252 * Is there any DTD definition ? 7253 */ 7254 if (RAW == '[') { 7255 ctxt->instate = XML_PARSER_DTD; 7256 NEXT; 7257 /* 7258 * Parse the succession of Markup declarations and 7259 * PEReferences. 7260 * Subsequence (markupdecl | PEReference | S)* 7261 */ 7262 while (RAW != ']') { 7263 const xmlChar *check = CUR_PTR; 7264 unsigned int cons = ctxt->input->consumed; 7265 7266 SKIP_BLANKS; 7267 xmlParseMarkupDecl(ctxt); 7268 xmlParsePEReference(ctxt); 7269 7270 /* 7271 * Pop-up of finished entities. 7272 */ 7273 while ((RAW == 0) && (ctxt->inputNr > 1)) 7274 xmlPopInput(ctxt); 7275 7276 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7277 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7278 "xmlParseInternalSubset: error detected in Markup declaration\n"); 7279 break; 7280 } 7281 } 7282 if (RAW == ']') { 7283 NEXT; 7284 SKIP_BLANKS; 7285 } 7286 } 7287 7288 /* 7289 * We should be at the end of the DOCTYPE declaration. 
7290 */ 7291 if (RAW != '>') { 7292 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7293 } 7294 NEXT; 7295} 7296 7297#ifdef LIBXML_SAX1_ENABLED 7298/** 7299 * xmlParseAttribute: 7300 * @ctxt: an XML parser context 7301 * @value: a xmlChar ** used to store the value of the attribute 7302 * 7303 * parse an attribute 7304 * 7305 * [41] Attribute ::= Name Eq AttValue 7306 * 7307 * [ WFC: No External Entity References ] 7308 * Attribute values cannot contain direct or indirect entity references 7309 * to external entities. 7310 * 7311 * [ WFC: No < in Attribute Values ] 7312 * The replacement text of any entity referred to directly or indirectly in 7313 * an attribute value (other than "<") must not contain a <. 7314 * 7315 * [ VC: Attribute Value Type ] 7316 * The attribute must have been declared; the value must be of the type 7317 * declared for it. 7318 * 7319 * [25] Eq ::= S? '=' S? 7320 * 7321 * With namespace: 7322 * 7323 * [NS 11] Attribute ::= QName Eq AttValue 7324 * 7325 * Also the case QName == xmlns:??? is handled independently as a namespace 7326 * definition. 7327 * 7328 * Returns the attribute name, and the value in *value. 
7329 */ 7330 7331const xmlChar * 7332xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 7333 const xmlChar *name; 7334 xmlChar *val; 7335 7336 *value = NULL; 7337 GROW; 7338 name = xmlParseName(ctxt); 7339 if (name == NULL) { 7340 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7341 "error parsing attribute name\n"); 7342 return(NULL); 7343 } 7344 7345 /* 7346 * read the value 7347 */ 7348 SKIP_BLANKS; 7349 if (RAW == '=') { 7350 NEXT; 7351 SKIP_BLANKS; 7352 val = xmlParseAttValue(ctxt); 7353 ctxt->instate = XML_PARSER_CONTENT; 7354 } else { 7355 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7356 "Specification mandate value for attribute %s\n", name); 7357 return(NULL); 7358 } 7359 7360 /* 7361 * Check that xml:lang conforms to the specification 7362 * No more registered as an error, just generate a warning now 7363 * since this was deprecated in XML second edition 7364 */ 7365 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7366 if (!xmlCheckLanguageID(val)) { 7367 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7368 "Malformed value for xml:lang : %s\n", 7369 val, NULL); 7370 } 7371 } 7372 7373 /* 7374 * Check that xml:space conforms to the specification 7375 */ 7376 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7377 if (xmlStrEqual(val, BAD_CAST "default")) 7378 *(ctxt->space) = 0; 7379 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7380 *(ctxt->space) = 1; 7381 else { 7382 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 7383"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7384 val, NULL); 7385 } 7386 } 7387 7388 *value = val; 7389 return(name); 7390} 7391 7392/** 7393 * xmlParseStartTag: 7394 * @ctxt: an XML parser context 7395 * 7396 * parse a start of tag either for rule element or 7397 * EmptyElement. In both case we don't parse the tag closing chars. 7398 * 7399 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 7400 * 7401 * [ WFC: Unique Att Spec ] 7402 * No attribute name may appear more than once in the same start-tag or 7403 * empty-element tag. 7404 * 7405 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7406 * 7407 * [ WFC: Unique Att Spec ] 7408 * No attribute name may appear more than once in the same start-tag or 7409 * empty-element tag. 7410 * 7411 * With namespace: 7412 * 7413 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7414 * 7415 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7416 * 7417 * Returns the element name parsed 7418 */ 7419 7420const xmlChar * 7421xmlParseStartTag(xmlParserCtxtPtr ctxt) { 7422 const xmlChar *name; 7423 const xmlChar *attname; 7424 xmlChar *attvalue; 7425 const xmlChar **atts = ctxt->atts; 7426 int nbatts = 0; 7427 int maxatts = ctxt->maxatts; 7428 int i; 7429 7430 if (RAW != '<') return(NULL); 7431 NEXT1; 7432 7433 name = xmlParseName(ctxt); 7434 if (name == NULL) { 7435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7436 "xmlParseStartTag: invalid element name\n"); 7437 return(NULL); 7438 } 7439 7440 /* 7441 * Now parse the attributes, it ends up with the ending 7442 * 7443 * (S Attribute)* S? 7444 */ 7445 SKIP_BLANKS; 7446 GROW; 7447 7448 while ((RAW != '>') && 7449 ((RAW != '/') || (NXT(1) != '>')) && 7450 (IS_BYTE_CHAR(RAW))) { 7451 const xmlChar *q = CUR_PTR; 7452 unsigned int cons = ctxt->input->consumed; 7453 7454 attname = xmlParseAttribute(ctxt, &attvalue); 7455 if ((attname != NULL) && (attvalue != NULL)) { 7456 /* 7457 * [ WFC: Unique Att Spec ] 7458 * No attribute name may appear more than once in the same 7459 * start-tag or empty-element tag. 
7460 */ 7461 for (i = 0; i < nbatts;i += 2) { 7462 if (xmlStrEqual(atts[i], attname)) { 7463 xmlErrAttributeDup(ctxt, NULL, attname); 7464 xmlFree(attvalue); 7465 goto failed; 7466 } 7467 } 7468 /* 7469 * Add the pair to atts 7470 */ 7471 if (atts == NULL) { 7472 maxatts = 22; /* allow for 10 attrs by default */ 7473 atts = (const xmlChar **) 7474 xmlMalloc(maxatts * sizeof(xmlChar *)); 7475 if (atts == NULL) { 7476 xmlErrMemory(ctxt, NULL); 7477 if (attvalue != NULL) 7478 xmlFree(attvalue); 7479 goto failed; 7480 } 7481 ctxt->atts = atts; 7482 ctxt->maxatts = maxatts; 7483 } else if (nbatts + 4 > maxatts) { 7484 const xmlChar **n; 7485 7486 maxatts *= 2; 7487 n = (const xmlChar **) xmlRealloc((void *) atts, 7488 maxatts * sizeof(const xmlChar *)); 7489 if (n == NULL) { 7490 xmlErrMemory(ctxt, NULL); 7491 if (attvalue != NULL) 7492 xmlFree(attvalue); 7493 goto failed; 7494 } 7495 atts = n; 7496 ctxt->atts = atts; 7497 ctxt->maxatts = maxatts; 7498 } 7499 atts[nbatts++] = attname; 7500 atts[nbatts++] = attvalue; 7501 atts[nbatts] = NULL; 7502 atts[nbatts + 1] = NULL; 7503 } else { 7504 if (attvalue != NULL) 7505 xmlFree(attvalue); 7506 } 7507 7508failed: 7509 7510 GROW 7511 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7512 break; 7513 if (!IS_BLANK_CH(RAW)) { 7514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7515 "attributes construct error\n"); 7516 } 7517 SKIP_BLANKS; 7518 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7519 (attname == NULL) && (attvalue == NULL)) { 7520 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 7521 "xmlParseStartTag: problem parsing attributes\n"); 7522 break; 7523 } 7524 SHRINK; 7525 GROW; 7526 } 7527 7528 /* 7529 * SAX: Start of Element ! 
7530 */ 7531 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 7532 (!ctxt->disableSAX)) { 7533 if (nbatts > 0) 7534 ctxt->sax->startElement(ctxt->userData, name, atts); 7535 else 7536 ctxt->sax->startElement(ctxt->userData, name, NULL); 7537 } 7538 7539 if (atts != NULL) { 7540 /* Free only the content strings */ 7541 for (i = 1;i < nbatts;i+=2) 7542 if (atts[i] != NULL) 7543 xmlFree((xmlChar *) atts[i]); 7544 } 7545 return(name); 7546} 7547 7548/** 7549 * xmlParseEndTag1: 7550 * @ctxt: an XML parser context 7551 * @line: line of the start tag 7552 * @nsNr: number of namespaces on the start tag 7553 * 7554 * parse an end of tag 7555 * 7556 * [42] ETag ::= '</' Name S? '>' 7557 * 7558 * With namespace 7559 * 7560 * [NS 9] ETag ::= '</' QName S? '>' 7561 */ 7562 7563static void 7564xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 7565 const xmlChar *name; 7566 7567 GROW; 7568 if ((RAW != '<') || (NXT(1) != '/')) { 7569 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 7570 "xmlParseEndTag: '</' not found\n"); 7571 return; 7572 } 7573 SKIP(2); 7574 7575 name = xmlParseNameAndCompare(ctxt,ctxt->name); 7576 7577 /* 7578 * We should definitely be at the ending "S? '>'" part 7579 */ 7580 GROW; 7581 SKIP_BLANKS; 7582 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7583 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7584 } else 7585 NEXT1; 7586 7587 /* 7588 * [ WFC: Element Type Match ] 7589 * The Name in an element's end-tag must match the element type in the 7590 * start-tag. 
7591 * 7592 */ 7593 if (name != (xmlChar*)1) { 7594 if (name == NULL) name = BAD_CAST "unparseable"; 7595 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7596 "Opening and ending tag mismatch: %s line %d and %s\n", 7597 ctxt->name, line, name); 7598 } 7599 7600 /* 7601 * SAX: End of Tag 7602 */ 7603 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7604 (!ctxt->disableSAX)) 7605 ctxt->sax->endElement(ctxt->userData, ctxt->name); 7606 7607 namePop(ctxt); 7608 spacePop(ctxt); 7609 return; 7610} 7611 7612/** 7613 * xmlParseEndTag: 7614 * @ctxt: an XML parser context 7615 * 7616 * parse an end of tag 7617 * 7618 * [42] ETag ::= '</' Name S? '>' 7619 * 7620 * With namespace 7621 * 7622 * [NS 9] ETag ::= '</' QName S? '>' 7623 */ 7624 7625void 7626xmlParseEndTag(xmlParserCtxtPtr ctxt) { 7627 xmlParseEndTag1(ctxt, 0); 7628} 7629#endif /* LIBXML_SAX1_ENABLED */ 7630 7631/************************************************************************ 7632 * * 7633 * SAX 2 specific operations * 7634 * * 7635 ************************************************************************/ 7636 7637static const xmlChar * 7638xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 7639 int len = 0, l; 7640 int c; 7641 int count = 0; 7642 7643 /* 7644 * Handler for more complex cases 7645 */ 7646 GROW; 7647 c = CUR_CHAR(l); 7648 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 7649 (!IS_LETTER(c) && (c != '_'))) { 7650 return(NULL); 7651 } 7652 7653 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 7654 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 7655 (c == '.') || (c == '-') || (c == '_') || 7656 (IS_COMBINING(c)) || 7657 (IS_EXTENDER(c)))) { 7658 if (count++ > 100) { 7659 count = 0; 7660 GROW; 7661 } 7662 len += l; 7663 NEXTL(l); 7664 c = CUR_CHAR(l); 7665 } 7666 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 7667} 7668 7669/* 7670 * xmlGetNamespace: 7671 * @ctxt: an XML parser context 7672 * @prefix: the prefix to lookup 7673 * 
7674 * Lookup the namespace name for the @prefix (which ca be NULL) 7675 * The prefix must come from the @ctxt->dict dictionnary 7676 * 7677 * Returns the namespace name or NULL if not bound 7678 */ 7679static const xmlChar * 7680xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 7681 int i; 7682 7683 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 7684 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 7685 if (ctxt->nsTab[i] == prefix) { 7686 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 7687 return(NULL); 7688 return(ctxt->nsTab[i + 1]); 7689 } 7690 return(NULL); 7691} 7692 7693/** 7694 * xmlParseNCName: 7695 * @ctxt: an XML parser context 7696 * @len: lenght of the string parsed 7697 * 7698 * parse an XML name. 7699 * 7700 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 7701 * CombiningChar | Extender 7702 * 7703 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 7704 * 7705 * Returns the Name parsed or NULL 7706 */ 7707 7708static const xmlChar * 7709xmlParseNCName(xmlParserCtxtPtr ctxt) { 7710 const xmlChar *in; 7711 const xmlChar *ret; 7712 int count = 0; 7713 7714 /* 7715 * Accelerator for simple ASCII names 7716 */ 7717 in = ctxt->input->cur; 7718 if (((*in >= 0x61) && (*in <= 0x7A)) || 7719 ((*in >= 0x41) && (*in <= 0x5A)) || 7720 (*in == '_')) { 7721 in++; 7722 while (((*in >= 0x61) && (*in <= 0x7A)) || 7723 ((*in >= 0x41) && (*in <= 0x5A)) || 7724 ((*in >= 0x30) && (*in <= 0x39)) || 7725 (*in == '_') || (*in == '-') || 7726 (*in == '.')) 7727 in++; 7728 if ((*in > 0) && (*in < 0x80)) { 7729 count = in - ctxt->input->cur; 7730 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 7731 ctxt->input->cur = in; 7732 ctxt->nbChars += count; 7733 ctxt->input->col += count; 7734 if (ret == NULL) { 7735 xmlErrMemory(ctxt, NULL); 7736 } 7737 return(ret); 7738 } 7739 } 7740 return(xmlParseNCNameComplex(ctxt)); 7741} 7742 7743/** 7744 * xmlParseQName: 7745 * @ctxt: an XML parser context 7746 * @prefix: pointer to store the prefix part 7747 * 
7748 * parse an XML Namespace QName 7749 * 7750 * [6] QName ::= (Prefix ':')? LocalPart 7751 * [7] Prefix ::= NCName 7752 * [8] LocalPart ::= NCName 7753 * 7754 * Returns the Name parsed or NULL 7755 */ 7756 7757static const xmlChar * 7758xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 7759 const xmlChar *l, *p; 7760 7761 GROW; 7762 7763 l = xmlParseNCName(ctxt); 7764 if (l == NULL) { 7765 if (CUR == ':') { 7766 l = xmlParseName(ctxt); 7767 if (l != NULL) { 7768 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7769 "Failed to parse QName '%s'\n", l, NULL, NULL); 7770 *prefix = NULL; 7771 return(l); 7772 } 7773 } 7774 return(NULL); 7775 } 7776 if (CUR == ':') { 7777 NEXT; 7778 p = l; 7779 l = xmlParseNCName(ctxt); 7780 if (l == NULL) { 7781 xmlChar *tmp; 7782 7783 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7784 "Failed to parse QName '%s:'\n", p, NULL, NULL); 7785 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 7786 p = xmlDictLookup(ctxt->dict, tmp, -1); 7787 if (tmp != NULL) xmlFree(tmp); 7788 *prefix = NULL; 7789 return(p); 7790 } 7791 if (CUR == ':') { 7792 xmlChar *tmp; 7793 7794 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7795 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 7796 NEXT; 7797 tmp = (xmlChar *) xmlParseName(ctxt); 7798 if (tmp != NULL) { 7799 tmp = xmlBuildQName(tmp, l, NULL, 0); 7800 l = xmlDictLookup(ctxt->dict, tmp, -1); 7801 if (tmp != NULL) xmlFree(tmp); 7802 *prefix = p; 7803 return(l); 7804 } 7805 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 7806 l = xmlDictLookup(ctxt->dict, tmp, -1); 7807 if (tmp != NULL) xmlFree(tmp); 7808 *prefix = p; 7809 return(l); 7810 } 7811 *prefix = p; 7812 } else 7813 *prefix = NULL; 7814 return(l); 7815} 7816 7817/** 7818 * xmlParseQNameAndCompare: 7819 * @ctxt: an XML parser context 7820 * @name: the localname 7821 * @prefix: the prefix, if any. 
7822 * 7823 * parse an XML name and compares for match 7824 * (specialized for endtag parsing) 7825 * 7826 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7827 * and the name for mismatch 7828 */ 7829 7830static const xmlChar * 7831xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7832 xmlChar const *prefix) { 7833 const xmlChar *cmp = name; 7834 const xmlChar *in; 7835 const xmlChar *ret; 7836 const xmlChar *prefix2; 7837 7838 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7839 7840 GROW; 7841 in = ctxt->input->cur; 7842 7843 cmp = prefix; 7844 while (*in != 0 && *in == *cmp) { 7845 ++in; 7846 ++cmp; 7847 } 7848 if ((*cmp == 0) && (*in == ':')) { 7849 in++; 7850 cmp = name; 7851 while (*in != 0 && *in == *cmp) { 7852 ++in; 7853 ++cmp; 7854 } 7855 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 7856 /* success */ 7857 ctxt->input->cur = in; 7858 return((const xmlChar*) 1); 7859 } 7860 } 7861 /* 7862 * all strings coms from the dictionary, equality can be done directly 7863 */ 7864 ret = xmlParseQName (ctxt, &prefix2); 7865 if ((ret == name) && (prefix == prefix2)) 7866 return((const xmlChar*) 1); 7867 return ret; 7868} 7869 7870/** 7871 * xmlParseAttValueInternal: 7872 * @ctxt: an XML parser context 7873 * @len: attribute len result 7874 * @alloc: whether the attribute was reallocated as a new string 7875 * @normalize: if 1 then further non-CDATA normalization must be done 7876 * 7877 * parse a value for an attribute. 7878 * NOTE: if no normalization is needed, the routine will return pointers 7879 * directly from the data buffer. 
7880 * 7881 * 3.3.3 Attribute-Value Normalization: 7882 * Before the value of an attribute is passed to the application or 7883 * checked for validity, the XML processor must normalize it as follows: 7884 * - a character reference is processed by appending the referenced 7885 * character to the attribute value 7886 * - an entity reference is processed by recursively processing the 7887 * replacement text of the entity 7888 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 7889 * appending #x20 to the normalized value, except that only a single 7890 * #x20 is appended for a "#xD#xA" sequence that is part of an external 7891 * parsed entity or the literal entity value of an internal parsed entity 7892 * - other characters are processed by appending them to the normalized value 7893 * If the declared value is not CDATA, then the XML processor must further 7894 * process the normalized attribute value by discarding any leading and 7895 * trailing space (#x20) characters, and by replacing sequences of space 7896 * (#x20) characters by a single space (#x20) character. 7897 * All attributes for which no declaration has been read should be treated 7898 * by a non-validating parser as if declared CDATA. 7899 * 7900 * Returns the AttValue parsed or NULL. The value has to be freed by the 7901 * caller if it was copied, this can be detected by val[*len] == 0. 
7902 */ 7903 7904static xmlChar * 7905xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7906 int normalize) 7907{ 7908 xmlChar limit = 0; 7909 const xmlChar *in = NULL, *start, *end, *last; 7910 xmlChar *ret = NULL; 7911 7912 GROW; 7913 in = (xmlChar *) CUR_PTR; 7914 if (*in != '"' && *in != '\'') { 7915 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7916 return (NULL); 7917 } 7918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7919 7920 /* 7921 * try to handle in this routine the most common case where no 7922 * allocation of a new string is required and where content is 7923 * pure ASCII. 7924 */ 7925 limit = *in++; 7926 end = ctxt->input->end; 7927 start = in; 7928 if (in >= end) { 7929 const xmlChar *oldbase = ctxt->input->base; 7930 GROW; 7931 if (oldbase != ctxt->input->base) { 7932 long delta = ctxt->input->base - oldbase; 7933 start = start + delta; 7934 in = in + delta; 7935 } 7936 end = ctxt->input->end; 7937 } 7938 if (normalize) { 7939 /* 7940 * Skip any leading spaces 7941 */ 7942 while ((in < end) && (*in != limit) && 7943 ((*in == 0x20) || (*in == 0x9) || 7944 (*in == 0xA) || (*in == 0xD))) { 7945 in++; 7946 start = in; 7947 if (in >= end) { 7948 const xmlChar *oldbase = ctxt->input->base; 7949 GROW; 7950 if (oldbase != ctxt->input->base) { 7951 long delta = ctxt->input->base - oldbase; 7952 start = start + delta; 7953 in = in + delta; 7954 } 7955 end = ctxt->input->end; 7956 } 7957 } 7958 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7959 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7960 if ((*in++ == 0x20) && (*in == 0x20)) break; 7961 if (in >= end) { 7962 const xmlChar *oldbase = ctxt->input->base; 7963 GROW; 7964 if (oldbase != ctxt->input->base) { 7965 long delta = ctxt->input->base - oldbase; 7966 start = start + delta; 7967 in = in + delta; 7968 } 7969 end = ctxt->input->end; 7970 } 7971 } 7972 last = in; 7973 /* 7974 * skip the trailing blanks 7975 */ 7976 while ((last[-1] == 0x20) && (last > start)) 
last--; 7977 while ((in < end) && (*in != limit) && 7978 ((*in == 0x20) || (*in == 0x9) || 7979 (*in == 0xA) || (*in == 0xD))) { 7980 in++; 7981 if (in >= end) { 7982 const xmlChar *oldbase = ctxt->input->base; 7983 GROW; 7984 if (oldbase != ctxt->input->base) { 7985 long delta = ctxt->input->base - oldbase; 7986 start = start + delta; 7987 in = in + delta; 7988 last = last + delta; 7989 } 7990 end = ctxt->input->end; 7991 } 7992 } 7993 if (*in != limit) goto need_complex; 7994 } else { 7995 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7996 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7997 in++; 7998 if (in >= end) { 7999 const xmlChar *oldbase = ctxt->input->base; 8000 GROW; 8001 if (oldbase != ctxt->input->base) { 8002 long delta = ctxt->input->base - oldbase; 8003 start = start + delta; 8004 in = in + delta; 8005 } 8006 end = ctxt->input->end; 8007 } 8008 } 8009 last = in; 8010 if (*in != limit) goto need_complex; 8011 } 8012 in++; 8013 if (len != NULL) { 8014 *len = last - start; 8015 ret = (xmlChar *) start; 8016 } else { 8017 if (alloc) *alloc = 1; 8018 ret = xmlStrndup(start, last - start); 8019 } 8020 CUR_PTR = in; 8021 if (alloc) *alloc = 0; 8022 return ret; 8023need_complex: 8024 if (alloc) *alloc = 1; 8025 return xmlParseAttValueComplex(ctxt, len, normalize); 8026} 8027 8028/** 8029 * xmlParseAttribute2: 8030 * @ctxt: an XML parser context 8031 * @pref: the element prefix 8032 * @elem: the element name 8033 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8034 * @value: a xmlChar ** used to store the value of the attribute 8035 * @len: an int * to save the length of the attribute 8036 * @alloc: an int * to indicate if the attribute was allocated 8037 * 8038 * parse an attribute in the new SAX2 framework. 8039 * 8040 * Returns the attribute name, and the value in *value, . 
8041 */ 8042 8043static const xmlChar * 8044xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8045 const xmlChar * pref, const xmlChar * elem, 8046 const xmlChar ** prefix, xmlChar ** value, 8047 int *len, int *alloc) 8048{ 8049 const xmlChar *name; 8050 xmlChar *val, *internal_val = NULL; 8051 int normalize = 0; 8052 8053 *value = NULL; 8054 GROW; 8055 name = xmlParseQName(ctxt, prefix); 8056 if (name == NULL) { 8057 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8058 "error parsing attribute name\n"); 8059 return (NULL); 8060 } 8061 8062 /* 8063 * get the type if needed 8064 */ 8065 if (ctxt->attsSpecial != NULL) { 8066 int type; 8067 8068 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8069 pref, elem, *prefix, name); 8070 if (type != 0) 8071 normalize = 1; 8072 } 8073 8074 /* 8075 * read the value 8076 */ 8077 SKIP_BLANKS; 8078 if (RAW == '=') { 8079 NEXT; 8080 SKIP_BLANKS; 8081 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8082 if (normalize) { 8083 /* 8084 * Sometimes a second normalisation pass for spaces is needed 8085 * but that only happens if charrefs or entities refernces 8086 * have been used in the attribute value, i.e. the attribute 8087 * value have been extracted in an allocated string already. 
8088 */ 8089 if (*alloc) { 8090 const xmlChar *val2; 8091 8092 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8093 if (val2 != NULL) { 8094 xmlFree(val); 8095 val = (xmlChar *) val2; 8096 } 8097 } 8098 } 8099 ctxt->instate = XML_PARSER_CONTENT; 8100 } else { 8101 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8102 "Specification mandate value for attribute %s\n", 8103 name); 8104 return (NULL); 8105 } 8106 8107 if (*prefix == ctxt->str_xml) { 8108 /* 8109 * Check that xml:lang conforms to the specification 8110 * No more registered as an error, just generate a warning now 8111 * since this was deprecated in XML second edition 8112 */ 8113 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8114 internal_val = xmlStrndup(val, *len); 8115 if (!xmlCheckLanguageID(internal_val)) { 8116 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8117 "Malformed value for xml:lang : %s\n", 8118 internal_val, NULL); 8119 } 8120 } 8121 8122 /* 8123 * Check that xml:space conforms to the specification 8124 */ 8125 if (xmlStrEqual(name, BAD_CAST "space")) { 8126 internal_val = xmlStrndup(val, *len); 8127 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8128 *(ctxt->space) = 0; 8129 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8130 *(ctxt->space) = 1; 8131 else { 8132 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8133 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8134 internal_val, NULL); 8135 } 8136 } 8137 if (internal_val) { 8138 xmlFree(internal_val); 8139 } 8140 } 8141 8142 *value = val; 8143 return (name); 8144} 8145/** 8146 * xmlParseStartTag2: 8147 * @ctxt: an XML parser context 8148 * 8149 * parse a start of tag either for rule element or 8150 * EmptyElement. In both case we don't parse the tag closing chars. 8151 * This routine is called when running SAX2 parsing 8152 * 8153 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8154 * 8155 * [ WFC: Unique Att Spec ] 8156 * No attribute name may appear more than once in the same start-tag or 8157 * empty-element tag. 8158 * 8159 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8160 * 8161 * [ WFC: Unique Att Spec ] 8162 * No attribute name may appear more than once in the same start-tag or 8163 * empty-element tag. 8164 * 8165 * With namespace: 8166 * 8167 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8168 * 8169 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8170 * 8171 * Returns the element name parsed 8172 */ 8173 8174static const xmlChar * 8175xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8176 const xmlChar **URI, int *tlen) { 8177 const xmlChar *localname; 8178 const xmlChar *prefix; 8179 const xmlChar *attname; 8180 const xmlChar *aprefix; 8181 const xmlChar *nsname; 8182 xmlChar *attvalue; 8183 const xmlChar **atts = ctxt->atts; 8184 int maxatts = ctxt->maxatts; 8185 int nratts, nbatts, nbdef; 8186 int i, j, nbNs, attval, oldline, oldcol; 8187 const xmlChar *base; 8188 unsigned long cur; 8189 int nsNr = ctxt->nsNr; 8190 8191 if (RAW != '<') return(NULL); 8192 NEXT1; 8193 8194 /* 8195 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8196 * point since the attribute values may be stored as pointers to 8197 * the buffer and calling SHRINK would destroy them ! 8198 * The Shrinking is only possible once the full set of attribute 8199 * callbacks have been done. 8200 */ 8201reparse: 8202 SHRINK; 8203 base = ctxt->input->base; 8204 cur = ctxt->input->cur - ctxt->input->base; 8205 oldline = ctxt->input->line; 8206 oldcol = ctxt->input->col; 8207 nbatts = 0; 8208 nratts = 0; 8209 nbdef = 0; 8210 nbNs = 0; 8211 attval = 0; 8212 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8213 ctxt->nsNr = nsNr; 8214 8215 localname = xmlParseQName(ctxt, &prefix); 8216 if (localname == NULL) { 8217 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8218 "StartTag: invalid element name\n"); 8219 return(NULL); 8220 } 8221 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8222 8223 /* 8224 * Now parse the attributes, it ends up with the ending 8225 * 8226 * (S Attribute)* S? 8227 */ 8228 SKIP_BLANKS; 8229 GROW; 8230 if (ctxt->input->base != base) goto base_changed; 8231 8232 while ((RAW != '>') && 8233 ((RAW != '/') || (NXT(1) != '>')) && 8234 (IS_BYTE_CHAR(RAW))) { 8235 const xmlChar *q = CUR_PTR; 8236 unsigned int cons = ctxt->input->consumed; 8237 int len = -1, alloc = 0; 8238 8239 attname = xmlParseAttribute2(ctxt, prefix, localname, 8240 &aprefix, &attvalue, &len, &alloc); 8241 if (ctxt->input->base != base) { 8242 if ((attvalue != NULL) && (alloc != 0)) 8243 xmlFree(attvalue); 8244 attvalue = NULL; 8245 goto base_changed; 8246 } 8247 if ((attname != NULL) && (attvalue != NULL)) { 8248 if (len < 0) len = xmlStrlen(attvalue); 8249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8251 xmlURIPtr uri; 8252 8253 if (*URL != 0) { 8254 uri = xmlParseURI((const char *) URL); 8255 if (uri == NULL) { 8256 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 8257 "xmlns: %s not a valid URI\n", 8258 URL, NULL); 8259 } else { 8260 if (uri->scheme == NULL) { 8261 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 8262 "xmlns: URI %s is not absolute\n", 8263 URL, NULL); 8264 } 8265 xmlFreeURI(uri); 8266 } 8267 } 8268 /* 8269 * check that it's not a defined namespace 8270 */ 8271 for (j = 1;j <= nbNs;j++) 8272 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8273 break; 8274 if (j <= nbNs) 8275 xmlErrAttributeDup(ctxt, NULL, attname); 8276 else 8277 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8278 if (alloc != 0) xmlFree(attvalue); 8279 SKIP_BLANKS; 8280 continue; 8281 } 8282 if (aprefix == ctxt->str_xmlns) { 8283 const 
xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8284 xmlURIPtr uri; 8285 8286 if (attname == ctxt->str_xml) { 8287 if (URL != ctxt->str_xml_ns) { 8288 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8289 "xml namespace prefix mapped to wrong URI\n", 8290 NULL, NULL, NULL); 8291 } 8292 /* 8293 * Do not keep a namespace definition node 8294 */ 8295 if (alloc != 0) xmlFree(attvalue); 8296 SKIP_BLANKS; 8297 continue; 8298 } 8299 uri = xmlParseURI((const char *) URL); 8300 if (uri == NULL) { 8301 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 8302 "xmlns:%s: '%s' is not a valid URI\n", 8303 attname, URL); 8304 } else { 8305 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 8306 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 8307 "xmlns:%s: URI %s is not absolute\n", 8308 attname, URL); 8309 } 8310 xmlFreeURI(uri); 8311 } 8312 8313 /* 8314 * check that it's not a defined namespace 8315 */ 8316 for (j = 1;j <= nbNs;j++) 8317 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8318 break; 8319 if (j <= nbNs) 8320 xmlErrAttributeDup(ctxt, aprefix, attname); 8321 else 8322 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 8323 if (alloc != 0) xmlFree(attvalue); 8324 SKIP_BLANKS; 8325 if (ctxt->input->base != base) goto base_changed; 8326 continue; 8327 } 8328 8329 /* 8330 * Add the pair to atts 8331 */ 8332 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8333 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8334 if (attvalue[len] == 0) 8335 xmlFree(attvalue); 8336 goto failed; 8337 } 8338 maxatts = ctxt->maxatts; 8339 atts = ctxt->atts; 8340 } 8341 ctxt->attallocs[nratts++] = alloc; 8342 atts[nbatts++] = attname; 8343 atts[nbatts++] = aprefix; 8344 atts[nbatts++] = NULL; /* the URI will be fetched later */ 8345 atts[nbatts++] = attvalue; 8346 attvalue += len; 8347 atts[nbatts++] = attvalue; 8348 /* 8349 * tag if some deallocation is needed 8350 */ 8351 if (alloc != 0) attval = 1; 8352 } else { 8353 if ((attvalue != NULL) && (attvalue[len] == 0)) 8354 xmlFree(attvalue); 8355 } 8356 8357failed: 8358 
8359 GROW 8360 if (ctxt->input->base != base) goto base_changed; 8361 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8362 break; 8363 if (!IS_BLANK_CH(RAW)) { 8364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8365 "attributes construct error\n"); 8366 break; 8367 } 8368 SKIP_BLANKS; 8369 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8370 (attname == NULL) && (attvalue == NULL)) { 8371 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8372 "xmlParseStartTag: problem parsing attributes\n"); 8373 break; 8374 } 8375 GROW; 8376 if (ctxt->input->base != base) goto base_changed; 8377 } 8378 8379 /* 8380 * The attributes defaulting 8381 */ 8382 if (ctxt->attsDefault != NULL) { 8383 xmlDefAttrsPtr defaults; 8384 8385 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 8386 if (defaults != NULL) { 8387 for (i = 0;i < defaults->nbAttrs;i++) { 8388 attname = defaults->values[4 * i]; 8389 aprefix = defaults->values[4 * i + 1]; 8390 8391 /* 8392 * special work for namespaces defaulted defs 8393 */ 8394 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8395 /* 8396 * check that it's not a defined namespace 8397 */ 8398 for (j = 1;j <= nbNs;j++) 8399 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8400 break; 8401 if (j <= nbNs) continue; 8402 8403 nsname = xmlGetNamespace(ctxt, NULL); 8404 if (nsname != defaults->values[4 * i + 2]) { 8405 if (nsPush(ctxt, NULL, 8406 defaults->values[4 * i + 2]) > 0) 8407 nbNs++; 8408 } 8409 } else if (aprefix == ctxt->str_xmlns) { 8410 /* 8411 * check that it's not a defined namespace 8412 */ 8413 for (j = 1;j <= nbNs;j++) 8414 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8415 break; 8416 if (j <= nbNs) continue; 8417 8418 nsname = xmlGetNamespace(ctxt, attname); 8419 if (nsname != defaults->values[2]) { 8420 if (nsPush(ctxt, attname, 8421 defaults->values[4 * i + 2]) > 0) 8422 nbNs++; 8423 } 8424 } else { 8425 /* 8426 * check that it's not a defined attribute 8427 */ 8428 for (j = 0;j < nbatts;j+=5) { 8429 if 
((attname == atts[j]) && (aprefix == atts[j+1])) 8430 break; 8431 } 8432 if (j < nbatts) continue; 8433 8434 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8435 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8436 return(NULL); 8437 } 8438 maxatts = ctxt->maxatts; 8439 atts = ctxt->atts; 8440 } 8441 atts[nbatts++] = attname; 8442 atts[nbatts++] = aprefix; 8443 if (aprefix == NULL) 8444 atts[nbatts++] = NULL; 8445 else 8446 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 8447 atts[nbatts++] = defaults->values[4 * i + 2]; 8448 atts[nbatts++] = defaults->values[4 * i + 3]; 8449 nbdef++; 8450 } 8451 } 8452 } 8453 } 8454 8455 /* 8456 * The attributes checkings 8457 */ 8458 for (i = 0; i < nbatts;i += 5) { 8459 /* 8460 * The default namespace does not apply to attribute names. 8461 */ 8462 if (atts[i + 1] != NULL) { 8463 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 8464 if (nsname == NULL) { 8465 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8466 "Namespace prefix %s for %s on %s is not defined\n", 8467 atts[i + 1], atts[i], localname); 8468 } 8469 atts[i + 2] = nsname; 8470 } else 8471 nsname = NULL; 8472 /* 8473 * [ WFC: Unique Att Spec ] 8474 * No attribute name may appear more than once in the same 8475 * start-tag or empty-element tag. 8476 * As extended by the Namespace in XML REC. 
8477 */ 8478 for (j = 0; j < i;j += 5) { 8479 if (atts[i] == atts[j]) { 8480 if (atts[i+1] == atts[j+1]) { 8481 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 8482 break; 8483 } 8484 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 8485 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 8486 "Namespaced Attribute %s in '%s' redefined\n", 8487 atts[i], nsname, NULL); 8488 break; 8489 } 8490 } 8491 } 8492 } 8493 8494 nsname = xmlGetNamespace(ctxt, prefix); 8495 if ((prefix != NULL) && (nsname == NULL)) { 8496 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8497 "Namespace prefix %s on %s is not defined\n", 8498 prefix, localname, NULL); 8499 } 8500 *pref = prefix; 8501 *URI = nsname; 8502 8503 /* 8504 * SAX: Start of Element ! 8505 */ 8506 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 8507 (!ctxt->disableSAX)) { 8508 if (nbNs > 0) 8509 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8510 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 8511 nbatts / 5, nbdef, atts); 8512 else 8513 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8514 nsname, 0, NULL, nbatts / 5, nbdef, atts); 8515 } 8516 8517 /* 8518 * Free up attribute allocated strings if needed 8519 */ 8520 if (attval != 0) { 8521 for (i = 3,j = 0; j < nratts;i += 5,j++) 8522 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8523 xmlFree((xmlChar *) atts[i]); 8524 } 8525 8526 return(localname); 8527 8528base_changed: 8529 /* 8530 * the attribute strings are valid iif the base didn't changed 8531 */ 8532 if (attval != 0) { 8533 for (i = 3,j = 0; j < nratts;i += 5,j++) 8534 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8535 xmlFree((xmlChar *) atts[i]); 8536 } 8537 ctxt->input->cur = ctxt->input->base + cur; 8538 ctxt->input->line = oldline; 8539 ctxt->input->col = oldcol; 8540 if (ctxt->wellFormed == 1) { 8541 goto reparse; 8542 } 8543 return(NULL); 8544} 8545 8546/** 8547 * xmlParseEndTag2: 8548 * @ctxt: an XML parser context 8549 * @line: line of the start tag 
8550 * @nsNr: number of namespaces on the start tag 8551 * 8552 * parse an end of tag 8553 * 8554 * [42] ETag ::= '</' Name S? '>' 8555 * 8556 * With namespace 8557 * 8558 * [NS 9] ETag ::= '</' QName S? '>' 8559 */ 8560 8561static void 8562xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 8563 const xmlChar *URI, int line, int nsNr, int tlen) { 8564 const xmlChar *name; 8565 8566 GROW; 8567 if ((RAW != '<') || (NXT(1) != '/')) { 8568 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 8569 return; 8570 } 8571 SKIP(2); 8572 8573 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 8574 if (ctxt->input->cur[tlen] == '>') { 8575 ctxt->input->cur += tlen + 1; 8576 goto done; 8577 } 8578 ctxt->input->cur += tlen; 8579 name = (xmlChar*)1; 8580 } else { 8581 if (prefix == NULL) 8582 name = xmlParseNameAndCompare(ctxt, ctxt->name); 8583 else 8584 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 8585 } 8586 8587 /* 8588 * We should definitely be at the ending "S? '>'" part 8589 */ 8590 GROW; 8591 SKIP_BLANKS; 8592 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8593 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8594 } else 8595 NEXT1; 8596 8597 /* 8598 * [ WFC: Element Type Match ] 8599 * The Name in an element's end-tag must match the element type in the 8600 * start-tag. 
8601 * 8602 */ 8603 if (name != (xmlChar*)1) { 8604 if (name == NULL) name = BAD_CAST "unparseable"; 8605 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8606 "Opening and ending tag mismatch: %s line %d and %s\n", 8607 ctxt->name, line, name); 8608 } 8609 8610 /* 8611 * SAX: End of Tag 8612 */ 8613done: 8614 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8615 (!ctxt->disableSAX)) 8616 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 8617 8618 spacePop(ctxt); 8619 if (nsNr != 0) 8620 nsPop(ctxt, nsNr); 8621 return; 8622} 8623 8624/** 8625 * xmlParseCDSect: 8626 * @ctxt: an XML parser context 8627 * 8628 * Parse escaped pure raw content. 8629 * 8630 * [18] CDSect ::= CDStart CData CDEnd 8631 * 8632 * [19] CDStart ::= '<![CDATA[' 8633 * 8634 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 8635 * 8636 * [21] CDEnd ::= ']]>' 8637 */ 8638void 8639xmlParseCDSect(xmlParserCtxtPtr ctxt) { 8640 xmlChar *buf = NULL; 8641 int len = 0; 8642 int size = XML_PARSER_BUFFER_SIZE; 8643 int r, rl; 8644 int s, sl; 8645 int cur, l; 8646 int count = 0; 8647 8648 /* Check 2.6.0 was NXT(0) not RAW */ 8649 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8650 SKIP(9); 8651 } else 8652 return; 8653 8654 ctxt->instate = XML_PARSER_CDATA_SECTION; 8655 r = CUR_CHAR(rl); 8656 if (!IS_CHAR(r)) { 8657 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8658 ctxt->instate = XML_PARSER_CONTENT; 8659 return; 8660 } 8661 NEXTL(rl); 8662 s = CUR_CHAR(sl); 8663 if (!IS_CHAR(s)) { 8664 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8665 ctxt->instate = XML_PARSER_CONTENT; 8666 return; 8667 } 8668 NEXTL(sl); 8669 cur = CUR_CHAR(l); 8670 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8671 if (buf == NULL) { 8672 xmlErrMemory(ctxt, NULL); 8673 return; 8674 } 8675 while (IS_CHAR(cur) && 8676 ((r != ']') || (s != ']') || (cur != '>'))) { 8677 if (len + 5 >= size) { 8678 xmlChar *tmp; 8679 8680 size *= 2; 8681 tmp = (xmlChar *) 
xmlRealloc(buf, size * sizeof(xmlChar)); 8682 if (tmp == NULL) { 8683 xmlFree(buf); 8684 xmlErrMemory(ctxt, NULL); 8685 return; 8686 } 8687 buf = tmp; 8688 } 8689 COPY_BUF(rl,buf,len,r); 8690 r = s; 8691 rl = sl; 8692 s = cur; 8693 sl = l; 8694 count++; 8695 if (count > 50) { 8696 GROW; 8697 count = 0; 8698 } 8699 NEXTL(l); 8700 cur = CUR_CHAR(l); 8701 } 8702 buf[len] = 0; 8703 ctxt->instate = XML_PARSER_CONTENT; 8704 if (cur != '>') { 8705 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 8706 "CData section not finished\n%.50s\n", buf); 8707 xmlFree(buf); 8708 return; 8709 } 8710 NEXTL(l); 8711 8712 /* 8713 * OK the buffer is to be consumed as cdata. 8714 */ 8715 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8716 if (ctxt->sax->cdataBlock != NULL) 8717 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 8718 else if (ctxt->sax->characters != NULL) 8719 ctxt->sax->characters(ctxt->userData, buf, len); 8720 } 8721 xmlFree(buf); 8722} 8723 8724/** 8725 * xmlParseContent: 8726 * @ctxt: an XML parser context 8727 * 8728 * Parse a content: 8729 * 8730 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8731 */ 8732 8733void 8734xmlParseContent(xmlParserCtxtPtr ctxt) { 8735 GROW; 8736 while ((RAW != 0) && 8737 ((RAW != '<') || (NXT(1) != '/')) && 8738 (ctxt->instate != XML_PARSER_EOF)) { 8739 const xmlChar *test = CUR_PTR; 8740 unsigned int cons = ctxt->input->consumed; 8741 const xmlChar *cur = ctxt->input->cur; 8742 8743 /* 8744 * First case : a Processing Instruction. 
8745 */ 8746 if ((*cur == '<') && (cur[1] == '?')) { 8747 xmlParsePI(ctxt); 8748 } 8749 8750 /* 8751 * Second case : a CDSection 8752 */ 8753 /* 2.6.0 test was *cur not RAW */ 8754 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8755 xmlParseCDSect(ctxt); 8756 } 8757 8758 /* 8759 * Third case : a comment 8760 */ 8761 else if ((*cur == '<') && (NXT(1) == '!') && 8762 (NXT(2) == '-') && (NXT(3) == '-')) { 8763 xmlParseComment(ctxt); 8764 ctxt->instate = XML_PARSER_CONTENT; 8765 } 8766 8767 /* 8768 * Fourth case : a sub-element. 8769 */ 8770 else if (*cur == '<') { 8771 xmlParseElement(ctxt); 8772 } 8773 8774 /* 8775 * Fifth case : a reference. If if has not been resolved, 8776 * parsing returns it's Name, create the node 8777 */ 8778 8779 else if (*cur == '&') { 8780 xmlParseReference(ctxt); 8781 } 8782 8783 /* 8784 * Last case, text. Note that References are handled directly. 8785 */ 8786 else { 8787 xmlParseCharData(ctxt, 0); 8788 } 8789 8790 GROW; 8791 /* 8792 * Pop-up of finished entities. 8793 */ 8794 while ((RAW == 0) && (ctxt->inputNr > 1)) 8795 xmlPopInput(ctxt); 8796 SHRINK; 8797 8798 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8799 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8800 "detected an error in element content\n"); 8801 ctxt->instate = XML_PARSER_EOF; 8802 break; 8803 } 8804 } 8805} 8806 8807/** 8808 * xmlParseElement: 8809 * @ctxt: an XML parser context 8810 * 8811 * parse an XML element, this is highly recursive 8812 * 8813 * [39] element ::= EmptyElemTag | STag content ETag 8814 * 8815 * [ WFC: Element Type Match ] 8816 * The Name in an element's end-tag must match the element type in the 8817 * start-tag. 
8818 * 8819 */ 8820 8821void 8822xmlParseElement(xmlParserCtxtPtr ctxt) { 8823 const xmlChar *name; 8824 const xmlChar *prefix; 8825 const xmlChar *URI; 8826 xmlParserNodeInfo node_info; 8827 int line, tlen; 8828 xmlNodePtr ret; 8829 int nsNr = ctxt->nsNr; 8830 8831 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { 8832 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 8833 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 8834 xmlParserMaxDepth); 8835 ctxt->instate = XML_PARSER_EOF; 8836 return; 8837 } 8838 8839 /* Capture start position */ 8840 if (ctxt->record_info) { 8841 node_info.begin_pos = ctxt->input->consumed + 8842 (CUR_PTR - ctxt->input->base); 8843 node_info.begin_line = ctxt->input->line; 8844 } 8845 8846 if (ctxt->spaceNr == 0) 8847 spacePush(ctxt, -1); 8848 else if (*ctxt->space == -2) 8849 spacePush(ctxt, -1); 8850 else 8851 spacePush(ctxt, *ctxt->space); 8852 8853 line = ctxt->input->line; 8854#ifdef LIBXML_SAX1_ENABLED 8855 if (ctxt->sax2) 8856#endif /* LIBXML_SAX1_ENABLED */ 8857 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 8858#ifdef LIBXML_SAX1_ENABLED 8859 else 8860 name = xmlParseStartTag(ctxt); 8861#endif /* LIBXML_SAX1_ENABLED */ 8862 if (name == NULL) { 8863 spacePop(ctxt); 8864 return; 8865 } 8866 namePush(ctxt, name); 8867 ret = ctxt->node; 8868 8869#ifdef LIBXML_VALID_ENABLED 8870 /* 8871 * [ VC: Root Element Type ] 8872 * The Name in the document type declaration must match the element 8873 * type of the root element. 8874 */ 8875 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8876 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8877 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8878#endif /* LIBXML_VALID_ENABLED */ 8879 8880 /* 8881 * Check for an Empty Element. 
8882 */ 8883 if ((RAW == '/') && (NXT(1) == '>')) { 8884 SKIP(2); 8885 if (ctxt->sax2) { 8886 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8887 (!ctxt->disableSAX)) 8888 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8889#ifdef LIBXML_SAX1_ENABLED 8890 } else { 8891 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8892 (!ctxt->disableSAX)) 8893 ctxt->sax->endElement(ctxt->userData, name); 8894#endif /* LIBXML_SAX1_ENABLED */ 8895 } 8896 namePop(ctxt); 8897 spacePop(ctxt); 8898 if (nsNr != ctxt->nsNr) 8899 nsPop(ctxt, ctxt->nsNr - nsNr); 8900 if ( ret != NULL && ctxt->record_info ) { 8901 node_info.end_pos = ctxt->input->consumed + 8902 (CUR_PTR - ctxt->input->base); 8903 node_info.end_line = ctxt->input->line; 8904 node_info.node = ret; 8905 xmlParserAddNodeInfo(ctxt, &node_info); 8906 } 8907 return; 8908 } 8909 if (RAW == '>') { 8910 NEXT1; 8911 } else { 8912 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 8913 "Couldn't find end of Start Tag %s line %d\n", 8914 name, line, NULL); 8915 8916 /* 8917 * end of parsing of this node. 8918 */ 8919 nodePop(ctxt); 8920 namePop(ctxt); 8921 spacePop(ctxt); 8922 if (nsNr != ctxt->nsNr) 8923 nsPop(ctxt, ctxt->nsNr - nsNr); 8924 8925 /* 8926 * Capture end position and add node 8927 */ 8928 if ( ret != NULL && ctxt->record_info ) { 8929 node_info.end_pos = ctxt->input->consumed + 8930 (CUR_PTR - ctxt->input->base); 8931 node_info.end_line = ctxt->input->line; 8932 node_info.node = ret; 8933 xmlParserAddNodeInfo(ctxt, &node_info); 8934 } 8935 return; 8936 } 8937 8938 /* 8939 * Parse the content of the element: 8940 */ 8941 xmlParseContent(ctxt); 8942 if (!IS_BYTE_CHAR(RAW)) { 8943 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 8944 "Premature end of data in tag %s line %d\n", 8945 name, line, NULL); 8946 8947 /* 8948 * end of parsing of this node. 
8949 */ 8950 nodePop(ctxt); 8951 namePop(ctxt); 8952 spacePop(ctxt); 8953 if (nsNr != ctxt->nsNr) 8954 nsPop(ctxt, ctxt->nsNr - nsNr); 8955 return; 8956 } 8957 8958 /* 8959 * parse the end of tag: '</' should be here. 8960 */ 8961 if (ctxt->sax2) { 8962 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 8963 namePop(ctxt); 8964 } 8965#ifdef LIBXML_SAX1_ENABLED 8966 else 8967 xmlParseEndTag1(ctxt, line); 8968#endif /* LIBXML_SAX1_ENABLED */ 8969 8970 /* 8971 * Capture end position and add node 8972 */ 8973 if ( ret != NULL && ctxt->record_info ) { 8974 node_info.end_pos = ctxt->input->consumed + 8975 (CUR_PTR - ctxt->input->base); 8976 node_info.end_line = ctxt->input->line; 8977 node_info.node = ret; 8978 xmlParserAddNodeInfo(ctxt, &node_info); 8979 } 8980} 8981 8982/** 8983 * xmlParseVersionNum: 8984 * @ctxt: an XML parser context 8985 * 8986 * parse the XML version value. 8987 * 8988 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 8989 * 8990 * Returns the string giving the XML version number, or NULL 8991 */ 8992xmlChar * 8993xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 8994 xmlChar *buf = NULL; 8995 int len = 0; 8996 int size = 10; 8997 xmlChar cur; 8998 8999 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9000 if (buf == NULL) { 9001 xmlErrMemory(ctxt, NULL); 9002 return(NULL); 9003 } 9004 cur = CUR; 9005 while (((cur >= 'a') && (cur <= 'z')) || 9006 ((cur >= 'A') && (cur <= 'Z')) || 9007 ((cur >= '0') && (cur <= '9')) || 9008 (cur == '_') || (cur == '.') || 9009 (cur == ':') || (cur == '-')) { 9010 if (len + 1 >= size) { 9011 xmlChar *tmp; 9012 9013 size *= 2; 9014 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9015 if (tmp == NULL) { 9016 xmlFree(buf); 9017 xmlErrMemory(ctxt, NULL); 9018 return(NULL); 9019 } 9020 buf = tmp; 9021 } 9022 buf[len++] = cur; 9023 NEXT; 9024 cur=CUR; 9025 } 9026 buf[len] = 0; 9027 return(buf); 9028} 9029 9030/** 9031 * xmlParseVersionInfo: 9032 * @ctxt: an XML parser context 9033 * 9034 * parse 
the XML version. 9035 * 9036 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9037 * 9038 * [25] Eq ::= S? '=' S? 9039 * 9040 * Returns the version string, e.g. "1.0" 9041 */ 9042 9043xmlChar * 9044xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9045 xmlChar *version = NULL; 9046 9047 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9048 SKIP(7); 9049 SKIP_BLANKS; 9050 if (RAW != '=') { 9051 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9052 return(NULL); 9053 } 9054 NEXT; 9055 SKIP_BLANKS; 9056 if (RAW == '"') { 9057 NEXT; 9058 version = xmlParseVersionNum(ctxt); 9059 if (RAW != '"') { 9060 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9061 } else 9062 NEXT; 9063 } else if (RAW == '\''){ 9064 NEXT; 9065 version = xmlParseVersionNum(ctxt); 9066 if (RAW != '\'') { 9067 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9068 } else 9069 NEXT; 9070 } else { 9071 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9072 } 9073 } 9074 return(version); 9075} 9076 9077/** 9078 * xmlParseEncName: 9079 * @ctxt: an XML parser context 9080 * 9081 * parse the XML encoding name 9082 * 9083 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9084 * 9085 * Returns the encoding name value or NULL 9086 */ 9087xmlChar * 9088xmlParseEncName(xmlParserCtxtPtr ctxt) { 9089 xmlChar *buf = NULL; 9090 int len = 0; 9091 int size = 10; 9092 xmlChar cur; 9093 9094 cur = CUR; 9095 if (((cur >= 'a') && (cur <= 'z')) || 9096 ((cur >= 'A') && (cur <= 'Z'))) { 9097 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9098 if (buf == NULL) { 9099 xmlErrMemory(ctxt, NULL); 9100 return(NULL); 9101 } 9102 9103 buf[len++] = cur; 9104 NEXT; 9105 cur = CUR; 9106 while (((cur >= 'a') && (cur <= 'z')) || 9107 ((cur >= 'A') && (cur <= 'Z')) || 9108 ((cur >= '0') && (cur <= '9')) || 9109 (cur == '.') || (cur == '_') || 9110 (cur == '-')) { 9111 if (len + 1 >= size) { 9112 xmlChar *tmp; 9113 9114 size *= 2; 9115 tmp = (xmlChar *) xmlRealloc(buf, size * 
sizeof(xmlChar)); 9116 if (tmp == NULL) { 9117 xmlErrMemory(ctxt, NULL); 9118 xmlFree(buf); 9119 return(NULL); 9120 } 9121 buf = tmp; 9122 } 9123 buf[len++] = cur; 9124 NEXT; 9125 cur = CUR; 9126 if (cur == 0) { 9127 SHRINK; 9128 GROW; 9129 cur = CUR; 9130 } 9131 } 9132 buf[len] = 0; 9133 } else { 9134 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9135 } 9136 return(buf); 9137} 9138 9139/** 9140 * xmlParseEncodingDecl: 9141 * @ctxt: an XML parser context 9142 * 9143 * parse the XML encoding declaration 9144 * 9145 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9146 * 9147 * this setups the conversion filters. 9148 * 9149 * Returns the encoding value or NULL 9150 */ 9151 9152const xmlChar * 9153xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9154 xmlChar *encoding = NULL; 9155 9156 SKIP_BLANKS; 9157 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9158 SKIP(8); 9159 SKIP_BLANKS; 9160 if (RAW != '=') { 9161 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9162 return(NULL); 9163 } 9164 NEXT; 9165 SKIP_BLANKS; 9166 if (RAW == '"') { 9167 NEXT; 9168 encoding = xmlParseEncName(ctxt); 9169 if (RAW != '"') { 9170 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9171 } else 9172 NEXT; 9173 } else if (RAW == '\''){ 9174 NEXT; 9175 encoding = xmlParseEncName(ctxt); 9176 if (RAW != '\'') { 9177 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9178 } else 9179 NEXT; 9180 } else { 9181 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9182 } 9183 /* 9184 * UTF-16 encoding stwich has already taken place at this stage, 9185 * more over the little-endian/big-endian selection is already done 9186 */ 9187 if ((encoding != NULL) && 9188 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9189 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9190 if (ctxt->encoding != NULL) 9191 xmlFree((xmlChar *) ctxt->encoding); 9192 ctxt->encoding = encoding; 9193 } 9194 /* 9195 * UTF-8 encoding is handled natively 9196 */ 9197 else if ((encoding != 
NULL) && 9198 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9199 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9200 if (ctxt->encoding != NULL) 9201 xmlFree((xmlChar *) ctxt->encoding); 9202 ctxt->encoding = encoding; 9203 } 9204 else if (encoding != NULL) { 9205 xmlCharEncodingHandlerPtr handler; 9206 9207 if (ctxt->input->encoding != NULL) 9208 xmlFree((xmlChar *) ctxt->input->encoding); 9209 ctxt->input->encoding = encoding; 9210 9211 handler = xmlFindCharEncodingHandler((const char *) encoding); 9212 if (handler != NULL) { 9213 xmlSwitchToEncoding(ctxt, handler); 9214 } else { 9215 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9216 "Unsupported encoding %s\n", encoding); 9217 return(NULL); 9218 } 9219 } 9220 } 9221 return(encoding); 9222} 9223 9224/** 9225 * xmlParseSDDecl: 9226 * @ctxt: an XML parser context 9227 * 9228 * parse the XML standalone declaration 9229 * 9230 * [32] SDDecl ::= S 'standalone' Eq 9231 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 9232 * 9233 * [ VC: Standalone Document Declaration ] 9234 * TODO The standalone document declaration must have the value "no" 9235 * if any external markup declarations contain declarations of: 9236 * - attributes with default values, if elements to which these 9237 * attributes apply appear in the document without specifications 9238 * of values for these attributes, or 9239 * - entities (other than amp, lt, gt, apos, quot), if references 9240 * to those entities appear in the document, or 9241 * - attributes with values subject to normalization, where the 9242 * attribute appears in the document with a value which will change 9243 * as a result of normalization, or 9244 * - element types with element content, if white space occurs directly 9245 * within any instance of those types. 
9246 * 9247 * Returns: 9248 * 1 if standalone="yes" 9249 * 0 if standalone="no" 9250 * -2 if standalone attribute is missing or invalid 9251 * (A standalone value of -2 means that the XML declaration was found, 9252 * but no value was specified for the standalone attribute). 9253 */ 9254 9255int 9256xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 9257 int standalone = -2; 9258 9259 SKIP_BLANKS; 9260 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 9261 SKIP(10); 9262 SKIP_BLANKS; 9263 if (RAW != '=') { 9264 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9265 return(standalone); 9266 } 9267 NEXT; 9268 SKIP_BLANKS; 9269 if (RAW == '\''){ 9270 NEXT; 9271 if ((RAW == 'n') && (NXT(1) == 'o')) { 9272 standalone = 0; 9273 SKIP(2); 9274 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9275 (NXT(2) == 's')) { 9276 standalone = 1; 9277 SKIP(3); 9278 } else { 9279 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9280 } 9281 if (RAW != '\'') { 9282 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9283 } else 9284 NEXT; 9285 } else if (RAW == '"'){ 9286 NEXT; 9287 if ((RAW == 'n') && (NXT(1) == 'o')) { 9288 standalone = 0; 9289 SKIP(2); 9290 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9291 (NXT(2) == 's')) { 9292 standalone = 1; 9293 SKIP(3); 9294 } else { 9295 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9296 } 9297 if (RAW != '"') { 9298 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9299 } else 9300 NEXT; 9301 } else { 9302 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9303 } 9304 } 9305 return(standalone); 9306} 9307 9308/** 9309 * xmlParseXMLDecl: 9310 * @ctxt: an XML parser context 9311 * 9312 * parse an XML declaration header 9313 * 9314 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? 
'?>' 9315 */ 9316 9317void 9318xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 9319 xmlChar *version; 9320 9321 /* 9322 * This value for standalone indicates that the document has an 9323 * XML declaration but it does not have a standalone attribute. 9324 * It will be overwritten later if a standalone attribute is found. 9325 */ 9326 ctxt->input->standalone = -2; 9327 9328 /* 9329 * We know that '<?xml' is here. 9330 */ 9331 SKIP(5); 9332 9333 if (!IS_BLANK_CH(RAW)) { 9334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9335 "Blank needed after '<?xml'\n"); 9336 } 9337 SKIP_BLANKS; 9338 9339 /* 9340 * We must have the VersionInfo here. 9341 */ 9342 version = xmlParseVersionInfo(ctxt); 9343 if (version == NULL) { 9344 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 9345 } else { 9346 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 9347 /* 9348 * TODO: Blueberry should be detected here 9349 */ 9350 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 9351 "Unsupported version '%s'\n", 9352 version, NULL); 9353 } 9354 if (ctxt->version != NULL) 9355 xmlFree((void *) ctxt->version); 9356 ctxt->version = version; 9357 } 9358 9359 /* 9360 * We may have the encoding declaration 9361 */ 9362 if (!IS_BLANK_CH(RAW)) { 9363 if ((RAW == '?') && (NXT(1) == '>')) { 9364 SKIP(2); 9365 return; 9366 } 9367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9368 } 9369 xmlParseEncodingDecl(ctxt); 9370 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9371 /* 9372 * The XML REC instructs us to stop parsing right here 9373 */ 9374 return; 9375 } 9376 9377 /* 9378 * We may have the standalone status. 
9379 */ 9380 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 9381 if ((RAW == '?') && (NXT(1) == '>')) { 9382 SKIP(2); 9383 return; 9384 } 9385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9386 } 9387 SKIP_BLANKS; 9388 ctxt->input->standalone = xmlParseSDDecl(ctxt); 9389 9390 SKIP_BLANKS; 9391 if ((RAW == '?') && (NXT(1) == '>')) { 9392 SKIP(2); 9393 } else if (RAW == '>') { 9394 /* Deprecated old WD ... */ 9395 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9396 NEXT; 9397 } else { 9398 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9399 MOVETO_ENDTAG(CUR_PTR); 9400 NEXT; 9401 } 9402} 9403 9404/** 9405 * xmlParseMisc: 9406 * @ctxt: an XML parser context 9407 * 9408 * parse an XML Misc* optional field. 9409 * 9410 * [27] Misc ::= Comment | PI | S 9411 */ 9412 9413void 9414xmlParseMisc(xmlParserCtxtPtr ctxt) { 9415 while (((RAW == '<') && (NXT(1) == '?')) || 9416 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 9417 IS_BLANK_CH(CUR)) { 9418 if ((RAW == '<') && (NXT(1) == '?')) { 9419 xmlParsePI(ctxt); 9420 } else if (IS_BLANK_CH(CUR)) { 9421 NEXT; 9422 } else 9423 xmlParseComment(ctxt); 9424 } 9425} 9426 9427/** 9428 * xmlParseDocument: 9429 * @ctxt: an XML parser context 9430 * 9431 * parse an XML document (and build a tree if using the standard SAX 9432 * interface). 9433 * 9434 * [1] document ::= prolog element Misc* 9435 * 9436 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 9437 * 9438 * Returns 0, -1 in case of error. the parser context is augmented 9439 * as a result of the parsing. 9440 */ 9441 9442int 9443xmlParseDocument(xmlParserCtxtPtr ctxt) { 9444 xmlChar start[4]; 9445 xmlCharEncoding enc; 9446 9447 xmlInitParser(); 9448 9449 if ((ctxt == NULL) || (ctxt->input == NULL)) 9450 return(-1); 9451 9452 GROW; 9453 9454 /* 9455 * SAX: detecting the level. 9456 */ 9457 xmlDetectSAX2(ctxt); 9458 9459 /* 9460 * SAX: beginning of the document processing. 
9461 */ 9462 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9463 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9464 9465 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 9466 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 9467 /* 9468 * Get the 4 first bytes and decode the charset 9469 * if enc != XML_CHAR_ENCODING_NONE 9470 * plug some encoding conversion routines. 9471 */ 9472 start[0] = RAW; 9473 start[1] = NXT(1); 9474 start[2] = NXT(2); 9475 start[3] = NXT(3); 9476 enc = xmlDetectCharEncoding(&start[0], 4); 9477 if (enc != XML_CHAR_ENCODING_NONE) { 9478 xmlSwitchEncoding(ctxt, enc); 9479 } 9480 } 9481 9482 9483 if (CUR == 0) { 9484 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9485 } 9486 9487 /* 9488 * Check for the XMLDecl in the Prolog. 9489 */ 9490 GROW; 9491 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9492 9493 /* 9494 * Note that we will switch encoding on the fly. 9495 */ 9496 xmlParseXMLDecl(ctxt); 9497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9498 /* 9499 * The XML REC instructs us to stop parsing right here 9500 */ 9501 return(-1); 9502 } 9503 ctxt->standalone = ctxt->input->standalone; 9504 SKIP_BLANKS; 9505 } else { 9506 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9507 } 9508 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9509 ctxt->sax->startDocument(ctxt->userData); 9510 9511 /* 9512 * The Misc part of the Prolog 9513 */ 9514 GROW; 9515 xmlParseMisc(ctxt); 9516 9517 /* 9518 * Then possibly doc type declaration(s) and more Misc 9519 * (doctypedecl Misc*)? 9520 */ 9521 GROW; 9522 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 9523 9524 ctxt->inSubset = 1; 9525 xmlParseDocTypeDecl(ctxt); 9526 if (RAW == '[') { 9527 ctxt->instate = XML_PARSER_DTD; 9528 xmlParseInternalSubset(ctxt); 9529 } 9530 9531 /* 9532 * Create and update the external subset. 
9533 */ 9534 ctxt->inSubset = 2; 9535 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 9536 (!ctxt->disableSAX)) 9537 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9538 ctxt->extSubSystem, ctxt->extSubURI); 9539 ctxt->inSubset = 0; 9540 9541 xmlCleanSpecialAttr(ctxt); 9542 9543 ctxt->instate = XML_PARSER_PROLOG; 9544 xmlParseMisc(ctxt); 9545 } 9546 9547 /* 9548 * Time to start parsing the tree itself 9549 */ 9550 GROW; 9551 if (RAW != '<') { 9552 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 9553 "Start tag expected, '<' not found\n"); 9554 } else { 9555 ctxt->instate = XML_PARSER_CONTENT; 9556 xmlParseElement(ctxt); 9557 ctxt->instate = XML_PARSER_EPILOG; 9558 9559 9560 /* 9561 * The Misc part at the end 9562 */ 9563 xmlParseMisc(ctxt); 9564 9565 if (RAW != 0) { 9566 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9567 } 9568 ctxt->instate = XML_PARSER_EOF; 9569 } 9570 9571 /* 9572 * SAX: end of the document processing. 9573 */ 9574 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9575 ctxt->sax->endDocument(ctxt->userData); 9576 9577 /* 9578 * Remove locally kept entity definitions if the tree was not built 9579 */ 9580 if ((ctxt->myDoc != NULL) && 9581 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 9582 xmlFreeDoc(ctxt->myDoc); 9583 ctxt->myDoc = NULL; 9584 } 9585 9586 if (! ctxt->wellFormed) { 9587 ctxt->valid = 0; 9588 return(-1); 9589 } 9590 return(0); 9591} 9592 9593/** 9594 * xmlParseExtParsedEnt: 9595 * @ctxt: an XML parser context 9596 * 9597 * parse a general parsed entity 9598 * An external general parsed entity is well-formed if it matches the 9599 * production labeled extParsedEnt. 9600 * 9601 * [78] extParsedEnt ::= TextDecl? content 9602 * 9603 * Returns 0, -1 in case of error. the parser context is augmented 9604 * as a result of the parsing. 
9605 */ 9606 9607int 9608xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 9609 xmlChar start[4]; 9610 xmlCharEncoding enc; 9611 9612 if ((ctxt == NULL) || (ctxt->input == NULL)) 9613 return(-1); 9614 9615 xmlDefaultSAXHandlerInit(); 9616 9617 xmlDetectSAX2(ctxt); 9618 9619 GROW; 9620 9621 /* 9622 * SAX: beginning of the document processing. 9623 */ 9624 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9625 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9626 9627 /* 9628 * Get the 4 first bytes and decode the charset 9629 * if enc != XML_CHAR_ENCODING_NONE 9630 * plug some encoding conversion routines. 9631 */ 9632 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 9633 start[0] = RAW; 9634 start[1] = NXT(1); 9635 start[2] = NXT(2); 9636 start[3] = NXT(3); 9637 enc = xmlDetectCharEncoding(start, 4); 9638 if (enc != XML_CHAR_ENCODING_NONE) { 9639 xmlSwitchEncoding(ctxt, enc); 9640 } 9641 } 9642 9643 9644 if (CUR == 0) { 9645 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9646 } 9647 9648 /* 9649 * Check for the XMLDecl in the Prolog. 9650 */ 9651 GROW; 9652 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9653 9654 /* 9655 * Note that we will switch encoding on the fly. 
9656 */ 9657 xmlParseXMLDecl(ctxt); 9658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9659 /* 9660 * The XML REC instructs us to stop parsing right here 9661 */ 9662 return(-1); 9663 } 9664 SKIP_BLANKS; 9665 } else { 9666 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9667 } 9668 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9669 ctxt->sax->startDocument(ctxt->userData); 9670 9671 /* 9672 * Doing validity checking on chunk doesn't make sense 9673 */ 9674 ctxt->instate = XML_PARSER_CONTENT; 9675 ctxt->validate = 0; 9676 ctxt->loadsubset = 0; 9677 ctxt->depth = 0; 9678 9679 xmlParseContent(ctxt); 9680 9681 if ((RAW == '<') && (NXT(1) == '/')) { 9682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 9683 } else if (RAW != 0) { 9684 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 9685 } 9686 9687 /* 9688 * SAX: end of the document processing. 9689 */ 9690 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9691 ctxt->sax->endDocument(ctxt->userData); 9692 9693 if (! ctxt->wellFormed) return(-1); 9694 return(0); 9695} 9696 9697#ifdef LIBXML_PUSH_ENABLED 9698/************************************************************************ 9699 * * 9700 * Progressive parsing interfaces * 9701 * * 9702 ************************************************************************/ 9703 9704/** 9705 * xmlParseLookupSequence: 9706 * @ctxt: an XML parser context 9707 * @first: the first char to lookup 9708 * @next: the next char to lookup or zero 9709 * @third: the next char to lookup or zero 9710 * 9711 * Try to find if a sequence (first, next, third) or just (first next) or 9712 * (first) is available in the input stream. 9713 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 9714 * to avoid rescanning sequences of bytes, it DOES change the state of the 9715 * parser, do not use liberally. 9716 * 9717 * Returns the index to the current parsing point if the full sequence 9718 * is available, -1 otherwise. 
9719 */ 9720static int 9721xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 9722 xmlChar next, xmlChar third) { 9723 int base, len; 9724 xmlParserInputPtr in; 9725 const xmlChar *buf; 9726 9727 in = ctxt->input; 9728 if (in == NULL) return(-1); 9729 base = in->cur - in->base; 9730 if (base < 0) return(-1); 9731 if (ctxt->checkIndex > base) 9732 base = ctxt->checkIndex; 9733 if (in->buf == NULL) { 9734 buf = in->base; 9735 len = in->length; 9736 } else { 9737 buf = in->buf->buffer->content; 9738 len = in->buf->buffer->use; 9739 } 9740 /* take into account the sequence length */ 9741 if (third) len -= 2; 9742 else if (next) len --; 9743 for (;base < len;base++) { 9744 if (buf[base] == first) { 9745 if (third != 0) { 9746 if ((buf[base + 1] != next) || 9747 (buf[base + 2] != third)) continue; 9748 } else if (next != 0) { 9749 if (buf[base + 1] != next) continue; 9750 } 9751 ctxt->checkIndex = 0; 9752#ifdef DEBUG_PUSH 9753 if (next == 0) 9754 xmlGenericError(xmlGenericErrorContext, 9755 "PP: lookup '%c' found at %d\n", 9756 first, base); 9757 else if (third == 0) 9758 xmlGenericError(xmlGenericErrorContext, 9759 "PP: lookup '%c%c' found at %d\n", 9760 first, next, base); 9761 else 9762 xmlGenericError(xmlGenericErrorContext, 9763 "PP: lookup '%c%c%c' found at %d\n", 9764 first, next, third, base); 9765#endif 9766 return(base - (in->cur - in->base)); 9767 } 9768 } 9769 ctxt->checkIndex = base; 9770#ifdef DEBUG_PUSH 9771 if (next == 0) 9772 xmlGenericError(xmlGenericErrorContext, 9773 "PP: lookup '%c' failed\n", first); 9774 else if (third == 0) 9775 xmlGenericError(xmlGenericErrorContext, 9776 "PP: lookup '%c%c' failed\n", first, next); 9777 else 9778 xmlGenericError(xmlGenericErrorContext, 9779 "PP: lookup '%c%c%c' failed\n", first, next, third); 9780#endif 9781 return(-1); 9782} 9783 9784/** 9785 * xmlParseGetLasts: 9786 * @ctxt: an XML parser context 9787 * @lastlt: pointer to store the last '<' from the input 9788 * @lastgt: pointer to store the last '>' 
from the input 9789 * 9790 * Lookup the last < and > in the current chunk 9791 */ 9792static void 9793xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 9794 const xmlChar **lastgt) { 9795 const xmlChar *tmp; 9796 9797 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 9798 xmlGenericError(xmlGenericErrorContext, 9799 "Internal error: xmlParseGetLasts\n"); 9800 return; 9801 } 9802 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 9803 tmp = ctxt->input->end; 9804 tmp--; 9805 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 9806 if (tmp < ctxt->input->base) { 9807 *lastlt = NULL; 9808 *lastgt = NULL; 9809 } else { 9810 *lastlt = tmp; 9811 tmp++; 9812 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 9813 if (*tmp == '\'') { 9814 tmp++; 9815 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 9816 if (tmp < ctxt->input->end) tmp++; 9817 } else if (*tmp == '"') { 9818 tmp++; 9819 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 9820 if (tmp < ctxt->input->end) tmp++; 9821 } else 9822 tmp++; 9823 } 9824 if (tmp < ctxt->input->end) 9825 *lastgt = tmp; 9826 else { 9827 tmp = *lastlt; 9828 tmp--; 9829 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 9830 if (tmp >= ctxt->input->base) 9831 *lastgt = tmp; 9832 else 9833 *lastgt = NULL; 9834 } 9835 } 9836 } else { 9837 *lastlt = NULL; 9838 *lastgt = NULL; 9839 } 9840} 9841/** 9842 * xmlCheckCdataPush: 9843 * @cur: pointer to the bock of characters 9844 * @len: length of the block in bytes 9845 * 9846 * Check that the block of characters is okay as SCdata content [20] 9847 * 9848 * Returns the number of bytes to pass if okay, a negative index where an 9849 * UTF-8 error occured otherwise 9850 */ 9851static int 9852xmlCheckCdataPush(const xmlChar *utf, int len) { 9853 int ix; 9854 unsigned char c; 9855 int codepoint; 9856 9857 if ((utf == NULL) || (len <= 0)) 9858 return(0); 9859 9860 for (ix = 0; ix < len;) { /* string is 0-terminated */ 9861 c = utf[ix]; 9862 
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 9863 if (c >= 0x20) 9864 ix++; 9865 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 9866 ix++; 9867 else 9868 return(-ix); 9869 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 9870 if (ix + 2 > len) return(ix); 9871 if ((utf[ix+1] & 0xc0 ) != 0x80) 9872 return(-ix); 9873 codepoint = (utf[ix] & 0x1f) << 6; 9874 codepoint |= utf[ix+1] & 0x3f; 9875 if (!xmlIsCharQ(codepoint)) 9876 return(-ix); 9877 ix += 2; 9878 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 9879 if (ix + 3 > len) return(ix); 9880 if (((utf[ix+1] & 0xc0) != 0x80) || 9881 ((utf[ix+2] & 0xc0) != 0x80)) 9882 return(-ix); 9883 codepoint = (utf[ix] & 0xf) << 12; 9884 codepoint |= (utf[ix+1] & 0x3f) << 6; 9885 codepoint |= utf[ix+2] & 0x3f; 9886 if (!xmlIsCharQ(codepoint)) 9887 return(-ix); 9888 ix += 3; 9889 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 9890 if (ix + 4 > len) return(ix); 9891 if (((utf[ix+1] & 0xc0) != 0x80) || 9892 ((utf[ix+2] & 0xc0) != 0x80) || 9893 ((utf[ix+3] & 0xc0) != 0x80)) 9894 return(-ix); 9895 codepoint = (utf[ix] & 0x7) << 18; 9896 codepoint |= (utf[ix+1] & 0x3f) << 12; 9897 codepoint |= (utf[ix+2] & 0x3f) << 6; 9898 codepoint |= utf[ix+3] & 0x3f; 9899 if (!xmlIsCharQ(codepoint)) 9900 return(-ix); 9901 ix += 4; 9902 } else /* unknown encoding */ 9903 return(-ix); 9904 } 9905 return(ix); 9906} 9907 9908/** 9909 * xmlParseTryOrFinish: 9910 * @ctxt: an XML parser context 9911 * @terminate: last chunk indicator 9912 * 9913 * Try to progress on parsing 9914 * 9915 * Returns zero if no parsing was possible 9916 */ 9917static int 9918xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 9919 int ret = 0; 9920 int avail, tlen; 9921 xmlChar cur, next; 9922 const xmlChar *lastlt, *lastgt; 9923 9924 if (ctxt->input == NULL) 9925 return(0); 9926 9927#ifdef DEBUG_PUSH 9928 switch (ctxt->instate) { 9929 case XML_PARSER_EOF: 9930 
xmlGenericError(xmlGenericErrorContext, 9931 "PP: try EOF\n"); break; 9932 case XML_PARSER_START: 9933 xmlGenericError(xmlGenericErrorContext, 9934 "PP: try START\n"); break; 9935 case XML_PARSER_MISC: 9936 xmlGenericError(xmlGenericErrorContext, 9937 "PP: try MISC\n");break; 9938 case XML_PARSER_COMMENT: 9939 xmlGenericError(xmlGenericErrorContext, 9940 "PP: try COMMENT\n");break; 9941 case XML_PARSER_PROLOG: 9942 xmlGenericError(xmlGenericErrorContext, 9943 "PP: try PROLOG\n");break; 9944 case XML_PARSER_START_TAG: 9945 xmlGenericError(xmlGenericErrorContext, 9946 "PP: try START_TAG\n");break; 9947 case XML_PARSER_CONTENT: 9948 xmlGenericError(xmlGenericErrorContext, 9949 "PP: try CONTENT\n");break; 9950 case XML_PARSER_CDATA_SECTION: 9951 xmlGenericError(xmlGenericErrorContext, 9952 "PP: try CDATA_SECTION\n");break; 9953 case XML_PARSER_END_TAG: 9954 xmlGenericError(xmlGenericErrorContext, 9955 "PP: try END_TAG\n");break; 9956 case XML_PARSER_ENTITY_DECL: 9957 xmlGenericError(xmlGenericErrorContext, 9958 "PP: try ENTITY_DECL\n");break; 9959 case XML_PARSER_ENTITY_VALUE: 9960 xmlGenericError(xmlGenericErrorContext, 9961 "PP: try ENTITY_VALUE\n");break; 9962 case XML_PARSER_ATTRIBUTE_VALUE: 9963 xmlGenericError(xmlGenericErrorContext, 9964 "PP: try ATTRIBUTE_VALUE\n");break; 9965 case XML_PARSER_DTD: 9966 xmlGenericError(xmlGenericErrorContext, 9967 "PP: try DTD\n");break; 9968 case XML_PARSER_EPILOG: 9969 xmlGenericError(xmlGenericErrorContext, 9970 "PP: try EPILOG\n");break; 9971 case XML_PARSER_PI: 9972 xmlGenericError(xmlGenericErrorContext, 9973 "PP: try PI\n");break; 9974 case XML_PARSER_IGNORE: 9975 xmlGenericError(xmlGenericErrorContext, 9976 "PP: try IGNORE\n");break; 9977 } 9978#endif 9979 9980 if ((ctxt->input != NULL) && 9981 (ctxt->input->cur - ctxt->input->base > 4096)) { 9982 xmlSHRINK(ctxt); 9983 ctxt->checkIndex = 0; 9984 } 9985 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9986 9987 while (1) { 9988 if ((ctxt->errNo != XML_ERR_OK) && 
(ctxt->disableSAX == 1)) 9989 return(0); 9990 9991 9992 /* 9993 * Pop-up of finished entities. 9994 */ 9995 while ((RAW == 0) && (ctxt->inputNr > 1)) 9996 xmlPopInput(ctxt); 9997 9998 if (ctxt->input == NULL) break; 9999 if (ctxt->input->buf == NULL) 10000 avail = ctxt->input->length - 10001 (ctxt->input->cur - ctxt->input->base); 10002 else { 10003 /* 10004 * If we are operating on converted input, try to flush 10005 * remainng chars to avoid them stalling in the non-converted 10006 * buffer. 10007 */ 10008 if ((ctxt->input->buf->raw != NULL) && 10009 (ctxt->input->buf->raw->use > 0)) { 10010 int base = ctxt->input->base - 10011 ctxt->input->buf->buffer->content; 10012 int current = ctxt->input->cur - ctxt->input->base; 10013 10014 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10015 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10016 ctxt->input->cur = ctxt->input->base + current; 10017 ctxt->input->end = 10018 &ctxt->input->buf->buffer->content[ 10019 ctxt->input->buf->buffer->use]; 10020 } 10021 avail = ctxt->input->buf->buffer->use - 10022 (ctxt->input->cur - ctxt->input->base); 10023 } 10024 if (avail < 1) 10025 goto done; 10026 switch (ctxt->instate) { 10027 case XML_PARSER_EOF: 10028 /* 10029 * Document parsing is done ! 10030 */ 10031 goto done; 10032 case XML_PARSER_START: 10033 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10034 xmlChar start[4]; 10035 xmlCharEncoding enc; 10036 10037 /* 10038 * Very first chars read from the document flow. 10039 */ 10040 if (avail < 4) 10041 goto done; 10042 10043 /* 10044 * Get the 4 first bytes and decode the charset 10045 * if enc != XML_CHAR_ENCODING_NONE 10046 * plug some encoding conversion routines, 10047 * else xmlSwitchEncoding will set to (default) 10048 * UTF8. 
10049 */ 10050 start[0] = RAW; 10051 start[1] = NXT(1); 10052 start[2] = NXT(2); 10053 start[3] = NXT(3); 10054 enc = xmlDetectCharEncoding(start, 4); 10055 xmlSwitchEncoding(ctxt, enc); 10056 break; 10057 } 10058 10059 if (avail < 2) 10060 goto done; 10061 cur = ctxt->input->cur[0]; 10062 next = ctxt->input->cur[1]; 10063 if (cur == 0) { 10064 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10065 ctxt->sax->setDocumentLocator(ctxt->userData, 10066 &xmlDefaultSAXLocator); 10067 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10068 ctxt->instate = XML_PARSER_EOF; 10069#ifdef DEBUG_PUSH 10070 xmlGenericError(xmlGenericErrorContext, 10071 "PP: entering EOF\n"); 10072#endif 10073 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10074 ctxt->sax->endDocument(ctxt->userData); 10075 goto done; 10076 } 10077 if ((cur == '<') && (next == '?')) { 10078 /* PI or XML decl */ 10079 if (avail < 5) return(ret); 10080 if ((!terminate) && 10081 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10082 return(ret); 10083 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10084 ctxt->sax->setDocumentLocator(ctxt->userData, 10085 &xmlDefaultSAXLocator); 10086 if ((ctxt->input->cur[2] == 'x') && 10087 (ctxt->input->cur[3] == 'm') && 10088 (ctxt->input->cur[4] == 'l') && 10089 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10090 ret += 5; 10091#ifdef DEBUG_PUSH 10092 xmlGenericError(xmlGenericErrorContext, 10093 "PP: Parsing XML Decl\n"); 10094#endif 10095 xmlParseXMLDecl(ctxt); 10096 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10097 /* 10098 * The XML REC instructs us to stop parsing right 10099 * here 10100 */ 10101 ctxt->instate = XML_PARSER_EOF; 10102 return(0); 10103 } 10104 ctxt->standalone = ctxt->input->standalone; 10105 if ((ctxt->encoding == NULL) && 10106 (ctxt->input->encoding != NULL)) 10107 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10108 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10109 (!ctxt->disableSAX)) 10110 ctxt->sax->startDocument(ctxt->userData); 
10111 ctxt->instate = XML_PARSER_MISC; 10112#ifdef DEBUG_PUSH 10113 xmlGenericError(xmlGenericErrorContext, 10114 "PP: entering MISC\n"); 10115#endif 10116 } else { 10117 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10118 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10119 (!ctxt->disableSAX)) 10120 ctxt->sax->startDocument(ctxt->userData); 10121 ctxt->instate = XML_PARSER_MISC; 10122#ifdef DEBUG_PUSH 10123 xmlGenericError(xmlGenericErrorContext, 10124 "PP: entering MISC\n"); 10125#endif 10126 } 10127 } else { 10128 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10129 ctxt->sax->setDocumentLocator(ctxt->userData, 10130 &xmlDefaultSAXLocator); 10131 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10132 if (ctxt->version == NULL) { 10133 xmlErrMemory(ctxt, NULL); 10134 break; 10135 } 10136 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10137 (!ctxt->disableSAX)) 10138 ctxt->sax->startDocument(ctxt->userData); 10139 ctxt->instate = XML_PARSER_MISC; 10140#ifdef DEBUG_PUSH 10141 xmlGenericError(xmlGenericErrorContext, 10142 "PP: entering MISC\n"); 10143#endif 10144 } 10145 break; 10146 case XML_PARSER_START_TAG: { 10147 const xmlChar *name; 10148 const xmlChar *prefix; 10149 const xmlChar *URI; 10150 int nsNr = ctxt->nsNr; 10151 10152 if ((avail < 2) && (ctxt->inputNr == 1)) 10153 goto done; 10154 cur = ctxt->input->cur[0]; 10155 if (cur != '<') { 10156 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10157 ctxt->instate = XML_PARSER_EOF; 10158 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10159 ctxt->sax->endDocument(ctxt->userData); 10160 goto done; 10161 } 10162 if (!terminate) { 10163 if (ctxt->progressive) { 10164 /* > can be found unescaped in attribute values */ 10165 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10166 goto done; 10167 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10168 goto done; 10169 } 10170 } 10171 if (ctxt->spaceNr == 0) 10172 spacePush(ctxt, -1); 10173 else if (*ctxt->space == -2) 10174 
spacePush(ctxt, -1); 10175 else 10176 spacePush(ctxt, *ctxt->space); 10177#ifdef LIBXML_SAX1_ENABLED 10178 if (ctxt->sax2) 10179#endif /* LIBXML_SAX1_ENABLED */ 10180 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10181#ifdef LIBXML_SAX1_ENABLED 10182 else 10183 name = xmlParseStartTag(ctxt); 10184#endif /* LIBXML_SAX1_ENABLED */ 10185 if (name == NULL) { 10186 spacePop(ctxt); 10187 ctxt->instate = XML_PARSER_EOF; 10188 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10189 ctxt->sax->endDocument(ctxt->userData); 10190 goto done; 10191 } 10192#ifdef LIBXML_VALID_ENABLED 10193 /* 10194 * [ VC: Root Element Type ] 10195 * The Name in the document type declaration must match 10196 * the element type of the root element. 10197 */ 10198 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10199 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10200 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10201#endif /* LIBXML_VALID_ENABLED */ 10202 10203 /* 10204 * Check for an Empty Element. 
10205 */ 10206 if ((RAW == '/') && (NXT(1) == '>')) { 10207 SKIP(2); 10208 10209 if (ctxt->sax2) { 10210 if ((ctxt->sax != NULL) && 10211 (ctxt->sax->endElementNs != NULL) && 10212 (!ctxt->disableSAX)) 10213 ctxt->sax->endElementNs(ctxt->userData, name, 10214 prefix, URI); 10215 if (ctxt->nsNr - nsNr > 0) 10216 nsPop(ctxt, ctxt->nsNr - nsNr); 10217#ifdef LIBXML_SAX1_ENABLED 10218 } else { 10219 if ((ctxt->sax != NULL) && 10220 (ctxt->sax->endElement != NULL) && 10221 (!ctxt->disableSAX)) 10222 ctxt->sax->endElement(ctxt->userData, name); 10223#endif /* LIBXML_SAX1_ENABLED */ 10224 } 10225 spacePop(ctxt); 10226 if (ctxt->nameNr == 0) { 10227 ctxt->instate = XML_PARSER_EPILOG; 10228 } else { 10229 ctxt->instate = XML_PARSER_CONTENT; 10230 } 10231 break; 10232 } 10233 if (RAW == '>') { 10234 NEXT; 10235 } else { 10236 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 10237 "Couldn't find end of Start Tag %s\n", 10238 name); 10239 nodePop(ctxt); 10240 spacePop(ctxt); 10241 } 10242 if (ctxt->sax2) 10243 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 10244#ifdef LIBXML_SAX1_ENABLED 10245 else 10246 namePush(ctxt, name); 10247#endif /* LIBXML_SAX1_ENABLED */ 10248 10249 ctxt->instate = XML_PARSER_CONTENT; 10250 break; 10251 } 10252 case XML_PARSER_CONTENT: { 10253 const xmlChar *test; 10254 unsigned int cons; 10255 if ((avail < 2) && (ctxt->inputNr == 1)) 10256 goto done; 10257 cur = ctxt->input->cur[0]; 10258 next = ctxt->input->cur[1]; 10259 10260 test = CUR_PTR; 10261 cons = ctxt->input->consumed; 10262 if ((cur == '<') && (next == '/')) { 10263 ctxt->instate = XML_PARSER_END_TAG; 10264 break; 10265 } else if ((cur == '<') && (next == '?')) { 10266 if ((!terminate) && 10267 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10268 goto done; 10269 xmlParsePI(ctxt); 10270 } else if ((cur == '<') && (next != '!')) { 10271 ctxt->instate = XML_PARSER_START_TAG; 10272 break; 10273 } else if ((cur == '<') && (next == '!') && 10274 (ctxt->input->cur[2] == '-') && 10275 
(ctxt->input->cur[3] == '-')) { 10276 int term; 10277 10278 if (avail < 4) 10279 goto done; 10280 ctxt->input->cur += 4; 10281 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 10282 ctxt->input->cur -= 4; 10283 if ((!terminate) && (term < 0)) 10284 goto done; 10285 xmlParseComment(ctxt); 10286 ctxt->instate = XML_PARSER_CONTENT; 10287 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 10288 (ctxt->input->cur[2] == '[') && 10289 (ctxt->input->cur[3] == 'C') && 10290 (ctxt->input->cur[4] == 'D') && 10291 (ctxt->input->cur[5] == 'A') && 10292 (ctxt->input->cur[6] == 'T') && 10293 (ctxt->input->cur[7] == 'A') && 10294 (ctxt->input->cur[8] == '[')) { 10295 SKIP(9); 10296 ctxt->instate = XML_PARSER_CDATA_SECTION; 10297 break; 10298 } else if ((cur == '<') && (next == '!') && 10299 (avail < 9)) { 10300 goto done; 10301 } else if (cur == '&') { 10302 if ((!terminate) && 10303 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 10304 goto done; 10305 xmlParseReference(ctxt); 10306 } else { 10307 /* TODO Avoid the extra copy, handle directly !!! */ 10308 /* 10309 * Goal of the following test is: 10310 * - minimize calls to the SAX 'character' callback 10311 * when they are mergeable 10312 * - handle an problem for isBlank when we only parse 10313 * a sequence of blank chars and the next one is 10314 * not available to check against '<' presence. 10315 * - tries to homogenize the differences in SAX 10316 * callbacks between the push and pull versions 10317 * of the parser. 10318 */ 10319 if ((ctxt->inputNr == 1) && 10320 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 10321 if (!terminate) { 10322 if (ctxt->progressive) { 10323 if ((lastlt == NULL) || 10324 (ctxt->input->cur > lastlt)) 10325 goto done; 10326 } else if (xmlParseLookupSequence(ctxt, 10327 '<', 0, 0) < 0) { 10328 goto done; 10329 } 10330 } 10331 } 10332 ctxt->checkIndex = 0; 10333 xmlParseCharData(ctxt, 0); 10334 } 10335 /* 10336 * Pop-up of finished entities. 
10337 */ 10338 while ((RAW == 0) && (ctxt->inputNr > 1)) 10339 xmlPopInput(ctxt); 10340 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10341 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10342 "detected an error in element content\n"); 10343 ctxt->instate = XML_PARSER_EOF; 10344 break; 10345 } 10346 break; 10347 } 10348 case XML_PARSER_END_TAG: 10349 if (avail < 2) 10350 goto done; 10351 if (!terminate) { 10352 if (ctxt->progressive) { 10353 /* > can be found unescaped in attribute values */ 10354 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10355 goto done; 10356 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10357 goto done; 10358 } 10359 } 10360 if (ctxt->sax2) { 10361 xmlParseEndTag2(ctxt, 10362 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 10363 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 10364 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 10365 nameNsPop(ctxt); 10366 } 10367#ifdef LIBXML_SAX1_ENABLED 10368 else 10369 xmlParseEndTag1(ctxt, 0); 10370#endif /* LIBXML_SAX1_ENABLED */ 10371 if (ctxt->nameNr == 0) { 10372 ctxt->instate = XML_PARSER_EPILOG; 10373 } else { 10374 ctxt->instate = XML_PARSER_CONTENT; 10375 } 10376 break; 10377 case XML_PARSER_CDATA_SECTION: { 10378 /* 10379 * The Push mode need to have the SAX callback for 10380 * cdataBlock merge back contiguous callbacks. 
10381 */ 10382 int base; 10383 10384 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 10385 if (base < 0) { 10386 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 10387 int tmp; 10388 10389 tmp = xmlCheckCdataPush(ctxt->input->cur, 10390 XML_PARSER_BIG_BUFFER_SIZE); 10391 if (tmp < 0) { 10392 tmp = -tmp; 10393 ctxt->input->cur += tmp; 10394 goto encoding_error; 10395 } 10396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10397 if (ctxt->sax->cdataBlock != NULL) 10398 ctxt->sax->cdataBlock(ctxt->userData, 10399 ctxt->input->cur, tmp); 10400 else if (ctxt->sax->characters != NULL) 10401 ctxt->sax->characters(ctxt->userData, 10402 ctxt->input->cur, tmp); 10403 } 10404 SKIPL(tmp); 10405 ctxt->checkIndex = 0; 10406 } 10407 goto done; 10408 } else { 10409 int tmp; 10410 10411 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 10412 if ((tmp < 0) || (tmp != base)) { 10413 tmp = -tmp; 10414 ctxt->input->cur += tmp; 10415 goto encoding_error; 10416 } 10417 if ((ctxt->sax != NULL) && (base == 0) && 10418 (ctxt->sax->cdataBlock != NULL) && 10419 (!ctxt->disableSAX)) { 10420 /* 10421 * Special case to provide identical behaviour 10422 * between pull and push parsers on enpty CDATA 10423 * sections 10424 */ 10425 if ((ctxt->input->cur - ctxt->input->base >= 9) && 10426 (!strncmp((const char *)&ctxt->input->cur[-9], 10427 "<![CDATA[", 9))) 10428 ctxt->sax->cdataBlock(ctxt->userData, 10429 BAD_CAST "", 0); 10430 } else if ((ctxt->sax != NULL) && (base > 0) && 10431 (!ctxt->disableSAX)) { 10432 if (ctxt->sax->cdataBlock != NULL) 10433 ctxt->sax->cdataBlock(ctxt->userData, 10434 ctxt->input->cur, base); 10435 else if (ctxt->sax->characters != NULL) 10436 ctxt->sax->characters(ctxt->userData, 10437 ctxt->input->cur, base); 10438 } 10439 SKIPL(base + 3); 10440 ctxt->checkIndex = 0; 10441 ctxt->instate = XML_PARSER_CONTENT; 10442#ifdef DEBUG_PUSH 10443 xmlGenericError(xmlGenericErrorContext, 10444 "PP: entering CONTENT\n"); 10445#endif 10446 } 10447 break; 10448 } 10449 case 
XML_PARSER_MISC: 10450 SKIP_BLANKS; 10451 if (ctxt->input->buf == NULL) 10452 avail = ctxt->input->length - 10453 (ctxt->input->cur - ctxt->input->base); 10454 else 10455 avail = ctxt->input->buf->buffer->use - 10456 (ctxt->input->cur - ctxt->input->base); 10457 if (avail < 2) 10458 goto done; 10459 cur = ctxt->input->cur[0]; 10460 next = ctxt->input->cur[1]; 10461 if ((cur == '<') && (next == '?')) { 10462 if ((!terminate) && 10463 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10464 goto done; 10465#ifdef DEBUG_PUSH 10466 xmlGenericError(xmlGenericErrorContext, 10467 "PP: Parsing PI\n"); 10468#endif 10469 xmlParsePI(ctxt); 10470 ctxt->checkIndex = 0; 10471 } else if ((cur == '<') && (next == '!') && 10472 (ctxt->input->cur[2] == '-') && 10473 (ctxt->input->cur[3] == '-')) { 10474 if ((!terminate) && 10475 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10476 goto done; 10477#ifdef DEBUG_PUSH 10478 xmlGenericError(xmlGenericErrorContext, 10479 "PP: Parsing Comment\n"); 10480#endif 10481 xmlParseComment(ctxt); 10482 ctxt->instate = XML_PARSER_MISC; 10483 ctxt->checkIndex = 0; 10484 } else if ((cur == '<') && (next == '!') && 10485 (ctxt->input->cur[2] == 'D') && 10486 (ctxt->input->cur[3] == 'O') && 10487 (ctxt->input->cur[4] == 'C') && 10488 (ctxt->input->cur[5] == 'T') && 10489 (ctxt->input->cur[6] == 'Y') && 10490 (ctxt->input->cur[7] == 'P') && 10491 (ctxt->input->cur[8] == 'E')) { 10492 if ((!terminate) && 10493 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 10494 goto done; 10495#ifdef DEBUG_PUSH 10496 xmlGenericError(xmlGenericErrorContext, 10497 "PP: Parsing internal subset\n"); 10498#endif 10499 ctxt->inSubset = 1; 10500 xmlParseDocTypeDecl(ctxt); 10501 if (RAW == '[') { 10502 ctxt->instate = XML_PARSER_DTD; 10503#ifdef DEBUG_PUSH 10504 xmlGenericError(xmlGenericErrorContext, 10505 "PP: entering DTD\n"); 10506#endif 10507 } else { 10508 /* 10509 * Create and update the external subset. 
10510 */ 10511 ctxt->inSubset = 2; 10512 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10513 (ctxt->sax->externalSubset != NULL)) 10514 ctxt->sax->externalSubset(ctxt->userData, 10515 ctxt->intSubName, ctxt->extSubSystem, 10516 ctxt->extSubURI); 10517 ctxt->inSubset = 0; 10518 xmlCleanSpecialAttr(ctxt); 10519 ctxt->instate = XML_PARSER_PROLOG; 10520#ifdef DEBUG_PUSH 10521 xmlGenericError(xmlGenericErrorContext, 10522 "PP: entering PROLOG\n"); 10523#endif 10524 } 10525 } else if ((cur == '<') && (next == '!') && 10526 (avail < 9)) { 10527 goto done; 10528 } else { 10529 ctxt->instate = XML_PARSER_START_TAG; 10530 ctxt->progressive = 1; 10531 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10532#ifdef DEBUG_PUSH 10533 xmlGenericError(xmlGenericErrorContext, 10534 "PP: entering START_TAG\n"); 10535#endif 10536 } 10537 break; 10538 case XML_PARSER_PROLOG: 10539 SKIP_BLANKS; 10540 if (ctxt->input->buf == NULL) 10541 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10542 else 10543 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10544 if (avail < 2) 10545 goto done; 10546 cur = ctxt->input->cur[0]; 10547 next = ctxt->input->cur[1]; 10548 if ((cur == '<') && (next == '?')) { 10549 if ((!terminate) && 10550 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10551 goto done; 10552#ifdef DEBUG_PUSH 10553 xmlGenericError(xmlGenericErrorContext, 10554 "PP: Parsing PI\n"); 10555#endif 10556 xmlParsePI(ctxt); 10557 } else if ((cur == '<') && (next == '!') && 10558 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10559 if ((!terminate) && 10560 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10561 goto done; 10562#ifdef DEBUG_PUSH 10563 xmlGenericError(xmlGenericErrorContext, 10564 "PP: Parsing Comment\n"); 10565#endif 10566 xmlParseComment(ctxt); 10567 ctxt->instate = XML_PARSER_PROLOG; 10568 } else if ((cur == '<') && (next == '!') && 10569 (avail < 4)) { 10570 goto done; 10571 } else { 10572 ctxt->instate = 
XML_PARSER_START_TAG; 10573 if (ctxt->progressive == 0) 10574 ctxt->progressive = 1; 10575 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10576#ifdef DEBUG_PUSH 10577 xmlGenericError(xmlGenericErrorContext, 10578 "PP: entering START_TAG\n"); 10579#endif 10580 } 10581 break; 10582 case XML_PARSER_EPILOG: 10583 SKIP_BLANKS; 10584 if (ctxt->input->buf == NULL) 10585 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10586 else 10587 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10588 if (avail < 2) 10589 goto done; 10590 cur = ctxt->input->cur[0]; 10591 next = ctxt->input->cur[1]; 10592 if ((cur == '<') && (next == '?')) { 10593 if ((!terminate) && 10594 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10595 goto done; 10596#ifdef DEBUG_PUSH 10597 xmlGenericError(xmlGenericErrorContext, 10598 "PP: Parsing PI\n"); 10599#endif 10600 xmlParsePI(ctxt); 10601 ctxt->instate = XML_PARSER_EPILOG; 10602 } else if ((cur == '<') && (next == '!') && 10603 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10604 if ((!terminate) && 10605 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10606 goto done; 10607#ifdef DEBUG_PUSH 10608 xmlGenericError(xmlGenericErrorContext, 10609 "PP: Parsing Comment\n"); 10610#endif 10611 xmlParseComment(ctxt); 10612 ctxt->instate = XML_PARSER_EPILOG; 10613 } else if ((cur == '<') && (next == '!') && 10614 (avail < 4)) { 10615 goto done; 10616 } else { 10617 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10618 ctxt->instate = XML_PARSER_EOF; 10619#ifdef DEBUG_PUSH 10620 xmlGenericError(xmlGenericErrorContext, 10621 "PP: entering EOF\n"); 10622#endif 10623 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10624 ctxt->sax->endDocument(ctxt->userData); 10625 goto done; 10626 } 10627 break; 10628 case XML_PARSER_DTD: { 10629 /* 10630 * Sorry but progressive parsing of the internal subset 10631 * is not expected to be supported. 
We first check that 10632 * the full content of the internal subset is available and 10633 * the parsing is launched only at that point. 10634 * Internal subset ends up with "']' S? '>'" in an unescaped 10635 * section and not in a ']]>' sequence which are conditional 10636 * sections (whoever argued to keep that crap in XML deserve 10637 * a place in hell !). 10638 */ 10639 int base, i; 10640 xmlChar *buf; 10641 xmlChar quote = 0; 10642 10643 base = ctxt->input->cur - ctxt->input->base; 10644 if (base < 0) return(0); 10645 if (ctxt->checkIndex > base) 10646 base = ctxt->checkIndex; 10647 buf = ctxt->input->buf->buffer->content; 10648 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 10649 base++) { 10650 if (quote != 0) { 10651 if (buf[base] == quote) 10652 quote = 0; 10653 continue; 10654 } 10655 if ((quote == 0) && (buf[base] == '<')) { 10656 int found = 0; 10657 /* special handling of comments */ 10658 if (((unsigned int) base + 4 < 10659 ctxt->input->buf->buffer->use) && 10660 (buf[base + 1] == '!') && 10661 (buf[base + 2] == '-') && 10662 (buf[base + 3] == '-')) { 10663 for (;(unsigned int) base + 3 < 10664 ctxt->input->buf->buffer->use; base++) { 10665 if ((buf[base] == '-') && 10666 (buf[base + 1] == '-') && 10667 (buf[base + 2] == '>')) { 10668 found = 1; 10669 base += 2; 10670 break; 10671 } 10672 } 10673 if (!found) { 10674#if 0 10675 fprintf(stderr, "unfinished comment\n"); 10676#endif 10677 break; /* for */ 10678 } 10679 continue; 10680 } 10681 } 10682 if (buf[base] == '"') { 10683 quote = '"'; 10684 continue; 10685 } 10686 if (buf[base] == '\'') { 10687 quote = '\''; 10688 continue; 10689 } 10690 if (buf[base] == ']') { 10691#if 0 10692 fprintf(stderr, "%c%c%c%c: ", buf[base], 10693 buf[base + 1], buf[base + 2], buf[base + 3]); 10694#endif 10695 if ((unsigned int) base +1 >= 10696 ctxt->input->buf->buffer->use) 10697 break; 10698 if (buf[base + 1] == ']') { 10699 /* conditional crap, skip both ']' ! 
*/ 10700 base++; 10701 continue; 10702 } 10703 for (i = 1; 10704 (unsigned int) base + i < ctxt->input->buf->buffer->use; 10705 i++) { 10706 if (buf[base + i] == '>') { 10707#if 0 10708 fprintf(stderr, "found\n"); 10709#endif 10710 goto found_end_int_subset; 10711 } 10712 if (!IS_BLANK_CH(buf[base + i])) { 10713#if 0 10714 fprintf(stderr, "not found\n"); 10715#endif 10716 goto not_end_of_int_subset; 10717 } 10718 } 10719#if 0 10720 fprintf(stderr, "end of stream\n"); 10721#endif 10722 break; 10723 10724 } 10725not_end_of_int_subset: 10726 continue; /* for */ 10727 } 10728 /* 10729 * We didn't found the end of the Internal subset 10730 */ 10731#ifdef DEBUG_PUSH 10732 if (next == 0) 10733 xmlGenericError(xmlGenericErrorContext, 10734 "PP: lookup of int subset end filed\n"); 10735#endif 10736 goto done; 10737 10738found_end_int_subset: 10739 xmlParseInternalSubset(ctxt); 10740 ctxt->inSubset = 2; 10741 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10742 (ctxt->sax->externalSubset != NULL)) 10743 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10744 ctxt->extSubSystem, ctxt->extSubURI); 10745 ctxt->inSubset = 0; 10746 xmlCleanSpecialAttr(ctxt); 10747 ctxt->instate = XML_PARSER_PROLOG; 10748 ctxt->checkIndex = 0; 10749#ifdef DEBUG_PUSH 10750 xmlGenericError(xmlGenericErrorContext, 10751 "PP: entering PROLOG\n"); 10752#endif 10753 break; 10754 } 10755 case XML_PARSER_COMMENT: 10756 xmlGenericError(xmlGenericErrorContext, 10757 "PP: internal error, state == COMMENT\n"); 10758 ctxt->instate = XML_PARSER_CONTENT; 10759#ifdef DEBUG_PUSH 10760 xmlGenericError(xmlGenericErrorContext, 10761 "PP: entering CONTENT\n"); 10762#endif 10763 break; 10764 case XML_PARSER_IGNORE: 10765 xmlGenericError(xmlGenericErrorContext, 10766 "PP: internal error, state == IGNORE"); 10767 ctxt->instate = XML_PARSER_DTD; 10768#ifdef DEBUG_PUSH 10769 xmlGenericError(xmlGenericErrorContext, 10770 "PP: entering DTD\n"); 10771#endif 10772 break; 10773 case XML_PARSER_PI: 10774 
xmlGenericError(xmlGenericErrorContext, 10775 "PP: internal error, state == PI\n"); 10776 ctxt->instate = XML_PARSER_CONTENT; 10777#ifdef DEBUG_PUSH 10778 xmlGenericError(xmlGenericErrorContext, 10779 "PP: entering CONTENT\n"); 10780#endif 10781 break; 10782 case XML_PARSER_ENTITY_DECL: 10783 xmlGenericError(xmlGenericErrorContext, 10784 "PP: internal error, state == ENTITY_DECL\n"); 10785 ctxt->instate = XML_PARSER_DTD; 10786#ifdef DEBUG_PUSH 10787 xmlGenericError(xmlGenericErrorContext, 10788 "PP: entering DTD\n"); 10789#endif 10790 break; 10791 case XML_PARSER_ENTITY_VALUE: 10792 xmlGenericError(xmlGenericErrorContext, 10793 "PP: internal error, state == ENTITY_VALUE\n"); 10794 ctxt->instate = XML_PARSER_CONTENT; 10795#ifdef DEBUG_PUSH 10796 xmlGenericError(xmlGenericErrorContext, 10797 "PP: entering DTD\n"); 10798#endif 10799 break; 10800 case XML_PARSER_ATTRIBUTE_VALUE: 10801 xmlGenericError(xmlGenericErrorContext, 10802 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 10803 ctxt->instate = XML_PARSER_START_TAG; 10804#ifdef DEBUG_PUSH 10805 xmlGenericError(xmlGenericErrorContext, 10806 "PP: entering START_TAG\n"); 10807#endif 10808 break; 10809 case XML_PARSER_SYSTEM_LITERAL: 10810 xmlGenericError(xmlGenericErrorContext, 10811 "PP: internal error, state == SYSTEM_LITERAL\n"); 10812 ctxt->instate = XML_PARSER_START_TAG; 10813#ifdef DEBUG_PUSH 10814 xmlGenericError(xmlGenericErrorContext, 10815 "PP: entering START_TAG\n"); 10816#endif 10817 break; 10818 case XML_PARSER_PUBLIC_LITERAL: 10819 xmlGenericError(xmlGenericErrorContext, 10820 "PP: internal error, state == PUBLIC_LITERAL\n"); 10821 ctxt->instate = XML_PARSER_START_TAG; 10822#ifdef DEBUG_PUSH 10823 xmlGenericError(xmlGenericErrorContext, 10824 "PP: entering START_TAG\n"); 10825#endif 10826 break; 10827 } 10828 } 10829done: 10830#ifdef DEBUG_PUSH 10831 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 10832#endif 10833 return(ret); 10834encoding_error: 10835 { 10836 char buffer[150]; 10837 
10838 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 10839 ctxt->input->cur[0], ctxt->input->cur[1], 10840 ctxt->input->cur[2], ctxt->input->cur[3]); 10841 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 10842 "Input is not proper UTF-8, indicate encoding !\n%s", 10843 BAD_CAST buffer, NULL); 10844 } 10845 return(0); 10846} 10847 10848/** 10849 * xmlParseChunk: 10850 * @ctxt: an XML parser context 10851 * @chunk: an char array 10852 * @size: the size in byte of the chunk 10853 * @terminate: last chunk indicator 10854 * 10855 * Parse a Chunk of memory 10856 * 10857 * Returns zero if no error, the xmlParserErrors otherwise. 10858 */ 10859int 10860xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 10861 int terminate) { 10862 int end_in_lf = 0; 10863 10864 if (ctxt == NULL) 10865 return(XML_ERR_INTERNAL_ERROR); 10866 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10867 return(ctxt->errNo); 10868 if (ctxt->instate == XML_PARSER_START) 10869 xmlDetectSAX2(ctxt); 10870 if ((size > 0) && (chunk != NULL) && (!terminate) && 10871 (chunk[size - 1] == '\r')) { 10872 end_in_lf = 1; 10873 size--; 10874 } 10875 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10876 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 10877 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10878 int cur = ctxt->input->cur - ctxt->input->base; 10879 int res; 10880 10881 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10882 if (res < 0) { 10883 ctxt->errNo = XML_PARSER_EOF; 10884 ctxt->disableSAX = 1; 10885 return (XML_PARSER_EOF); 10886 } 10887 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10888 ctxt->input->cur = ctxt->input->base + cur; 10889 ctxt->input->end = 10890 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10891#ifdef DEBUG_PUSH 10892 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10893#endif 10894 10895 } else if (ctxt->instate != 
XML_PARSER_EOF) { 10896 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 10897 xmlParserInputBufferPtr in = ctxt->input->buf; 10898 if ((in->encoder != NULL) && (in->buffer != NULL) && 10899 (in->raw != NULL)) { 10900 int nbchars; 10901 10902 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 10903 if (nbchars < 0) { 10904 /* TODO 2.6.0 */ 10905 xmlGenericError(xmlGenericErrorContext, 10906 "xmlParseChunk: encoder error\n"); 10907 return(XML_ERR_INVALID_ENCODING); 10908 } 10909 } 10910 } 10911 } 10912 xmlParseTryOrFinish(ctxt, terminate); 10913 if ((end_in_lf == 1) && (ctxt->input != NULL) && 10914 (ctxt->input->buf != NULL)) { 10915 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 10916 } 10917 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10918 return(ctxt->errNo); 10919 if (terminate) { 10920 /* 10921 * Check for termination 10922 */ 10923 int avail = 0; 10924 10925 if (ctxt->input != NULL) { 10926 if (ctxt->input->buf == NULL) 10927 avail = ctxt->input->length - 10928 (ctxt->input->cur - ctxt->input->base); 10929 else 10930 avail = ctxt->input->buf->buffer->use - 10931 (ctxt->input->cur - ctxt->input->base); 10932 } 10933 10934 if ((ctxt->instate != XML_PARSER_EOF) && 10935 (ctxt->instate != XML_PARSER_EPILOG)) { 10936 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10937 } 10938 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 10939 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10940 } 10941 if (ctxt->instate != XML_PARSER_EOF) { 10942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10943 ctxt->sax->endDocument(ctxt->userData); 10944 } 10945 ctxt->instate = XML_PARSER_EOF; 10946 } 10947 return((xmlParserErrors) ctxt->errNo); 10948} 10949 10950/************************************************************************ 10951 * * 10952 * I/O front end functions to the parser * 10953 * * 10954 ************************************************************************/ 10955 10956/** 10957 * xmlCreatePushParserCtxt: 
10958 * @sax: a SAX handler 10959 * @user_data: The user data returned on SAX callbacks 10960 * @chunk: a pointer to an array of chars 10961 * @size: number of chars in the array 10962 * @filename: an optional file name or URI 10963 * 10964 * Create a parser context for using the XML parser in push mode. 10965 * If @buffer and @size are non-NULL, the data is used to detect 10966 * the encoding. The remaining characters will be parsed so they 10967 * don't need to be fed in again through xmlParseChunk. 10968 * To allow content encoding detection, @size should be >= 4 10969 * The value of @filename is used for fetching external entities 10970 * and error/warning reports. 10971 * 10972 * Returns the new parser context or NULL 10973 */ 10974 10975xmlParserCtxtPtr 10976xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10977 const char *chunk, int size, const char *filename) { 10978 xmlParserCtxtPtr ctxt; 10979 xmlParserInputPtr inputStream; 10980 xmlParserInputBufferPtr buf; 10981 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 10982 10983 /* 10984 * plug some encoding conversion routines 10985 */ 10986 if ((chunk != NULL) && (size >= 4)) 10987 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 10988 10989 buf = xmlAllocParserInputBuffer(enc); 10990 if (buf == NULL) return(NULL); 10991 10992 ctxt = xmlNewParserCtxt(); 10993 if (ctxt == NULL) { 10994 xmlErrMemory(NULL, "creating parser: out of memory\n"); 10995 xmlFreeParserInputBuffer(buf); 10996 return(NULL); 10997 } 10998 ctxt->dictNames = 1; 10999 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11000 if (ctxt->pushTab == NULL) { 11001 xmlErrMemory(ctxt, NULL); 11002 xmlFreeParserInputBuffer(buf); 11003 xmlFreeParserCtxt(ctxt); 11004 return(NULL); 11005 } 11006 if (sax != NULL) { 11007#ifdef LIBXML_SAX1_ENABLED 11008 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11009#endif /* LIBXML_SAX1_ENABLED */ 11010 xmlFree(ctxt->sax); 11011 ctxt->sax = 
(xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11012 if (ctxt->sax == NULL) { 11013 xmlErrMemory(ctxt, NULL); 11014 xmlFreeParserInputBuffer(buf); 11015 xmlFreeParserCtxt(ctxt); 11016 return(NULL); 11017 } 11018 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11019 if (sax->initialized == XML_SAX2_MAGIC) 11020 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11021 else 11022 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11023 if (user_data != NULL) 11024 ctxt->userData = user_data; 11025 } 11026 if (filename == NULL) { 11027 ctxt->directory = NULL; 11028 } else { 11029 ctxt->directory = xmlParserGetDirectory(filename); 11030 } 11031 11032 inputStream = xmlNewInputStream(ctxt); 11033 if (inputStream == NULL) { 11034 xmlFreeParserCtxt(ctxt); 11035 xmlFreeParserInputBuffer(buf); 11036 return(NULL); 11037 } 11038 11039 if (filename == NULL) 11040 inputStream->filename = NULL; 11041 else { 11042 inputStream->filename = (char *) 11043 xmlCanonicPath((const xmlChar *) filename); 11044 if (inputStream->filename == NULL) { 11045 xmlFreeParserCtxt(ctxt); 11046 xmlFreeParserInputBuffer(buf); 11047 return(NULL); 11048 } 11049 } 11050 inputStream->buf = buf; 11051 inputStream->base = inputStream->buf->buffer->content; 11052 inputStream->cur = inputStream->buf->buffer->content; 11053 inputStream->end = 11054 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11055 11056 inputPush(ctxt, inputStream); 11057 11058 /* 11059 * If the caller didn't provide an initial 'chunk' for determining 11060 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11061 * that it can be automatically determined later 11062 */ 11063 if ((size == 0) || (chunk == NULL)) { 11064 ctxt->charset = XML_CHAR_ENCODING_NONE; 11065 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11066 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11067 int cur = ctxt->input->cur - ctxt->input->base; 11068 11069 xmlParserInputBufferPush(ctxt->input->buf, size, 
chunk); 11070 11071 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11072 ctxt->input->cur = ctxt->input->base + cur; 11073 ctxt->input->end = 11074 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11075#ifdef DEBUG_PUSH 11076 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11077#endif 11078 } 11079 11080 if (enc != XML_CHAR_ENCODING_NONE) { 11081 xmlSwitchEncoding(ctxt, enc); 11082 } 11083 11084 return(ctxt); 11085} 11086#endif /* LIBXML_PUSH_ENABLED */ 11087 11088/** 11089 * xmlStopParser: 11090 * @ctxt: an XML parser context 11091 * 11092 * Blocks further parser processing 11093 */ 11094void 11095xmlStopParser(xmlParserCtxtPtr ctxt) { 11096 if (ctxt == NULL) 11097 return; 11098 ctxt->instate = XML_PARSER_EOF; 11099 ctxt->disableSAX = 1; 11100 if (ctxt->input != NULL) { 11101 ctxt->input->cur = BAD_CAST""; 11102 ctxt->input->base = ctxt->input->cur; 11103 } 11104} 11105 11106/** 11107 * xmlCreateIOParserCtxt: 11108 * @sax: a SAX handler 11109 * @user_data: The user data returned on SAX callbacks 11110 * @ioread: an I/O read function 11111 * @ioclose: an I/O close function 11112 * @ioctx: an I/O handler 11113 * @enc: the charset encoding if known 11114 * 11115 * Create a parser context for using the XML parser with an existing 11116 * I/O stream 11117 * 11118 * Returns the new parser context or NULL 11119 */ 11120xmlParserCtxtPtr 11121xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11122 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11123 void *ioctx, xmlCharEncoding enc) { 11124 xmlParserCtxtPtr ctxt; 11125 xmlParserInputPtr inputStream; 11126 xmlParserInputBufferPtr buf; 11127 11128 if (ioread == NULL) return(NULL); 11129 11130 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11131 if (buf == NULL) return(NULL); 11132 11133 ctxt = xmlNewParserCtxt(); 11134 if (ctxt == NULL) { 11135 xmlFreeParserInputBuffer(buf); 11136 return(NULL); 11137 } 11138 if (sax != NULL) { 
11139#ifdef LIBXML_SAX1_ENABLED 11140 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11141#endif /* LIBXML_SAX1_ENABLED */ 11142 xmlFree(ctxt->sax); 11143 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11144 if (ctxt->sax == NULL) { 11145 xmlErrMemory(ctxt, NULL); 11146 xmlFreeParserCtxt(ctxt); 11147 return(NULL); 11148 } 11149 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11150 if (sax->initialized == XML_SAX2_MAGIC) 11151 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11152 else 11153 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11154 if (user_data != NULL) 11155 ctxt->userData = user_data; 11156 } 11157 11158 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 11159 if (inputStream == NULL) { 11160 xmlFreeParserCtxt(ctxt); 11161 return(NULL); 11162 } 11163 inputPush(ctxt, inputStream); 11164 11165 return(ctxt); 11166} 11167 11168#ifdef LIBXML_VALID_ENABLED 11169/************************************************************************ 11170 * * 11171 * Front ends when parsing a DTD * 11172 * * 11173 ************************************************************************/ 11174 11175/** 11176 * xmlIOParseDTD: 11177 * @sax: the SAX handler block or NULL 11178 * @input: an Input Buffer 11179 * @enc: the charset encoding if known 11180 * 11181 * Load and parse a DTD 11182 * 11183 * Returns the resulting xmlDtdPtr or NULL in case of error. 11184 * @input will be freed by the function in any case. 
11185 */ 11186 11187xmlDtdPtr 11188xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 11189 xmlCharEncoding enc) { 11190 xmlDtdPtr ret = NULL; 11191 xmlParserCtxtPtr ctxt; 11192 xmlParserInputPtr pinput = NULL; 11193 xmlChar start[4]; 11194 11195 if (input == NULL) 11196 return(NULL); 11197 11198 ctxt = xmlNewParserCtxt(); 11199 if (ctxt == NULL) { 11200 xmlFreeParserInputBuffer(input); 11201 return(NULL); 11202 } 11203 11204 /* 11205 * Set-up the SAX context 11206 */ 11207 if (sax != NULL) { 11208 if (ctxt->sax != NULL) 11209 xmlFree(ctxt->sax); 11210 ctxt->sax = sax; 11211 ctxt->userData = ctxt; 11212 } 11213 xmlDetectSAX2(ctxt); 11214 11215 /* 11216 * generate a parser input from the I/O handler 11217 */ 11218 11219 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 11220 if (pinput == NULL) { 11221 if (sax != NULL) ctxt->sax = NULL; 11222 xmlFreeParserInputBuffer(input); 11223 xmlFreeParserCtxt(ctxt); 11224 return(NULL); 11225 } 11226 11227 /* 11228 * plug some encoding conversion routines here. 11229 */ 11230 xmlPushInput(ctxt, pinput); 11231 if (enc != XML_CHAR_ENCODING_NONE) { 11232 xmlSwitchEncoding(ctxt, enc); 11233 } 11234 11235 pinput->filename = NULL; 11236 pinput->line = 1; 11237 pinput->col = 1; 11238 pinput->base = ctxt->input->cur; 11239 pinput->cur = ctxt->input->cur; 11240 pinput->free = NULL; 11241 11242 /* 11243 * let's parse that entity knowing it's an external subset. 11244 */ 11245 ctxt->inSubset = 2; 11246 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11247 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11248 BAD_CAST "none", BAD_CAST "none"); 11249 11250 if ((enc == XML_CHAR_ENCODING_NONE) && 11251 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 11252 /* 11253 * Get the 4 first bytes and decode the charset 11254 * if enc != XML_CHAR_ENCODING_NONE 11255 * plug some encoding conversion routines. 
11256 */ 11257 start[0] = RAW; 11258 start[1] = NXT(1); 11259 start[2] = NXT(2); 11260 start[3] = NXT(3); 11261 enc = xmlDetectCharEncoding(start, 4); 11262 if (enc != XML_CHAR_ENCODING_NONE) { 11263 xmlSwitchEncoding(ctxt, enc); 11264 } 11265 } 11266 11267 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 11268 11269 if (ctxt->myDoc != NULL) { 11270 if (ctxt->wellFormed) { 11271 ret = ctxt->myDoc->extSubset; 11272 ctxt->myDoc->extSubset = NULL; 11273 if (ret != NULL) { 11274 xmlNodePtr tmp; 11275 11276 ret->doc = NULL; 11277 tmp = ret->children; 11278 while (tmp != NULL) { 11279 tmp->doc = NULL; 11280 tmp = tmp->next; 11281 } 11282 } 11283 } else { 11284 ret = NULL; 11285 } 11286 xmlFreeDoc(ctxt->myDoc); 11287 ctxt->myDoc = NULL; 11288 } 11289 if (sax != NULL) ctxt->sax = NULL; 11290 xmlFreeParserCtxt(ctxt); 11291 11292 return(ret); 11293} 11294 11295/** 11296 * xmlSAXParseDTD: 11297 * @sax: the SAX handler block 11298 * @ExternalID: a NAME* containing the External ID of the DTD 11299 * @SystemID: a NAME* containing the URL to the DTD 11300 * 11301 * Load and parse an external subset. 11302 * 11303 * Returns the resulting xmlDtdPtr or NULL in case of error. 
11304 */ 11305 11306xmlDtdPtr 11307xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 11308 const xmlChar *SystemID) { 11309 xmlDtdPtr ret = NULL; 11310 xmlParserCtxtPtr ctxt; 11311 xmlParserInputPtr input = NULL; 11312 xmlCharEncoding enc; 11313 xmlChar* systemIdCanonic; 11314 11315 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 11316 11317 ctxt = xmlNewParserCtxt(); 11318 if (ctxt == NULL) { 11319 return(NULL); 11320 } 11321 11322 /* 11323 * Set-up the SAX context 11324 */ 11325 if (sax != NULL) { 11326 if (ctxt->sax != NULL) 11327 xmlFree(ctxt->sax); 11328 ctxt->sax = sax; 11329 ctxt->userData = ctxt; 11330 } 11331 11332 /* 11333 * Canonicalise the system ID 11334 */ 11335 systemIdCanonic = xmlCanonicPath(SystemID); 11336 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 11337 xmlFreeParserCtxt(ctxt); 11338 return(NULL); 11339 } 11340 11341 /* 11342 * Ask the Entity resolver to load the damn thing 11343 */ 11344 11345 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 11346 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 11347 systemIdCanonic); 11348 if (input == NULL) { 11349 if (sax != NULL) ctxt->sax = NULL; 11350 xmlFreeParserCtxt(ctxt); 11351 if (systemIdCanonic != NULL) 11352 xmlFree(systemIdCanonic); 11353 return(NULL); 11354 } 11355 11356 /* 11357 * plug some encoding conversion routines here. 11358 */ 11359 xmlPushInput(ctxt, input); 11360 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11361 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 11362 xmlSwitchEncoding(ctxt, enc); 11363 } 11364 11365 if (input->filename == NULL) 11366 input->filename = (char *) systemIdCanonic; 11367 else 11368 xmlFree(systemIdCanonic); 11369 input->line = 1; 11370 input->col = 1; 11371 input->base = ctxt->input->cur; 11372 input->cur = ctxt->input->cur; 11373 input->free = NULL; 11374 11375 /* 11376 * let's parse that entity knowing it's an external subset. 
11377 */ 11378 ctxt->inSubset = 2; 11379 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11380 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11381 ExternalID, SystemID); 11382 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 11383 11384 if (ctxt->myDoc != NULL) { 11385 if (ctxt->wellFormed) { 11386 ret = ctxt->myDoc->extSubset; 11387 ctxt->myDoc->extSubset = NULL; 11388 if (ret != NULL) { 11389 xmlNodePtr tmp; 11390 11391 ret->doc = NULL; 11392 tmp = ret->children; 11393 while (tmp != NULL) { 11394 tmp->doc = NULL; 11395 tmp = tmp->next; 11396 } 11397 } 11398 } else { 11399 ret = NULL; 11400 } 11401 xmlFreeDoc(ctxt->myDoc); 11402 ctxt->myDoc = NULL; 11403 } 11404 if (sax != NULL) ctxt->sax = NULL; 11405 xmlFreeParserCtxt(ctxt); 11406 11407 return(ret); 11408} 11409 11410 11411/** 11412 * xmlParseDTD: 11413 * @ExternalID: a NAME* containing the External ID of the DTD 11414 * @SystemID: a NAME* containing the URL to the DTD 11415 * 11416 * Load and parse an external subset. 11417 * 11418 * Returns the resulting xmlDtdPtr or NULL in case of error. 11419 */ 11420 11421xmlDtdPtr 11422xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 11423 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 11424} 11425#endif /* LIBXML_VALID_ENABLED */ 11426 11427/************************************************************************ 11428 * * 11429 * Front ends when parsing an Entity * 11430 * * 11431 ************************************************************************/ 11432 11433/** 11434 * xmlParseCtxtExternalEntity: 11435 * @ctx: the existing parsing context 11436 * @URL: the URL for the entity to load 11437 * @ID: the System ID for the entity to load 11438 * @lst: the return value for the set of parsed nodes 11439 * 11440 * Parse an external general entity within an existing parsing context 11441 * An external general parsed entity is well-formed if it matches the 11442 * production labeled extParsedEnt. 
11443 * 11444 * [78] extParsedEnt ::= TextDecl? content 11445 * 11446 * Returns 0 if the entity is well formed, -1 in case of args problem and 11447 * the parser error code otherwise 11448 */ 11449 11450int 11451xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 11452 const xmlChar *ID, xmlNodePtr *lst) { 11453 xmlParserCtxtPtr ctxt; 11454 xmlDocPtr newDoc; 11455 xmlNodePtr newRoot; 11456 xmlSAXHandlerPtr oldsax = NULL; 11457 int ret = 0; 11458 xmlChar start[4]; 11459 xmlCharEncoding enc; 11460 xmlParserInputPtr inputStream; 11461 char *directory = NULL; 11462 11463 if (ctx == NULL) return(-1); 11464 11465 if (ctx->depth > 40) { 11466 return(XML_ERR_ENTITY_LOOP); 11467 } 11468 11469 if (lst != NULL) 11470 *lst = NULL; 11471 if ((URL == NULL) && (ID == NULL)) 11472 return(-1); 11473 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 11474 return(-1); 11475 11476 ctxt = xmlNewParserCtxt(); 11477 if (ctxt == NULL) { 11478 return(-1); 11479 } 11480 11481 ctxt->userData = ctxt; 11482 ctxt->_private = ctx->_private; 11483 11484 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11485 if (inputStream == NULL) { 11486 xmlFreeParserCtxt(ctxt); 11487 return(-1); 11488 } 11489 11490 inputPush(ctxt, inputStream); 11491 11492 if ((ctxt->directory == NULL) && (directory == NULL)) 11493 directory = xmlParserGetDirectory((char *)URL); 11494 if ((ctxt->directory == NULL) && (directory != NULL)) 11495 ctxt->directory = directory; 11496 11497 oldsax = ctxt->sax; 11498 ctxt->sax = ctx->sax; 11499 xmlDetectSAX2(ctxt); 11500 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11501 if (newDoc == NULL) { 11502 xmlFreeParserCtxt(ctxt); 11503 return(-1); 11504 } 11505 if (ctx->myDoc->dict) { 11506 newDoc->dict = ctx->myDoc->dict; 11507 xmlDictReference(newDoc->dict); 11508 } 11509 if (ctx->myDoc != NULL) { 11510 newDoc->intSubset = ctx->myDoc->intSubset; 11511 newDoc->extSubset = ctx->myDoc->extSubset; 11512 } 11513 if (ctx->myDoc->URL != NULL) { 11514 
newDoc->URL = xmlStrdup(ctx->myDoc->URL); 11515 } 11516 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11517 if (newRoot == NULL) { 11518 ctxt->sax = oldsax; 11519 xmlFreeParserCtxt(ctxt); 11520 newDoc->intSubset = NULL; 11521 newDoc->extSubset = NULL; 11522 xmlFreeDoc(newDoc); 11523 return(-1); 11524 } 11525 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11526 nodePush(ctxt, newDoc->children); 11527 if (ctx->myDoc == NULL) { 11528 ctxt->myDoc = newDoc; 11529 } else { 11530 ctxt->myDoc = ctx->myDoc; 11531 newDoc->children->doc = ctx->myDoc; 11532 } 11533 11534 /* 11535 * Get the 4 first bytes and decode the charset 11536 * if enc != XML_CHAR_ENCODING_NONE 11537 * plug some encoding conversion routines. 11538 */ 11539 GROW 11540 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11541 start[0] = RAW; 11542 start[1] = NXT(1); 11543 start[2] = NXT(2); 11544 start[3] = NXT(3); 11545 enc = xmlDetectCharEncoding(start, 4); 11546 if (enc != XML_CHAR_ENCODING_NONE) { 11547 xmlSwitchEncoding(ctxt, enc); 11548 } 11549 } 11550 11551 /* 11552 * Parse a possible text declaration first 11553 */ 11554 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11555 xmlParseTextDecl(ctxt); 11556 } 11557 11558 /* 11559 * Doing validity checking on chunk doesn't make sense 11560 */ 11561 ctxt->instate = XML_PARSER_CONTENT; 11562 ctxt->validate = ctx->validate; 11563 ctxt->valid = ctx->valid; 11564 ctxt->loadsubset = ctx->loadsubset; 11565 ctxt->depth = ctx->depth + 1; 11566 ctxt->replaceEntities = ctx->replaceEntities; 11567 if (ctxt->validate) { 11568 ctxt->vctxt.error = ctx->vctxt.error; 11569 ctxt->vctxt.warning = ctx->vctxt.warning; 11570 } else { 11571 ctxt->vctxt.error = NULL; 11572 ctxt->vctxt.warning = NULL; 11573 } 11574 ctxt->vctxt.nodeTab = NULL; 11575 ctxt->vctxt.nodeNr = 0; 11576 ctxt->vctxt.nodeMax = 0; 11577 ctxt->vctxt.node = NULL; 11578 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11579 ctxt->dict = ctx->dict; 11580 ctxt->str_xml = 
xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11581 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11582 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11583 ctxt->dictNames = ctx->dictNames; 11584 ctxt->attsDefault = ctx->attsDefault; 11585 ctxt->attsSpecial = ctx->attsSpecial; 11586 ctxt->linenumbers = ctx->linenumbers; 11587 11588 xmlParseContent(ctxt); 11589 11590 ctx->validate = ctxt->validate; 11591 ctx->valid = ctxt->valid; 11592 if ((RAW == '<') && (NXT(1) == '/')) { 11593 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11594 } else if (RAW != 0) { 11595 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11596 } 11597 if (ctxt->node != newDoc->children) { 11598 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11599 } 11600 11601 if (!ctxt->wellFormed) { 11602 if (ctxt->errNo == 0) 11603 ret = 1; 11604 else 11605 ret = ctxt->errNo; 11606 } else { 11607 if (lst != NULL) { 11608 xmlNodePtr cur; 11609 11610 /* 11611 * Return the newly created nodeset after unlinking it from 11612 * they pseudo parent. 
11613 */ 11614 cur = newDoc->children->children; 11615 *lst = cur; 11616 while (cur != NULL) { 11617 cur->parent = NULL; 11618 cur = cur->next; 11619 } 11620 newDoc->children->children = NULL; 11621 } 11622 ret = 0; 11623 } 11624 ctxt->sax = oldsax; 11625 ctxt->dict = NULL; 11626 ctxt->attsDefault = NULL; 11627 ctxt->attsSpecial = NULL; 11628 xmlFreeParserCtxt(ctxt); 11629 newDoc->intSubset = NULL; 11630 newDoc->extSubset = NULL; 11631 xmlFreeDoc(newDoc); 11632 11633 return(ret); 11634} 11635 11636/** 11637 * xmlParseExternalEntityPrivate: 11638 * @doc: the document the chunk pertains to 11639 * @oldctxt: the previous parser context if available 11640 * @sax: the SAX handler bloc (possibly NULL) 11641 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11642 * @depth: Used for loop detection, use 0 11643 * @URL: the URL for the entity to load 11644 * @ID: the System ID for the entity to load 11645 * @list: the return value for the set of parsed nodes 11646 * 11647 * Private version of xmlParseExternalEntity() 11648 * 11649 * Returns 0 if the entity is well formed, -1 in case of args problem and 11650 * the parser error code otherwise 11651 */ 11652 11653static xmlParserErrors 11654xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 11655 xmlSAXHandlerPtr sax, 11656 void *user_data, int depth, const xmlChar *URL, 11657 const xmlChar *ID, xmlNodePtr *list) { 11658 xmlParserCtxtPtr ctxt; 11659 xmlDocPtr newDoc; 11660 xmlNodePtr newRoot; 11661 xmlSAXHandlerPtr oldsax = NULL; 11662 xmlParserErrors ret = XML_ERR_OK; 11663 xmlChar start[4]; 11664 xmlCharEncoding enc; 11665 11666 if (depth > 40) { 11667 return(XML_ERR_ENTITY_LOOP); 11668 } 11669 11670 11671 11672 if (list != NULL) 11673 *list = NULL; 11674 if ((URL == NULL) && (ID == NULL)) 11675 return(XML_ERR_INTERNAL_ERROR); 11676 if (doc == NULL) 11677 return(XML_ERR_INTERNAL_ERROR); 11678 11679 11680 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 11681 if (ctxt == NULL) 
return(XML_WAR_UNDECLARED_ENTITY); 11682 ctxt->userData = ctxt; 11683 if (oldctxt != NULL) { 11684 ctxt->_private = oldctxt->_private; 11685 ctxt->loadsubset = oldctxt->loadsubset; 11686 ctxt->validate = oldctxt->validate; 11687 ctxt->external = oldctxt->external; 11688 ctxt->record_info = oldctxt->record_info; 11689 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 11690 ctxt->node_seq.length = oldctxt->node_seq.length; 11691 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 11692 } else { 11693 /* 11694 * Doing validity checking on chunk without context 11695 * doesn't make sense 11696 */ 11697 ctxt->_private = NULL; 11698 ctxt->validate = 0; 11699 ctxt->external = 2; 11700 ctxt->loadsubset = 0; 11701 } 11702 if (sax != NULL) { 11703 oldsax = ctxt->sax; 11704 ctxt->sax = sax; 11705 if (user_data != NULL) 11706 ctxt->userData = user_data; 11707 } 11708 xmlDetectSAX2(ctxt); 11709 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11710 if (newDoc == NULL) { 11711 ctxt->node_seq.maximum = 0; 11712 ctxt->node_seq.length = 0; 11713 ctxt->node_seq.buffer = NULL; 11714 xmlFreeParserCtxt(ctxt); 11715 return(XML_ERR_INTERNAL_ERROR); 11716 } 11717 newDoc->intSubset = doc->intSubset; 11718 newDoc->extSubset = doc->extSubset; 11719 newDoc->dict = doc->dict; 11720 xmlDictReference(newDoc->dict); 11721 11722 if (doc->URL != NULL) { 11723 newDoc->URL = xmlStrdup(doc->URL); 11724 } 11725 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11726 if (newRoot == NULL) { 11727 if (sax != NULL) 11728 ctxt->sax = oldsax; 11729 ctxt->node_seq.maximum = 0; 11730 ctxt->node_seq.length = 0; 11731 ctxt->node_seq.buffer = NULL; 11732 xmlFreeParserCtxt(ctxt); 11733 newDoc->intSubset = NULL; 11734 newDoc->extSubset = NULL; 11735 xmlFreeDoc(newDoc); 11736 return(XML_ERR_INTERNAL_ERROR); 11737 } 11738 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11739 nodePush(ctxt, newDoc->children); 11740 ctxt->myDoc = doc; 11741 newRoot->doc = doc; 11742 11743 /* 11744 * Get the 4 first bytes and decode the 
charset 11745 * if enc != XML_CHAR_ENCODING_NONE 11746 * plug some encoding conversion routines. 11747 */ 11748 GROW; 11749 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11750 start[0] = RAW; 11751 start[1] = NXT(1); 11752 start[2] = NXT(2); 11753 start[3] = NXT(3); 11754 enc = xmlDetectCharEncoding(start, 4); 11755 if (enc != XML_CHAR_ENCODING_NONE) { 11756 xmlSwitchEncoding(ctxt, enc); 11757 } 11758 } 11759 11760 /* 11761 * Parse a possible text declaration first 11762 */ 11763 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11764 xmlParseTextDecl(ctxt); 11765 } 11766 11767 ctxt->instate = XML_PARSER_CONTENT; 11768 ctxt->depth = depth; 11769 11770 xmlParseContent(ctxt); 11771 11772 if ((RAW == '<') && (NXT(1) == '/')) { 11773 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11774 } else if (RAW != 0) { 11775 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11776 } 11777 if (ctxt->node != newDoc->children) { 11778 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11779 } 11780 11781 if (!ctxt->wellFormed) { 11782 if (ctxt->errNo == 0) 11783 ret = XML_ERR_INTERNAL_ERROR; 11784 else 11785 ret = (xmlParserErrors)ctxt->errNo; 11786 } else { 11787 if (list != NULL) { 11788 xmlNodePtr cur; 11789 11790 /* 11791 * Return the newly created nodeset after unlinking it from 11792 * they pseudo parent. 
11793 */ 11794 cur = newDoc->children->children; 11795 *list = cur; 11796 while (cur != NULL) { 11797 cur->parent = NULL; 11798 cur = cur->next; 11799 } 11800 newDoc->children->children = NULL; 11801 } 11802 ret = XML_ERR_OK; 11803 } 11804 if (sax != NULL) 11805 ctxt->sax = oldsax; 11806 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 11807 oldctxt->node_seq.length = ctxt->node_seq.length; 11808 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 11809 ctxt->node_seq.maximum = 0; 11810 ctxt->node_seq.length = 0; 11811 ctxt->node_seq.buffer = NULL; 11812 xmlFreeParserCtxt(ctxt); 11813 newDoc->intSubset = NULL; 11814 newDoc->extSubset = NULL; 11815 xmlFreeDoc(newDoc); 11816 11817 return(ret); 11818} 11819 11820#ifdef LIBXML_SAX1_ENABLED 11821/** 11822 * xmlParseExternalEntity: 11823 * @doc: the document the chunk pertains to 11824 * @sax: the SAX handler bloc (possibly NULL) 11825 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11826 * @depth: Used for loop detection, use 0 11827 * @URL: the URL for the entity to load 11828 * @ID: the System ID for the entity to load 11829 * @lst: the return value for the set of parsed nodes 11830 * 11831 * Parse an external general entity 11832 * An external general parsed entity is well-formed if it matches the 11833 * production labeled extParsedEnt. 11834 * 11835 * [78] extParsedEnt ::= TextDecl? 
content 11836 * 11837 * Returns 0 if the entity is well formed, -1 in case of args problem and 11838 * the parser error code otherwise 11839 */ 11840 11841int 11842xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 11843 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 11844 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 11845 ID, lst)); 11846} 11847 11848/** 11849 * xmlParseBalancedChunkMemory: 11850 * @doc: the document the chunk pertains to 11851 * @sax: the SAX handler bloc (possibly NULL) 11852 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11853 * @depth: Used for loop detection, use 0 11854 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11855 * @lst: the return value for the set of parsed nodes 11856 * 11857 * Parse a well-balanced chunk of an XML document 11858 * called by the parser 11859 * The allowed sequence for the Well Balanced Chunk is the one defined by 11860 * the content production in the XML grammar: 11861 * 11862 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11863 * 11864 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11865 * the parser error code otherwise 11866 */ 11867 11868int 11869xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11870 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 11871 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 11872 depth, string, lst, 0 ); 11873} 11874#endif /* LIBXML_SAX1_ENABLED */ 11875 11876/** 11877 * xmlParseBalancedChunkMemoryInternal: 11878 * @oldctxt: the existing parsing context 11879 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11880 * @user_data: the user data field for the parser context 11881 * @lst: the return value for the set of parsed nodes 11882 * 11883 * 11884 * Parse a well-balanced chunk of an XML document 11885 * called by the parser 11886 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 11887 * the content production in the XML grammar: 11888 * 11889 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11890 * 11891 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 11892 * error code otherwise 11893 * 11894 * In case recover is set to 1, the nodelist will not be empty even if 11895 * the parsed chunk is not well balanced. 11896 */ 11897static xmlParserErrors 11898xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 11899 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 11900 xmlParserCtxtPtr ctxt; 11901 xmlDocPtr newDoc = NULL; 11902 xmlNodePtr newRoot; 11903 xmlSAXHandlerPtr oldsax = NULL; 11904 xmlNodePtr content = NULL; 11905 xmlNodePtr last = NULL; 11906 int size; 11907 xmlParserErrors ret = XML_ERR_OK; 11908 11909 if (oldctxt->depth > 40) { 11910 return(XML_ERR_ENTITY_LOOP); 11911 } 11912 11913 11914 if (lst != NULL) 11915 *lst = NULL; 11916 if (string == NULL) 11917 return(XML_ERR_INTERNAL_ERROR); 11918 11919 size = xmlStrlen(string); 11920 11921 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11922 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 11923 if (user_data != NULL) 11924 ctxt->userData = user_data; 11925 else 11926 ctxt->userData = ctxt; 11927 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11928 ctxt->dict = oldctxt->dict; 11929 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11930 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11931 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11932 11933 oldsax = ctxt->sax; 11934 ctxt->sax = oldctxt->sax; 11935 xmlDetectSAX2(ctxt); 11936 ctxt->replaceEntities = oldctxt->replaceEntities; 11937 ctxt->options = oldctxt->options; 11938 11939 ctxt->_private = oldctxt->_private; 11940 if (oldctxt->myDoc == NULL) { 11941 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11942 if (newDoc == NULL) { 11943 ctxt->sax = 
oldsax; 11944 ctxt->dict = NULL; 11945 xmlFreeParserCtxt(ctxt); 11946 return(XML_ERR_INTERNAL_ERROR); 11947 } 11948 newDoc->dict = ctxt->dict; 11949 xmlDictReference(newDoc->dict); 11950 ctxt->myDoc = newDoc; 11951 } else { 11952 ctxt->myDoc = oldctxt->myDoc; 11953 content = ctxt->myDoc->children; 11954 last = ctxt->myDoc->last; 11955 } 11956 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 11957 if (newRoot == NULL) { 11958 ctxt->sax = oldsax; 11959 ctxt->dict = NULL; 11960 xmlFreeParserCtxt(ctxt); 11961 if (newDoc != NULL) { 11962 xmlFreeDoc(newDoc); 11963 } 11964 return(XML_ERR_INTERNAL_ERROR); 11965 } 11966 ctxt->myDoc->children = NULL; 11967 ctxt->myDoc->last = NULL; 11968 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 11969 nodePush(ctxt, ctxt->myDoc->children); 11970 ctxt->instate = XML_PARSER_CONTENT; 11971 ctxt->depth = oldctxt->depth + 1; 11972 11973 ctxt->validate = 0; 11974 ctxt->loadsubset = oldctxt->loadsubset; 11975 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 11976 /* 11977 * ID/IDREF registration will be done in xmlValidateElement below 11978 */ 11979 ctxt->loadsubset |= XML_SKIP_IDS; 11980 } 11981 ctxt->dictNames = oldctxt->dictNames; 11982 ctxt->attsDefault = oldctxt->attsDefault; 11983 ctxt->attsSpecial = oldctxt->attsSpecial; 11984 11985 xmlParseContent(ctxt); 11986 if ((RAW == '<') && (NXT(1) == '/')) { 11987 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11988 } else if (RAW != 0) { 11989 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11990 } 11991 if (ctxt->node != ctxt->myDoc->children) { 11992 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11993 } 11994 11995 if (!ctxt->wellFormed) { 11996 if (ctxt->errNo == 0) 11997 ret = XML_ERR_INTERNAL_ERROR; 11998 else 11999 ret = (xmlParserErrors)ctxt->errNo; 12000 } else { 12001 ret = XML_ERR_OK; 12002 } 12003 12004 if ((lst != NULL) && (ret == XML_ERR_OK)) { 12005 xmlNodePtr cur; 12006 12007 /* 12008 * Return the newly created nodeset after 
unlinking it from 12009 * they pseudo parent. 12010 */ 12011 cur = ctxt->myDoc->children->children; 12012 *lst = cur; 12013 while (cur != NULL) { 12014#ifdef LIBXML_VALID_ENABLED 12015 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12016 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12017 (cur->type == XML_ELEMENT_NODE)) { 12018 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12019 oldctxt->myDoc, cur); 12020 } 12021#endif /* LIBXML_VALID_ENABLED */ 12022 cur->parent = NULL; 12023 cur = cur->next; 12024 } 12025 ctxt->myDoc->children->children = NULL; 12026 } 12027 if (ctxt->myDoc != NULL) { 12028 xmlFreeNode(ctxt->myDoc->children); 12029 ctxt->myDoc->children = content; 12030 ctxt->myDoc->last = last; 12031 } 12032 12033 ctxt->sax = oldsax; 12034 ctxt->dict = NULL; 12035 ctxt->attsDefault = NULL; 12036 ctxt->attsSpecial = NULL; 12037 xmlFreeParserCtxt(ctxt); 12038 if (newDoc != NULL) { 12039 xmlFreeDoc(newDoc); 12040 } 12041 12042 return(ret); 12043} 12044 12045/** 12046 * xmlParseInNodeContext: 12047 * @node: the context node 12048 * @data: the input string 12049 * @datalen: the input string length in bytes 12050 * @options: a combination of xmlParserOption 12051 * @lst: the return value for the set of parsed nodes 12052 * 12053 * Parse a well-balanced chunk of an XML document 12054 * within the context (DTD, namespaces, etc ...) of the given node. 
12055 * 12056 * The allowed sequence for the data is a Well Balanced Chunk defined by 12057 * the content production in the XML grammar: 12058 * 12059 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12060 * 12061 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12062 * error code otherwise 12063 */ 12064xmlParserErrors 12065xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12066 int options, xmlNodePtr *lst) { 12067#ifdef SAX2 12068 xmlParserCtxtPtr ctxt; 12069 xmlDocPtr doc = NULL; 12070 xmlNodePtr fake, cur; 12071 int nsnr = 0; 12072 12073 xmlParserErrors ret = XML_ERR_OK; 12074 12075 /* 12076 * check all input parameters, grab the document 12077 */ 12078 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 12079 return(XML_ERR_INTERNAL_ERROR); 12080 switch (node->type) { 12081 case XML_ELEMENT_NODE: 12082 case XML_ATTRIBUTE_NODE: 12083 case XML_TEXT_NODE: 12084 case XML_CDATA_SECTION_NODE: 12085 case XML_ENTITY_REF_NODE: 12086 case XML_PI_NODE: 12087 case XML_COMMENT_NODE: 12088 case XML_DOCUMENT_NODE: 12089 case XML_HTML_DOCUMENT_NODE: 12090 break; 12091 default: 12092 return(XML_ERR_INTERNAL_ERROR); 12093 12094 } 12095 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 12096 (node->type != XML_DOCUMENT_NODE) && 12097 (node->type != XML_HTML_DOCUMENT_NODE)) 12098 node = node->parent; 12099 if (node == NULL) 12100 return(XML_ERR_INTERNAL_ERROR); 12101 if (node->type == XML_ELEMENT_NODE) 12102 doc = node->doc; 12103 else 12104 doc = (xmlDocPtr) node; 12105 if (doc == NULL) 12106 return(XML_ERR_INTERNAL_ERROR); 12107 12108 /* 12109 * allocate a context and set-up everything not related to the 12110 * node position in the tree 12111 */ 12112 if (doc->type == XML_DOCUMENT_NODE) 12113 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 12114#ifdef LIBXML_HTML_ENABLED 12115 else if (doc->type == XML_HTML_DOCUMENT_NODE) 12116 ctxt = htmlCreateMemoryParserCtxt((char *) 
data, datalen); 12117#endif 12118 else 12119 return(XML_ERR_INTERNAL_ERROR); 12120 12121 if (ctxt == NULL) 12122 return(XML_ERR_NO_MEMORY); 12123 fake = xmlNewComment(NULL); 12124 if (fake == NULL) { 12125 xmlFreeParserCtxt(ctxt); 12126 return(XML_ERR_NO_MEMORY); 12127 } 12128 xmlAddChild(node, fake); 12129 12130 /* 12131 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 12132 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 12133 * we must wait until the last moment to free the original one. 12134 */ 12135 if (doc->dict != NULL) { 12136 if (ctxt->dict != NULL) 12137 xmlDictFree(ctxt->dict); 12138 ctxt->dict = doc->dict; 12139 } else 12140 options |= XML_PARSE_NODICT; 12141 12142 xmlCtxtUseOptions(ctxt, options); 12143 xmlDetectSAX2(ctxt); 12144 ctxt->myDoc = doc; 12145 12146 if (node->type == XML_ELEMENT_NODE) { 12147 nodePush(ctxt, node); 12148 /* 12149 * initialize the SAX2 namespaces stack 12150 */ 12151 cur = node; 12152 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 12153 xmlNsPtr ns = cur->nsDef; 12154 const xmlChar *iprefix, *ihref; 12155 12156 while (ns != NULL) { 12157 if (ctxt->dict) { 12158 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 12159 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 12160 } else { 12161 iprefix = ns->prefix; 12162 ihref = ns->href; 12163 } 12164 12165 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 12166 nsPush(ctxt, iprefix, ihref); 12167 nsnr++; 12168 } 12169 ns = ns->next; 12170 } 12171 cur = cur->parent; 12172 } 12173 ctxt->instate = XML_PARSER_CONTENT; 12174 } 12175 12176 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 12177 /* 12178 * ID/IDREF registration will be done in xmlValidateElement below 12179 */ 12180 ctxt->loadsubset |= XML_SKIP_IDS; 12181 } 12182 12183#ifdef LIBXML_HTML_ENABLED 12184 if (doc->type == XML_HTML_DOCUMENT_NODE) 12185 __htmlParseContent(ctxt); 12186 else 12187#endif 12188 xmlParseContent(ctxt); 12189 12190 nsPop(ctxt, nsnr); 12191 if 
((RAW == '<') && (NXT(1) == '/')) { 12192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12193 } else if (RAW != 0) { 12194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12195 } 12196 if ((ctxt->node != NULL) && (ctxt->node != node)) { 12197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12198 ctxt->wellFormed = 0; 12199 } 12200 12201 if (!ctxt->wellFormed) { 12202 if (ctxt->errNo == 0) 12203 ret = XML_ERR_INTERNAL_ERROR; 12204 else 12205 ret = (xmlParserErrors)ctxt->errNo; 12206 } else { 12207 ret = XML_ERR_OK; 12208 } 12209 12210 /* 12211 * Return the newly created nodeset after unlinking it from 12212 * the pseudo sibling. 12213 */ 12214 12215 cur = fake->next; 12216 fake->next = NULL; 12217 node->last = fake; 12218 12219 if (cur != NULL) { 12220 cur->prev = NULL; 12221 } 12222 12223 *lst = cur; 12224 12225 while (cur != NULL) { 12226 cur->parent = NULL; 12227 cur = cur->next; 12228 } 12229 12230 xmlUnlinkNode(fake); 12231 xmlFreeNode(fake); 12232 12233 12234 if (ret != XML_ERR_OK) { 12235 xmlFreeNodeList(*lst); 12236 *lst = NULL; 12237 } 12238 12239 if (doc->dict != NULL) 12240 ctxt->dict = NULL; 12241 xmlFreeParserCtxt(ctxt); 12242 12243 return(ret); 12244#else /* !SAX2 */ 12245 return(XML_ERR_INTERNAL_ERROR); 12246#endif 12247} 12248 12249#ifdef LIBXML_SAX1_ENABLED 12250/** 12251 * xmlParseBalancedChunkMemoryRecover: 12252 * @doc: the document the chunk pertains to 12253 * @sax: the SAX handler bloc (possibly NULL) 12254 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12255 * @depth: Used for loop detection, use 0 12256 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12257 * @lst: the return value for the set of parsed nodes 12258 * @recover: return nodes even if the data is broken (use 0) 12259 * 12260 * 12261 * Parse a well-balanced chunk of an XML document 12262 * called by the parser 12263 * The allowed sequence for the Well Balanced Chunk is the one defined by 12264 * the content production in the XML 
grammar: 12265 * 12266 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12267 * 12268 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12269 * the parser error code otherwise 12270 * 12271 * In case recover is set to 1, the nodelist will not be empty even if 12272 * the parsed chunk is not well balanced. 12273 */ 12274int 12275xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12276 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 12277 int recover) { 12278 xmlParserCtxtPtr ctxt; 12279 xmlDocPtr newDoc; 12280 xmlSAXHandlerPtr oldsax = NULL; 12281 xmlNodePtr content, newRoot; 12282 int size; 12283 int ret = 0; 12284 12285 if (depth > 40) { 12286 return(XML_ERR_ENTITY_LOOP); 12287 } 12288 12289 12290 if (lst != NULL) 12291 *lst = NULL; 12292 if (string == NULL) 12293 return(-1); 12294 12295 size = xmlStrlen(string); 12296 12297 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12298 if (ctxt == NULL) return(-1); 12299 ctxt->userData = ctxt; 12300 if (sax != NULL) { 12301 oldsax = ctxt->sax; 12302 ctxt->sax = sax; 12303 if (user_data != NULL) 12304 ctxt->userData = user_data; 12305 } 12306 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12307 if (newDoc == NULL) { 12308 xmlFreeParserCtxt(ctxt); 12309 return(-1); 12310 } 12311 if ((doc != NULL) && (doc->dict != NULL)) { 12312 xmlDictFree(ctxt->dict); 12313 ctxt->dict = doc->dict; 12314 xmlDictReference(ctxt->dict); 12315 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12316 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12317 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12318 ctxt->dictNames = 1; 12319 } else { 12320 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); 12321 } 12322 if (doc != NULL) { 12323 newDoc->intSubset = doc->intSubset; 12324 newDoc->extSubset = doc->extSubset; 12325 } 12326 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12327 if (newRoot == 
NULL) { 12328 if (sax != NULL) 12329 ctxt->sax = oldsax; 12330 xmlFreeParserCtxt(ctxt); 12331 newDoc->intSubset = NULL; 12332 newDoc->extSubset = NULL; 12333 xmlFreeDoc(newDoc); 12334 return(-1); 12335 } 12336 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12337 nodePush(ctxt, newRoot); 12338 if (doc == NULL) { 12339 ctxt->myDoc = newDoc; 12340 } else { 12341 ctxt->myDoc = newDoc; 12342 newDoc->children->doc = doc; 12343 /* Ensure that doc has XML spec namespace */ 12344 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 12345 newDoc->oldNs = doc->oldNs; 12346 } 12347 ctxt->instate = XML_PARSER_CONTENT; 12348 ctxt->depth = depth; 12349 12350 /* 12351 * Doing validity checking on chunk doesn't make sense 12352 */ 12353 ctxt->validate = 0; 12354 ctxt->loadsubset = 0; 12355 xmlDetectSAX2(ctxt); 12356 12357 if ( doc != NULL ){ 12358 content = doc->children; 12359 doc->children = NULL; 12360 xmlParseContent(ctxt); 12361 doc->children = content; 12362 } 12363 else { 12364 xmlParseContent(ctxt); 12365 } 12366 if ((RAW == '<') && (NXT(1) == '/')) { 12367 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12368 } else if (RAW != 0) { 12369 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12370 } 12371 if (ctxt->node != newDoc->children) { 12372 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12373 } 12374 12375 if (!ctxt->wellFormed) { 12376 if (ctxt->errNo == 0) 12377 ret = 1; 12378 else 12379 ret = ctxt->errNo; 12380 } else { 12381 ret = 0; 12382 } 12383 12384 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 12385 xmlNodePtr cur; 12386 12387 /* 12388 * Return the newly created nodeset after unlinking it from 12389 * they pseudo parent. 
12390 */ 12391 cur = newDoc->children->children; 12392 *lst = cur; 12393 while (cur != NULL) { 12394 xmlSetTreeDoc(cur, doc); 12395 cur->parent = NULL; 12396 cur = cur->next; 12397 } 12398 newDoc->children->children = NULL; 12399 } 12400 12401 if (sax != NULL) 12402 ctxt->sax = oldsax; 12403 xmlFreeParserCtxt(ctxt); 12404 newDoc->intSubset = NULL; 12405 newDoc->extSubset = NULL; 12406 newDoc->oldNs = NULL; 12407 xmlFreeDoc(newDoc); 12408 12409 return(ret); 12410} 12411 12412/** 12413 * xmlSAXParseEntity: 12414 * @sax: the SAX handler block 12415 * @filename: the filename 12416 * 12417 * parse an XML external entity out of context and build a tree. 12418 * It use the given SAX function block to handle the parsing callback. 12419 * If sax is NULL, fallback to the default DOM tree building routines. 12420 * 12421 * [78] extParsedEnt ::= TextDecl? content 12422 * 12423 * This correspond to a "Well Balanced" chunk 12424 * 12425 * Returns the resulting document tree 12426 */ 12427 12428xmlDocPtr 12429xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 12430 xmlDocPtr ret; 12431 xmlParserCtxtPtr ctxt; 12432 12433 ctxt = xmlCreateFileParserCtxt(filename); 12434 if (ctxt == NULL) { 12435 return(NULL); 12436 } 12437 if (sax != NULL) { 12438 if (ctxt->sax != NULL) 12439 xmlFree(ctxt->sax); 12440 ctxt->sax = sax; 12441 ctxt->userData = NULL; 12442 } 12443 12444 xmlParseExtParsedEnt(ctxt); 12445 12446 if (ctxt->wellFormed) 12447 ret = ctxt->myDoc; 12448 else { 12449 ret = NULL; 12450 xmlFreeDoc(ctxt->myDoc); 12451 ctxt->myDoc = NULL; 12452 } 12453 if (sax != NULL) 12454 ctxt->sax = NULL; 12455 xmlFreeParserCtxt(ctxt); 12456 12457 return(ret); 12458} 12459 12460/** 12461 * xmlParseEntity: 12462 * @filename: the filename 12463 * 12464 * parse an XML external entity out of context and build a tree. 12465 * 12466 * [78] extParsedEnt ::= TextDecl? 
content 12467 * 12468 * This correspond to a "Well Balanced" chunk 12469 * 12470 * Returns the resulting document tree 12471 */ 12472 12473xmlDocPtr 12474xmlParseEntity(const char *filename) { 12475 return(xmlSAXParseEntity(NULL, filename)); 12476} 12477#endif /* LIBXML_SAX1_ENABLED */ 12478 12479/** 12480 * xmlCreateEntityParserCtxt: 12481 * @URL: the entity URL 12482 * @ID: the entity PUBLIC ID 12483 * @base: a possible base for the target URI 12484 * 12485 * Create a parser context for an external entity 12486 * Automatic support for ZLIB/Compress compressed document is provided 12487 * by default if found at compile-time. 12488 * 12489 * Returns the new parser context or NULL 12490 */ 12491xmlParserCtxtPtr 12492xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 12493 const xmlChar *base) { 12494 xmlParserCtxtPtr ctxt; 12495 xmlParserInputPtr inputStream; 12496 char *directory = NULL; 12497 xmlChar *uri; 12498 12499 ctxt = xmlNewParserCtxt(); 12500 if (ctxt == NULL) { 12501 return(NULL); 12502 } 12503 12504 uri = xmlBuildURI(URL, base); 12505 12506 if (uri == NULL) { 12507 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 12508 if (inputStream == NULL) { 12509 xmlFreeParserCtxt(ctxt); 12510 return(NULL); 12511 } 12512 12513 inputPush(ctxt, inputStream); 12514 12515 if ((ctxt->directory == NULL) && (directory == NULL)) 12516 directory = xmlParserGetDirectory((char *)URL); 12517 if ((ctxt->directory == NULL) && (directory != NULL)) 12518 ctxt->directory = directory; 12519 } else { 12520 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 12521 if (inputStream == NULL) { 12522 xmlFree(uri); 12523 xmlFreeParserCtxt(ctxt); 12524 return(NULL); 12525 } 12526 12527 inputPush(ctxt, inputStream); 12528 12529 if ((ctxt->directory == NULL) && (directory == NULL)) 12530 directory = xmlParserGetDirectory((char *)uri); 12531 if ((ctxt->directory == NULL) && (directory != NULL)) 12532 ctxt->directory = directory; 12533 
xmlFree(uri); 12534 } 12535 return(ctxt); 12536} 12537 12538/************************************************************************ 12539 * * 12540 * Front ends when parsing from a file * 12541 * * 12542 ************************************************************************/ 12543 12544/** 12545 * xmlCreateURLParserCtxt: 12546 * @filename: the filename or URL 12547 * @options: a combination of xmlParserOption 12548 * 12549 * Create a parser context for a file or URL content. 12550 * Automatic support for ZLIB/Compress compressed document is provided 12551 * by default if found at compile-time and for file accesses 12552 * 12553 * Returns the new parser context or NULL 12554 */ 12555xmlParserCtxtPtr 12556xmlCreateURLParserCtxt(const char *filename, int options) 12557{ 12558 xmlParserCtxtPtr ctxt; 12559 xmlParserInputPtr inputStream; 12560 char *directory = NULL; 12561 12562 ctxt = xmlNewParserCtxt(); 12563 if (ctxt == NULL) { 12564 xmlErrMemory(NULL, "cannot allocate parser context"); 12565 return(NULL); 12566 } 12567 12568 if (options) 12569 xmlCtxtUseOptions(ctxt, options); 12570 ctxt->linenumbers = 1; 12571 12572 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 12573 if (inputStream == NULL) { 12574 xmlFreeParserCtxt(ctxt); 12575 return(NULL); 12576 } 12577 12578 inputPush(ctxt, inputStream); 12579 if ((ctxt->directory == NULL) && (directory == NULL)) 12580 directory = xmlParserGetDirectory(filename); 12581 if ((ctxt->directory == NULL) && (directory != NULL)) 12582 ctxt->directory = directory; 12583 12584 return(ctxt); 12585} 12586 12587/** 12588 * xmlCreateFileParserCtxt: 12589 * @filename: the filename 12590 * 12591 * Create a parser context for a file content. 12592 * Automatic support for ZLIB/Compress compressed document is provided 12593 * by default if found at compile-time. 
12594 * 12595 * Returns the new parser context or NULL 12596 */ 12597xmlParserCtxtPtr 12598xmlCreateFileParserCtxt(const char *filename) 12599{ 12600 return(xmlCreateURLParserCtxt(filename, 0)); 12601} 12602 12603#ifdef LIBXML_SAX1_ENABLED 12604/** 12605 * xmlSAXParseFileWithData: 12606 * @sax: the SAX handler block 12607 * @filename: the filename 12608 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12609 * documents 12610 * @data: the userdata 12611 * 12612 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12613 * compressed document is provided by default if found at compile-time. 12614 * It use the given SAX function block to handle the parsing callback. 12615 * If sax is NULL, fallback to the default DOM tree building routines. 12616 * 12617 * User data (void *) is stored within the parser context in the 12618 * context's _private member, so it is available nearly everywhere in libxml 12619 * 12620 * Returns the resulting document tree 12621 */ 12622 12623xmlDocPtr 12624xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 12625 int recovery, void *data) { 12626 xmlDocPtr ret; 12627 xmlParserCtxtPtr ctxt; 12628 12629 xmlInitParser(); 12630 12631 ctxt = xmlCreateFileParserCtxt(filename); 12632 if (ctxt == NULL) { 12633 return(NULL); 12634 } 12635 if (sax != NULL) { 12636 if (ctxt->sax != NULL) 12637 xmlFree(ctxt->sax); 12638 ctxt->sax = sax; 12639 } 12640 xmlDetectSAX2(ctxt); 12641 if (data!=NULL) { 12642 ctxt->_private = data; 12643 } 12644 12645 if (ctxt->directory == NULL) 12646 ctxt->directory = xmlParserGetDirectory(filename); 12647 12648 ctxt->recovery = recovery; 12649 12650 xmlParseDocument(ctxt); 12651 12652 if ((ctxt->wellFormed) || recovery) { 12653 ret = ctxt->myDoc; 12654 if (ret != NULL) { 12655 if (ctxt->input->buf->compressed > 0) 12656 ret->compression = 9; 12657 else 12658 ret->compression = ctxt->input->buf->compressed; 12659 } 12660 } 12661 else { 12662 ret = NULL; 12663 
xmlFreeDoc(ctxt->myDoc); 12664 ctxt->myDoc = NULL; 12665 } 12666 if (sax != NULL) 12667 ctxt->sax = NULL; 12668 xmlFreeParserCtxt(ctxt); 12669 12670 return(ret); 12671} 12672 12673/** 12674 * xmlSAXParseFile: 12675 * @sax: the SAX handler block 12676 * @filename: the filename 12677 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12678 * documents 12679 * 12680 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12681 * compressed document is provided by default if found at compile-time. 12682 * It use the given SAX function block to handle the parsing callback. 12683 * If sax is NULL, fallback to the default DOM tree building routines. 12684 * 12685 * Returns the resulting document tree 12686 */ 12687 12688xmlDocPtr 12689xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 12690 int recovery) { 12691 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 12692} 12693 12694/** 12695 * xmlRecoverDoc: 12696 * @cur: a pointer to an array of xmlChar 12697 * 12698 * parse an XML in-memory document and build a tree. 12699 * In the case the document is not Well Formed, a tree is built anyway 12700 * 12701 * Returns the resulting document tree 12702 */ 12703 12704xmlDocPtr 12705xmlRecoverDoc(xmlChar *cur) { 12706 return(xmlSAXParseDoc(NULL, cur, 1)); 12707} 12708 12709/** 12710 * xmlParseFile: 12711 * @filename: the filename 12712 * 12713 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12714 * compressed document is provided by default if found at compile-time. 12715 * 12716 * Returns the resulting document tree if the file was wellformed, 12717 * NULL otherwise. 12718 */ 12719 12720xmlDocPtr 12721xmlParseFile(const char *filename) { 12722 return(xmlSAXParseFile(NULL, filename, 0)); 12723} 12724 12725/** 12726 * xmlRecoverFile: 12727 * @filename: the filename 12728 * 12729 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 12730 * compressed document is provided by default if found at compile-time. 12731 * In the case the document is not Well Formed, a tree is built anyway 12732 * 12733 * Returns the resulting document tree 12734 */ 12735 12736xmlDocPtr 12737xmlRecoverFile(const char *filename) { 12738 return(xmlSAXParseFile(NULL, filename, 1)); 12739} 12740 12741 12742/** 12743 * xmlSetupParserForBuffer: 12744 * @ctxt: an XML parser context 12745 * @buffer: a xmlChar * buffer 12746 * @filename: a file name 12747 * 12748 * Setup the parser context to parse a new buffer; Clears any prior 12749 * contents from the parser context. The buffer parameter must not be 12750 * NULL, but the filename parameter can be 12751 */ 12752void 12753xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 12754 const char* filename) 12755{ 12756 xmlParserInputPtr input; 12757 12758 if ((ctxt == NULL) || (buffer == NULL)) 12759 return; 12760 12761 input = xmlNewInputStream(ctxt); 12762 if (input == NULL) { 12763 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 12764 xmlClearParserCtxt(ctxt); 12765 return; 12766 } 12767 12768 xmlClearParserCtxt(ctxt); 12769 if (filename != NULL) 12770 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 12771 input->base = buffer; 12772 input->cur = buffer; 12773 input->end = &buffer[xmlStrlen(buffer)]; 12774 inputPush(ctxt, input); 12775} 12776 12777/** 12778 * xmlSAXUserParseFile: 12779 * @sax: a SAX handler 12780 * @user_data: The user data returned on SAX callbacks 12781 * @filename: a file name 12782 * 12783 * parse an XML file and call the given SAX handler routines. 
12784 * Automatic support for ZLIB/Compress compressed document is provided 12785 * 12786 * Returns 0 in case of success or a error number otherwise 12787 */ 12788int 12789xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 12790 const char *filename) { 12791 int ret = 0; 12792 xmlParserCtxtPtr ctxt; 12793 12794 ctxt = xmlCreateFileParserCtxt(filename); 12795 if (ctxt == NULL) return -1; 12796 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12797 xmlFree(ctxt->sax); 12798 ctxt->sax = sax; 12799 xmlDetectSAX2(ctxt); 12800 12801 if (user_data != NULL) 12802 ctxt->userData = user_data; 12803 12804 xmlParseDocument(ctxt); 12805 12806 if (ctxt->wellFormed) 12807 ret = 0; 12808 else { 12809 if (ctxt->errNo != 0) 12810 ret = ctxt->errNo; 12811 else 12812 ret = -1; 12813 } 12814 if (sax != NULL) 12815 ctxt->sax = NULL; 12816 if (ctxt->myDoc != NULL) { 12817 xmlFreeDoc(ctxt->myDoc); 12818 ctxt->myDoc = NULL; 12819 } 12820 xmlFreeParserCtxt(ctxt); 12821 12822 return ret; 12823} 12824#endif /* LIBXML_SAX1_ENABLED */ 12825 12826/************************************************************************ 12827 * * 12828 * Front ends when parsing from memory * 12829 * * 12830 ************************************************************************/ 12831 12832/** 12833 * xmlCreateMemoryParserCtxt: 12834 * @buffer: a pointer to a char array 12835 * @size: the size of the array 12836 * 12837 * Create a parser context for an XML in-memory document. 
12838 * 12839 * Returns the new parser context or NULL 12840 */ 12841xmlParserCtxtPtr 12842xmlCreateMemoryParserCtxt(const char *buffer, int size) { 12843 xmlParserCtxtPtr ctxt; 12844 xmlParserInputPtr input; 12845 xmlParserInputBufferPtr buf; 12846 12847 if (buffer == NULL) 12848 return(NULL); 12849 if (size <= 0) 12850 return(NULL); 12851 12852 ctxt = xmlNewParserCtxt(); 12853 if (ctxt == NULL) 12854 return(NULL); 12855 12856 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 12857 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12858 if (buf == NULL) { 12859 xmlFreeParserCtxt(ctxt); 12860 return(NULL); 12861 } 12862 12863 input = xmlNewInputStream(ctxt); 12864 if (input == NULL) { 12865 xmlFreeParserInputBuffer(buf); 12866 xmlFreeParserCtxt(ctxt); 12867 return(NULL); 12868 } 12869 12870 input->filename = NULL; 12871 input->buf = buf; 12872 input->base = input->buf->buffer->content; 12873 input->cur = input->buf->buffer->content; 12874 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 12875 12876 inputPush(ctxt, input); 12877 return(ctxt); 12878} 12879 12880#ifdef LIBXML_SAX1_ENABLED 12881/** 12882 * xmlSAXParseMemoryWithData: 12883 * @sax: the SAX handler block 12884 * @buffer: an pointer to a char array 12885 * @size: the size of the array 12886 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12887 * documents 12888 * @data: the userdata 12889 * 12890 * parse an XML in-memory block and use the given SAX function block 12891 * to handle the parsing callback. If sax is NULL, fallback to the default 12892 * DOM tree building routines. 
12893 * 12894 * User data (void *) is stored within the parser context in the 12895 * context's _private member, so it is available nearly everywhere in libxml 12896 * 12897 * Returns the resulting document tree 12898 */ 12899 12900xmlDocPtr 12901xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 12902 int size, int recovery, void *data) { 12903 xmlDocPtr ret; 12904 xmlParserCtxtPtr ctxt; 12905 12906 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12907 if (ctxt == NULL) return(NULL); 12908 if (sax != NULL) { 12909 if (ctxt->sax != NULL) 12910 xmlFree(ctxt->sax); 12911 ctxt->sax = sax; 12912 } 12913 xmlDetectSAX2(ctxt); 12914 if (data!=NULL) { 12915 ctxt->_private=data; 12916 } 12917 12918 ctxt->recovery = recovery; 12919 12920 xmlParseDocument(ctxt); 12921 12922 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 12923 else { 12924 ret = NULL; 12925 xmlFreeDoc(ctxt->myDoc); 12926 ctxt->myDoc = NULL; 12927 } 12928 if (sax != NULL) 12929 ctxt->sax = NULL; 12930 xmlFreeParserCtxt(ctxt); 12931 12932 return(ret); 12933} 12934 12935/** 12936 * xmlSAXParseMemory: 12937 * @sax: the SAX handler block 12938 * @buffer: an pointer to a char array 12939 * @size: the size of the array 12940 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 12941 * documents 12942 * 12943 * parse an XML in-memory block and use the given SAX function block 12944 * to handle the parsing callback. If sax is NULL, fallback to the default 12945 * DOM tree building routines. 12946 * 12947 * Returns the resulting document tree 12948 */ 12949xmlDocPtr 12950xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 12951 int size, int recovery) { 12952 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 12953} 12954 12955/** 12956 * xmlParseMemory: 12957 * @buffer: an pointer to a char array 12958 * @size: the size of the array 12959 * 12960 * parse an XML in-memory block and build a tree. 
12961 * 12962 * Returns the resulting document tree 12963 */ 12964 12965xmlDocPtr xmlParseMemory(const char *buffer, int size) { 12966 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 12967} 12968 12969/** 12970 * xmlRecoverMemory: 12971 * @buffer: an pointer to a char array 12972 * @size: the size of the array 12973 * 12974 * parse an XML in-memory block and build a tree. 12975 * In the case the document is not Well Formed, a tree is built anyway 12976 * 12977 * Returns the resulting document tree 12978 */ 12979 12980xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 12981 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 12982} 12983 12984/** 12985 * xmlSAXUserParseMemory: 12986 * @sax: a SAX handler 12987 * @user_data: The user data returned on SAX callbacks 12988 * @buffer: an in-memory XML document input 12989 * @size: the length of the XML document in bytes 12990 * 12991 * A better SAX parsing routine. 12992 * parse an XML in-memory buffer and call the given SAX handler routines. 
12993 * 12994 * Returns 0 in case of success or a error number otherwise 12995 */ 12996int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 12997 const char *buffer, int size) { 12998 int ret = 0; 12999 xmlParserCtxtPtr ctxt; 13000 13001 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13002 if (ctxt == NULL) return -1; 13003 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13004 xmlFree(ctxt->sax); 13005 ctxt->sax = sax; 13006 xmlDetectSAX2(ctxt); 13007 13008 if (user_data != NULL) 13009 ctxt->userData = user_data; 13010 13011 xmlParseDocument(ctxt); 13012 13013 if (ctxt->wellFormed) 13014 ret = 0; 13015 else { 13016 if (ctxt->errNo != 0) 13017 ret = ctxt->errNo; 13018 else 13019 ret = -1; 13020 } 13021 if (sax != NULL) 13022 ctxt->sax = NULL; 13023 if (ctxt->myDoc != NULL) { 13024 xmlFreeDoc(ctxt->myDoc); 13025 ctxt->myDoc = NULL; 13026 } 13027 xmlFreeParserCtxt(ctxt); 13028 13029 return ret; 13030} 13031#endif /* LIBXML_SAX1_ENABLED */ 13032 13033/** 13034 * xmlCreateDocParserCtxt: 13035 * @cur: a pointer to an array of xmlChar 13036 * 13037 * Creates a parser context for an XML in-memory document. 13038 * 13039 * Returns the new parser context or NULL 13040 */ 13041xmlParserCtxtPtr 13042xmlCreateDocParserCtxt(const xmlChar *cur) { 13043 int len; 13044 13045 if (cur == NULL) 13046 return(NULL); 13047 len = xmlStrlen(cur); 13048 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 13049} 13050 13051#ifdef LIBXML_SAX1_ENABLED 13052/** 13053 * xmlSAXParseDoc: 13054 * @sax: the SAX handler block 13055 * @cur: a pointer to an array of xmlChar 13056 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13057 * documents 13058 * 13059 * parse an XML in-memory document and build a tree. 13060 * It use the given SAX function block to handle the parsing callback. 13061 * If sax is NULL, fallback to the default DOM tree building routines. 
13062 * 13063 * Returns the resulting document tree 13064 */ 13065 13066xmlDocPtr 13067xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 13068 xmlDocPtr ret; 13069 xmlParserCtxtPtr ctxt; 13070 xmlSAXHandlerPtr oldsax = NULL; 13071 13072 if (cur == NULL) return(NULL); 13073 13074 13075 ctxt = xmlCreateDocParserCtxt(cur); 13076 if (ctxt == NULL) return(NULL); 13077 if (sax != NULL) { 13078 oldsax = ctxt->sax; 13079 ctxt->sax = sax; 13080 ctxt->userData = NULL; 13081 } 13082 xmlDetectSAX2(ctxt); 13083 13084 xmlParseDocument(ctxt); 13085 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13086 else { 13087 ret = NULL; 13088 xmlFreeDoc(ctxt->myDoc); 13089 ctxt->myDoc = NULL; 13090 } 13091 if (sax != NULL) 13092 ctxt->sax = oldsax; 13093 xmlFreeParserCtxt(ctxt); 13094 13095 return(ret); 13096} 13097 13098/** 13099 * xmlParseDoc: 13100 * @cur: a pointer to an array of xmlChar 13101 * 13102 * parse an XML in-memory document and build a tree. 13103 * 13104 * Returns the resulting document tree 13105 */ 13106 13107xmlDocPtr 13108xmlParseDoc(const xmlChar *cur) { 13109 return(xmlSAXParseDoc(NULL, cur, 0)); 13110} 13111#endif /* LIBXML_SAX1_ENABLED */ 13112 13113#ifdef LIBXML_LEGACY_ENABLED 13114/************************************************************************ 13115 * * 13116 * Specific function to keep track of entities references * 13117 * and used by the XSLT debugger * 13118 * * 13119 ************************************************************************/ 13120 13121static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 13122 13123/** 13124 * xmlAddEntityReference: 13125 * @ent : A valid entity 13126 * @firstNode : A valid first node for children of entity 13127 * @lastNode : A valid last node of children entity 13128 * 13129 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 13130 */ 13131static void 13132xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 13133 xmlNodePtr lastNode) 13134{ 13135 
if (xmlEntityRefFunc != NULL) { 13136 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 13137 } 13138} 13139 13140 13141/** 13142 * xmlSetEntityReferenceFunc: 13143 * @func: A valid function 13144 * 13145 * Set the function to call call back when a xml reference has been made 13146 */ 13147void 13148xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 13149{ 13150 xmlEntityRefFunc = func; 13151} 13152#endif /* LIBXML_LEGACY_ENABLED */ 13153 13154/************************************************************************ 13155 * * 13156 * Miscellaneous * 13157 * * 13158 ************************************************************************/ 13159 13160#ifdef LIBXML_XPATH_ENABLED 13161#include <libxml/xpath.h> 13162#endif 13163 13164extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 13165static int xmlParserInitialized = 0; 13166 13167/** 13168 * xmlInitParser: 13169 * 13170 * Initialization function for the XML parser. 13171 * This is not reentrant. Call once before processing in case of 13172 * use in multithreaded programs. 
13173 */ 13174 13175void 13176xmlInitParser(void) { 13177 if (xmlParserInitialized != 0) 13178 return; 13179 13180#ifdef LIBXML_THREAD_ENABLED 13181 __xmlGlobalInitMutexLock(); 13182 if (xmlParserInitialized == 0) { 13183#endif 13184 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 13185 (xmlGenericError == NULL)) 13186 initGenericErrorDefaultFunc(NULL); 13187 xmlInitGlobals(); 13188 xmlInitThreads(); 13189 xmlInitMemory(); 13190 xmlInitCharEncodingHandlers(); 13191 xmlDefaultSAXHandlerInit(); 13192 xmlRegisterDefaultInputCallbacks(); 13193#ifdef LIBXML_OUTPUT_ENABLED 13194 xmlRegisterDefaultOutputCallbacks(); 13195#endif /* LIBXML_OUTPUT_ENABLED */ 13196#ifdef LIBXML_HTML_ENABLED 13197 htmlInitAutoClose(); 13198 htmlDefaultSAXHandlerInit(); 13199#endif 13200#ifdef LIBXML_XPATH_ENABLED 13201 xmlXPathInit(); 13202#endif 13203 xmlParserInitialized = 1; 13204#ifdef LIBXML_THREAD_ENABLED 13205 } 13206 __xmlGlobalInitMutexUnlock(); 13207#endif 13208} 13209 13210/** 13211 * xmlCleanupParser: 13212 * 13213 * This function name is somewhat misleading. It does not clean up 13214 * parser state, it cleans up memory allocated by the library itself. 13215 * It is a cleanup function for the XML library. It tries to reclaim all 13216 * related global memory allocated for the library processing. 13217 * It doesn't deallocate any document related memory. One should 13218 * call xmlCleanupParser() only when the process has finished using 13219 * the library and all XML/HTML documents built with it. 13220 * See also xmlInitParser() which has the opposite function of preparing 13221 * the library for operations. 
13222 */ 13223 13224void 13225xmlCleanupParser(void) { 13226 if (!xmlParserInitialized) 13227 return; 13228 13229 xmlCleanupCharEncodingHandlers(); 13230#ifdef LIBXML_CATALOG_ENABLED 13231 xmlCatalogCleanup(); 13232#endif 13233 xmlDictCleanup(); 13234 xmlCleanupInputCallbacks(); 13235#ifdef LIBXML_OUTPUT_ENABLED 13236 xmlCleanupOutputCallbacks(); 13237#endif 13238#ifdef LIBXML_SCHEMAS_ENABLED 13239 xmlSchemaCleanupTypes(); 13240 xmlRelaxNGCleanupTypes(); 13241#endif 13242 xmlCleanupGlobals(); 13243 xmlResetLastError(); 13244 xmlCleanupThreads(); /* must be last if called not from the main thread */ 13245 xmlCleanupMemory(); 13246 xmlParserInitialized = 0; 13247} 13248 13249/************************************************************************ 13250 * * 13251 * New set (2.6.0) of simpler and more flexible APIs * 13252 * * 13253 ************************************************************************/ 13254 13255/** 13256 * DICT_FREE: 13257 * @str: a string 13258 * 13259 * Free a string if it is not owned by the "dict" dictionnary in the 13260 * current scope 13261 */ 13262#define DICT_FREE(str) \ 13263 if ((str) && ((!dict) || \ 13264 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 13265 xmlFree((char *)(str)); 13266 13267/** 13268 * xmlCtxtReset: 13269 * @ctxt: an XML parser context 13270 * 13271 * Reset a parser context 13272 */ 13273void 13274xmlCtxtReset(xmlParserCtxtPtr ctxt) 13275{ 13276 xmlParserInputPtr input; 13277 xmlDictPtr dict; 13278 13279 if (ctxt == NULL) 13280 return; 13281 13282 dict = ctxt->dict; 13283 13284 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 13285 xmlFreeInputStream(input); 13286 } 13287 ctxt->inputNr = 0; 13288 ctxt->input = NULL; 13289 13290 ctxt->spaceNr = 0; 13291 if (ctxt->spaceTab != NULL) { 13292 ctxt->spaceTab[0] = -1; 13293 ctxt->space = &ctxt->spaceTab[0]; 13294 } else { 13295 ctxt->space = NULL; 13296 } 13297 13298 13299 ctxt->nodeNr = 0; 13300 ctxt->node = NULL; 13301 13302 ctxt->nameNr = 0; 13303 
ctxt->name = NULL; 13304 13305 DICT_FREE(ctxt->version); 13306 ctxt->version = NULL; 13307 DICT_FREE(ctxt->encoding); 13308 ctxt->encoding = NULL; 13309 DICT_FREE(ctxt->directory); 13310 ctxt->directory = NULL; 13311 DICT_FREE(ctxt->extSubURI); 13312 ctxt->extSubURI = NULL; 13313 DICT_FREE(ctxt->extSubSystem); 13314 ctxt->extSubSystem = NULL; 13315 if (ctxt->myDoc != NULL) 13316 xmlFreeDoc(ctxt->myDoc); 13317 ctxt->myDoc = NULL; 13318 13319 ctxt->standalone = -1; 13320 ctxt->hasExternalSubset = 0; 13321 ctxt->hasPErefs = 0; 13322 ctxt->html = 0; 13323 ctxt->external = 0; 13324 ctxt->instate = XML_PARSER_START; 13325 ctxt->token = 0; 13326 13327 ctxt->wellFormed = 1; 13328 ctxt->nsWellFormed = 1; 13329 ctxt->disableSAX = 0; 13330 ctxt->valid = 1; 13331#if 0 13332 ctxt->vctxt.userData = ctxt; 13333 ctxt->vctxt.error = xmlParserValidityError; 13334 ctxt->vctxt.warning = xmlParserValidityWarning; 13335#endif 13336 ctxt->record_info = 0; 13337 ctxt->nbChars = 0; 13338 ctxt->checkIndex = 0; 13339 ctxt->inSubset = 0; 13340 ctxt->errNo = XML_ERR_OK; 13341 ctxt->depth = 0; 13342 ctxt->charset = XML_CHAR_ENCODING_UTF8; 13343 ctxt->catalogs = NULL; 13344 xmlInitNodeInfoSeq(&ctxt->node_seq); 13345 13346 if (ctxt->attsDefault != NULL) { 13347 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 13348 ctxt->attsDefault = NULL; 13349 } 13350 if (ctxt->attsSpecial != NULL) { 13351 xmlHashFree(ctxt->attsSpecial, NULL); 13352 ctxt->attsSpecial = NULL; 13353 } 13354 13355#ifdef LIBXML_CATALOG_ENABLED 13356 if (ctxt->catalogs != NULL) 13357 xmlCatalogFreeLocal(ctxt->catalogs); 13358#endif 13359 if (ctxt->lastError.code != XML_ERR_OK) 13360 xmlResetError(&ctxt->lastError); 13361} 13362 13363/** 13364 * xmlCtxtResetPush: 13365 * @ctxt: an XML parser context 13366 * @chunk: a pointer to an array of chars 13367 * @size: number of chars in the array 13368 * @filename: an optional file name or URI 13369 * @encoding: the document encoding, or NULL 13370 * 13371 * Reset a push parser 
context 13372 * 13373 * Returns 0 in case of success and 1 in case of error 13374 */ 13375int 13376xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 13377 int size, const char *filename, const char *encoding) 13378{ 13379 xmlParserInputPtr inputStream; 13380 xmlParserInputBufferPtr buf; 13381 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 13382 13383 if (ctxt == NULL) 13384 return(1); 13385 13386 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 13387 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 13388 13389 buf = xmlAllocParserInputBuffer(enc); 13390 if (buf == NULL) 13391 return(1); 13392 13393 if (ctxt == NULL) { 13394 xmlFreeParserInputBuffer(buf); 13395 return(1); 13396 } 13397 13398 xmlCtxtReset(ctxt); 13399 13400 if (ctxt->pushTab == NULL) { 13401 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 13402 sizeof(xmlChar *)); 13403 if (ctxt->pushTab == NULL) { 13404 xmlErrMemory(ctxt, NULL); 13405 xmlFreeParserInputBuffer(buf); 13406 return(1); 13407 } 13408 } 13409 13410 if (filename == NULL) { 13411 ctxt->directory = NULL; 13412 } else { 13413 ctxt->directory = xmlParserGetDirectory(filename); 13414 } 13415 13416 inputStream = xmlNewInputStream(ctxt); 13417 if (inputStream == NULL) { 13418 xmlFreeParserInputBuffer(buf); 13419 return(1); 13420 } 13421 13422 if (filename == NULL) 13423 inputStream->filename = NULL; 13424 else 13425 inputStream->filename = (char *) 13426 xmlCanonicPath((const xmlChar *) filename); 13427 inputStream->buf = buf; 13428 inputStream->base = inputStream->buf->buffer->content; 13429 inputStream->cur = inputStream->buf->buffer->content; 13430 inputStream->end = 13431 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 13432 13433 inputPush(ctxt, inputStream); 13434 13435 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 13436 (ctxt->input->buf != NULL)) { 13437 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 13438 int cur = ctxt->input->cur - 
ctxt->input->base; 13439 13440 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 13441 13442 ctxt->input->base = ctxt->input->buf->buffer->content + base; 13443 ctxt->input->cur = ctxt->input->base + cur; 13444 ctxt->input->end = 13445 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 13446 use]; 13447#ifdef DEBUG_PUSH 13448 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 13449#endif 13450 } 13451 13452 if (encoding != NULL) { 13453 xmlCharEncodingHandlerPtr hdlr; 13454 13455 hdlr = xmlFindCharEncodingHandler(encoding); 13456 if (hdlr != NULL) { 13457 xmlSwitchToEncoding(ctxt, hdlr); 13458 } else { 13459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 13460 "Unsupported encoding %s\n", BAD_CAST encoding); 13461 } 13462 } else if (enc != XML_CHAR_ENCODING_NONE) { 13463 xmlSwitchEncoding(ctxt, enc); 13464 } 13465 13466 return(0); 13467} 13468 13469/** 13470 * xmlCtxtUseOptions: 13471 * @ctxt: an XML parser context 13472 * @options: a combination of xmlParserOption 13473 * 13474 * Applies the options to the parser context 13475 * 13476 * Returns 0 in case of success, the set of unknown or unimplemented options 13477 * in case of error. 
13478 */ 13479int 13480xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 13481{ 13482 if (ctxt == NULL) 13483 return(-1); 13484 if (options & XML_PARSE_RECOVER) { 13485 ctxt->recovery = 1; 13486 options -= XML_PARSE_RECOVER; 13487 } else 13488 ctxt->recovery = 0; 13489 if (options & XML_PARSE_DTDLOAD) { 13490 ctxt->loadsubset = XML_DETECT_IDS; 13491 options -= XML_PARSE_DTDLOAD; 13492 } else 13493 ctxt->loadsubset = 0; 13494 if (options & XML_PARSE_DTDATTR) { 13495 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 13496 options -= XML_PARSE_DTDATTR; 13497 } 13498 if (options & XML_PARSE_NOENT) { 13499 ctxt->replaceEntities = 1; 13500 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 13501 options -= XML_PARSE_NOENT; 13502 } else 13503 ctxt->replaceEntities = 0; 13504 if (options & XML_PARSE_PEDANTIC) { 13505 ctxt->pedantic = 1; 13506 options -= XML_PARSE_PEDANTIC; 13507 } else 13508 ctxt->pedantic = 0; 13509 if (options & XML_PARSE_NOBLANKS) { 13510 ctxt->keepBlanks = 0; 13511 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 13512 options -= XML_PARSE_NOBLANKS; 13513 } else 13514 ctxt->keepBlanks = 1; 13515 if (options & XML_PARSE_DTDVALID) { 13516 ctxt->validate = 1; 13517 if (options & XML_PARSE_NOWARNING) 13518 ctxt->vctxt.warning = NULL; 13519 if (options & XML_PARSE_NOERROR) 13520 ctxt->vctxt.error = NULL; 13521 options -= XML_PARSE_DTDVALID; 13522 } else 13523 ctxt->validate = 0; 13524 if (options & XML_PARSE_NOWARNING) { 13525 ctxt->sax->warning = NULL; 13526 options -= XML_PARSE_NOWARNING; 13527 } 13528 if (options & XML_PARSE_NOERROR) { 13529 ctxt->sax->error = NULL; 13530 ctxt->sax->fatalError = NULL; 13531 options -= XML_PARSE_NOERROR; 13532 } 13533#ifdef LIBXML_SAX1_ENABLED 13534 if (options & XML_PARSE_SAX1) { 13535 ctxt->sax->startElement = xmlSAX2StartElement; 13536 ctxt->sax->endElement = xmlSAX2EndElement; 13537 ctxt->sax->startElementNs = NULL; 13538 ctxt->sax->endElementNs = NULL; 13539 ctxt->sax->initialized = 1; 13540 options -= XML_PARSE_SAX1; 
13541 } 13542#endif /* LIBXML_SAX1_ENABLED */ 13543 if (options & XML_PARSE_NODICT) { 13544 ctxt->dictNames = 0; 13545 options -= XML_PARSE_NODICT; 13546 } else { 13547 ctxt->dictNames = 1; 13548 } 13549 if (options & XML_PARSE_NOCDATA) { 13550 ctxt->sax->cdataBlock = NULL; 13551 options -= XML_PARSE_NOCDATA; 13552 } 13553 if (options & XML_PARSE_NSCLEAN) { 13554 ctxt->options |= XML_PARSE_NSCLEAN; 13555 options -= XML_PARSE_NSCLEAN; 13556 } 13557 if (options & XML_PARSE_NONET) { 13558 ctxt->options |= XML_PARSE_NONET; 13559 options -= XML_PARSE_NONET; 13560 } 13561 if (options & XML_PARSE_COMPACT) { 13562 ctxt->options |= XML_PARSE_COMPACT; 13563 options -= XML_PARSE_COMPACT; 13564 } 13565 ctxt->linenumbers = 1; 13566 return (options); 13567} 13568 13569/** 13570 * xmlDoRead: 13571 * @ctxt: an XML parser context 13572 * @URL: the base URL to use for the document 13573 * @encoding: the document encoding, or NULL 13574 * @options: a combination of xmlParserOption 13575 * @reuse: keep the context for reuse 13576 * 13577 * Common front-end for the xmlRead functions 13578 * 13579 * Returns the resulting document tree or NULL 13580 */ 13581static xmlDocPtr 13582xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 13583 int options, int reuse) 13584{ 13585 xmlDocPtr ret; 13586 13587 xmlCtxtUseOptions(ctxt, options); 13588 if (encoding != NULL) { 13589 xmlCharEncodingHandlerPtr hdlr; 13590 13591 hdlr = xmlFindCharEncodingHandler(encoding); 13592 if (hdlr != NULL) 13593 xmlSwitchToEncoding(ctxt, hdlr); 13594 } 13595 if ((URL != NULL) && (ctxt->input != NULL) && 13596 (ctxt->input->filename == NULL)) 13597 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 13598 xmlParseDocument(ctxt); 13599 if ((ctxt->wellFormed) || ctxt->recovery) 13600 ret = ctxt->myDoc; 13601 else { 13602 ret = NULL; 13603 if (ctxt->myDoc != NULL) { 13604 xmlFreeDoc(ctxt->myDoc); 13605 } 13606 } 13607 ctxt->myDoc = NULL; 13608 if (!reuse) { 13609 
xmlFreeParserCtxt(ctxt); 13610 } 13611 13612 return (ret); 13613} 13614 13615/** 13616 * xmlReadDoc: 13617 * @cur: a pointer to a zero terminated string 13618 * @URL: the base URL to use for the document 13619 * @encoding: the document encoding, or NULL 13620 * @options: a combination of xmlParserOption 13621 * 13622 * parse an XML in-memory document and build a tree. 13623 * 13624 * Returns the resulting document tree 13625 */ 13626xmlDocPtr 13627xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 13628{ 13629 xmlParserCtxtPtr ctxt; 13630 13631 if (cur == NULL) 13632 return (NULL); 13633 13634 ctxt = xmlCreateDocParserCtxt(cur); 13635 if (ctxt == NULL) 13636 return (NULL); 13637 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13638} 13639 13640/** 13641 * xmlReadFile: 13642 * @filename: a file or URL 13643 * @encoding: the document encoding, or NULL 13644 * @options: a combination of xmlParserOption 13645 * 13646 * parse an XML file from the filesystem or the network. 13647 * 13648 * Returns the resulting document tree 13649 */ 13650xmlDocPtr 13651xmlReadFile(const char *filename, const char *encoding, int options) 13652{ 13653 xmlParserCtxtPtr ctxt; 13654 13655 ctxt = xmlCreateURLParserCtxt(filename, options); 13656 if (ctxt == NULL) 13657 return (NULL); 13658 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 13659} 13660 13661/** 13662 * xmlReadMemory: 13663 * @buffer: a pointer to a char array 13664 * @size: the size of the array 13665 * @URL: the base URL to use for the document 13666 * @encoding: the document encoding, or NULL 13667 * @options: a combination of xmlParserOption 13668 * 13669 * parse an XML in-memory document and build a tree. 
13670 * 13671 * Returns the resulting document tree 13672 */ 13673xmlDocPtr 13674xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 13675{ 13676 xmlParserCtxtPtr ctxt; 13677 13678 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13679 if (ctxt == NULL) 13680 return (NULL); 13681 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13682} 13683 13684/** 13685 * xmlReadFd: 13686 * @fd: an open file descriptor 13687 * @URL: the base URL to use for the document 13688 * @encoding: the document encoding, or NULL 13689 * @options: a combination of xmlParserOption 13690 * 13691 * parse an XML from a file descriptor and build a tree. 13692 * NOTE that the file descriptor will not be closed when the 13693 * reader is closed or reset. 13694 * 13695 * Returns the resulting document tree 13696 */ 13697xmlDocPtr 13698xmlReadFd(int fd, const char *URL, const char *encoding, int options) 13699{ 13700 xmlParserCtxtPtr ctxt; 13701 xmlParserInputBufferPtr input; 13702 xmlParserInputPtr stream; 13703 13704 if (fd < 0) 13705 return (NULL); 13706 13707 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13708 if (input == NULL) 13709 return (NULL); 13710 input->closecallback = NULL; 13711 ctxt = xmlNewParserCtxt(); 13712 if (ctxt == NULL) { 13713 xmlFreeParserInputBuffer(input); 13714 return (NULL); 13715 } 13716 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13717 if (stream == NULL) { 13718 xmlFreeParserInputBuffer(input); 13719 xmlFreeParserCtxt(ctxt); 13720 return (NULL); 13721 } 13722 inputPush(ctxt, stream); 13723 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13724} 13725 13726/** 13727 * xmlReadIO: 13728 * @ioread: an I/O read function 13729 * @ioclose: an I/O close function 13730 * @ioctx: an I/O handler 13731 * @URL: the base URL to use for the document 13732 * @encoding: the document encoding, or NULL 13733 * @options: a combination of xmlParserOption 13734 * 13735 * parse an XML document from 
I/O functions and source and build a tree. 13736 * 13737 * Returns the resulting document tree 13738 */ 13739xmlDocPtr 13740xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 13741 void *ioctx, const char *URL, const char *encoding, int options) 13742{ 13743 xmlParserCtxtPtr ctxt; 13744 xmlParserInputBufferPtr input; 13745 xmlParserInputPtr stream; 13746 13747 if (ioread == NULL) 13748 return (NULL); 13749 13750 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13751 XML_CHAR_ENCODING_NONE); 13752 if (input == NULL) 13753 return (NULL); 13754 ctxt = xmlNewParserCtxt(); 13755 if (ctxt == NULL) { 13756 xmlFreeParserInputBuffer(input); 13757 return (NULL); 13758 } 13759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13760 if (stream == NULL) { 13761 xmlFreeParserInputBuffer(input); 13762 xmlFreeParserCtxt(ctxt); 13763 return (NULL); 13764 } 13765 inputPush(ctxt, stream); 13766 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13767} 13768 13769/** 13770 * xmlCtxtReadDoc: 13771 * @ctxt: an XML parser context 13772 * @cur: a pointer to a zero terminated string 13773 * @URL: the base URL to use for the document 13774 * @encoding: the document encoding, or NULL 13775 * @options: a combination of xmlParserOption 13776 * 13777 * parse an XML in-memory document and build a tree. 
13778 * This reuses the existing @ctxt parser context 13779 * 13780 * Returns the resulting document tree 13781 */ 13782xmlDocPtr 13783xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 13784 const char *URL, const char *encoding, int options) 13785{ 13786 xmlParserInputPtr stream; 13787 13788 if (cur == NULL) 13789 return (NULL); 13790 if (ctxt == NULL) 13791 return (NULL); 13792 13793 xmlCtxtReset(ctxt); 13794 13795 stream = xmlNewStringInputStream(ctxt, cur); 13796 if (stream == NULL) { 13797 return (NULL); 13798 } 13799 inputPush(ctxt, stream); 13800 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13801} 13802 13803/** 13804 * xmlCtxtReadFile: 13805 * @ctxt: an XML parser context 13806 * @filename: a file or URL 13807 * @encoding: the document encoding, or NULL 13808 * @options: a combination of xmlParserOption 13809 * 13810 * parse an XML file from the filesystem or the network. 13811 * This reuses the existing @ctxt parser context 13812 * 13813 * Returns the resulting document tree 13814 */ 13815xmlDocPtr 13816xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 13817 const char *encoding, int options) 13818{ 13819 xmlParserInputPtr stream; 13820 13821 if (filename == NULL) 13822 return (NULL); 13823 if (ctxt == NULL) 13824 return (NULL); 13825 13826 xmlCtxtReset(ctxt); 13827 13828 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 13829 if (stream == NULL) { 13830 return (NULL); 13831 } 13832 inputPush(ctxt, stream); 13833 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 13834} 13835 13836/** 13837 * xmlCtxtReadMemory: 13838 * @ctxt: an XML parser context 13839 * @buffer: a pointer to a char array 13840 * @size: the size of the array 13841 * @URL: the base URL to use for the document 13842 * @encoding: the document encoding, or NULL 13843 * @options: a combination of xmlParserOption 13844 * 13845 * parse an XML in-memory document and build a tree. 
13846 * This reuses the existing @ctxt parser context 13847 * 13848 * Returns the resulting document tree 13849 */ 13850xmlDocPtr 13851xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 13852 const char *URL, const char *encoding, int options) 13853{ 13854 xmlParserInputBufferPtr input; 13855 xmlParserInputPtr stream; 13856 13857 if (ctxt == NULL) 13858 return (NULL); 13859 if (buffer == NULL) 13860 return (NULL); 13861 13862 xmlCtxtReset(ctxt); 13863 13864 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13865 if (input == NULL) { 13866 return(NULL); 13867 } 13868 13869 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13870 if (stream == NULL) { 13871 xmlFreeParserInputBuffer(input); 13872 return(NULL); 13873 } 13874 13875 inputPush(ctxt, stream); 13876 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13877} 13878 13879/** 13880 * xmlCtxtReadFd: 13881 * @ctxt: an XML parser context 13882 * @fd: an open file descriptor 13883 * @URL: the base URL to use for the document 13884 * @encoding: the document encoding, or NULL 13885 * @options: a combination of xmlParserOption 13886 * 13887 * parse an XML from a file descriptor and build a tree. 13888 * This reuses the existing @ctxt parser context 13889 * NOTE that the file descriptor will not be closed when the 13890 * reader is closed or reset. 
13891 * 13892 * Returns the resulting document tree 13893 */ 13894xmlDocPtr 13895xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 13896 const char *URL, const char *encoding, int options) 13897{ 13898 xmlParserInputBufferPtr input; 13899 xmlParserInputPtr stream; 13900 13901 if (fd < 0) 13902 return (NULL); 13903 if (ctxt == NULL) 13904 return (NULL); 13905 13906 xmlCtxtReset(ctxt); 13907 13908 13909 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13910 if (input == NULL) 13911 return (NULL); 13912 input->closecallback = NULL; 13913 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13914 if (stream == NULL) { 13915 xmlFreeParserInputBuffer(input); 13916 return (NULL); 13917 } 13918 inputPush(ctxt, stream); 13919 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13920} 13921 13922/** 13923 * xmlCtxtReadIO: 13924 * @ctxt: an XML parser context 13925 * @ioread: an I/O read function 13926 * @ioclose: an I/O close function 13927 * @ioctx: an I/O handler 13928 * @URL: the base URL to use for the document 13929 * @encoding: the document encoding, or NULL 13930 * @options: a combination of xmlParserOption 13931 * 13932 * parse an XML document from I/O functions and source and build a tree. 
13933 * This reuses the existing @ctxt parser context 13934 * 13935 * Returns the resulting document tree 13936 */ 13937xmlDocPtr 13938xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 13939 xmlInputCloseCallback ioclose, void *ioctx, 13940 const char *URL, 13941 const char *encoding, int options) 13942{ 13943 xmlParserInputBufferPtr input; 13944 xmlParserInputPtr stream; 13945 13946 if (ioread == NULL) 13947 return (NULL); 13948 if (ctxt == NULL) 13949 return (NULL); 13950 13951 xmlCtxtReset(ctxt); 13952 13953 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13954 XML_CHAR_ENCODING_NONE); 13955 if (input == NULL) 13956 return (NULL); 13957 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13958 if (stream == NULL) { 13959 xmlFreeParserInputBuffer(input); 13960 return (NULL); 13961 } 13962 inputPush(ctxt, stream); 13963 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13964} 13965 13966#define bottom_parser 13967#include "elfgcchack.h" 13968