parser.c revision 770075b500b153cdedc7cc00f085fd78dbe489c7
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60 61#ifdef HAVE_CTYPE_H 62#include <ctype.h> 63#endif 64#ifdef HAVE_STDLIB_H 65#include <stdlib.h> 66#endif 67#ifdef HAVE_SYS_STAT_H 68#include <sys/stat.h> 69#endif 70#ifdef HAVE_FCNTL_H 71#include <fcntl.h> 72#endif 73#ifdef HAVE_UNISTD_H 74#include <unistd.h> 75#endif 76#ifdef HAVE_ZLIB_H 77#include <zlib.h> 78#endif 79 80/** 81 * xmlParserMaxDepth: 82 * 83 * arbitrary depth limit for the XML documents that we allow to 84 * process. This is not a limitation of the parser but a safety 85 * boundary feature. 
86 */ 87unsigned int xmlParserMaxDepth = 1024; 88 89#define SAX2 1 90 91#define XML_PARSER_BIG_BUFFER_SIZE 300 92#define XML_PARSER_BUFFER_SIZE 100 93 94#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 95 96/* 97 * List of XML prefixed PI allowed by W3C specs 98 */ 99 100static const char *xmlW3CPIs[] = { 101 "xml-stylesheet", 102 NULL 103}; 104 105 106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 108 const xmlChar **str); 109 110static xmlParserErrors 111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 112 xmlSAXHandlerPtr sax, 113 void *user_data, int depth, const xmlChar *URL, 114 const xmlChar *ID, xmlNodePtr *list); 115 116#ifdef LIBXML_LEGACY_ENABLED 117static void 118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 119 xmlNodePtr lastNode); 120#endif /* LIBXML_LEGACY_ENABLED */ 121 122static xmlParserErrors 123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 124 const xmlChar *string, void *user_data, xmlNodePtr *lst); 125 126/************************************************************************ 127 * * 128 * Some factorized error routines * 129 * * 130 ************************************************************************/ 131 132/** 133 * xmlErrAttributeDup: 134 * @ctxt: an XML parser context 135 * @prefix: the attribute prefix 136 * @localname: the attribute localname 137 * 138 * Handle a redefinition of attribute error 139 */ 140static void 141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 142 const xmlChar * localname) 143{ 144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145 (ctxt->instate == XML_PARSER_EOF)) 146 return; 147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 148 if (prefix == NULL) 149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 150 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 151 (const char *) localname, NULL, NULL, 0, 0, 152 "Attribute %s redefined\n", 
localname); 153 else 154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 155 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 156 (const char *) prefix, (const char *) localname, 157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 158 localname); 159 ctxt->wellFormed = 0; 160 if (ctxt->recovery == 0) 161 ctxt->disableSAX = 1; 162} 163 164/** 165 * xmlFatalErr: 166 * @ctxt: an XML parser context 167 * @error: the error number 168 * @extra: extra information string 169 * 170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 171 */ 172static void 173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 174{ 175 const char *errmsg; 176 177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 178 (ctxt->instate == XML_PARSER_EOF)) 179 return; 180 switch (error) { 181 case XML_ERR_INVALID_HEX_CHARREF: 182 errmsg = "CharRef: invalid hexadecimal value\n"; 183 break; 184 case XML_ERR_INVALID_DEC_CHARREF: 185 errmsg = "CharRef: invalid decimal value\n"; 186 break; 187 case XML_ERR_INVALID_CHARREF: 188 errmsg = "CharRef: invalid value\n"; 189 break; 190 case XML_ERR_INTERNAL_ERROR: 191 errmsg = "internal error"; 192 break; 193 case XML_ERR_PEREF_AT_EOF: 194 errmsg = "PEReference at end of document\n"; 195 break; 196 case XML_ERR_PEREF_IN_PROLOG: 197 errmsg = "PEReference in prolog\n"; 198 break; 199 case XML_ERR_PEREF_IN_EPILOG: 200 errmsg = "PEReference in epilog\n"; 201 break; 202 case XML_ERR_PEREF_NO_NAME: 203 errmsg = "PEReference: no name\n"; 204 break; 205 case XML_ERR_PEREF_SEMICOL_MISSING: 206 errmsg = "PEReference: expecting ';'\n"; 207 break; 208 case XML_ERR_ENTITY_LOOP: 209 errmsg = "Detected an entity reference loop\n"; 210 break; 211 case XML_ERR_ENTITY_NOT_STARTED: 212 errmsg = "EntityValue: \" or ' expected\n"; 213 break; 214 case XML_ERR_ENTITY_PE_INTERNAL: 215 errmsg = "PEReferences forbidden in internal subset\n"; 216 break; 217 case XML_ERR_ENTITY_NOT_FINISHED: 218 errmsg = "EntityValue: \" or ' expected\n"; 
219 break; 220 case XML_ERR_ATTRIBUTE_NOT_STARTED: 221 errmsg = "AttValue: \" or ' expected\n"; 222 break; 223 case XML_ERR_LT_IN_ATTRIBUTE: 224 errmsg = "Unescaped '<' not allowed in attributes values\n"; 225 break; 226 case XML_ERR_LITERAL_NOT_STARTED: 227 errmsg = "SystemLiteral \" or ' expected\n"; 228 break; 229 case XML_ERR_LITERAL_NOT_FINISHED: 230 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 231 break; 232 case XML_ERR_MISPLACED_CDATA_END: 233 errmsg = "Sequence ']]>' not allowed in content\n"; 234 break; 235 case XML_ERR_URI_REQUIRED: 236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 237 break; 238 case XML_ERR_PUBID_REQUIRED: 239 errmsg = "PUBLIC, the Public Identifier is missing\n"; 240 break; 241 case XML_ERR_HYPHEN_IN_COMMENT: 242 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 243 break; 244 case XML_ERR_PI_NOT_STARTED: 245 errmsg = "xmlParsePI : no target name\n"; 246 break; 247 case XML_ERR_RESERVED_XML_NAME: 248 errmsg = "Invalid PI name\n"; 249 break; 250 case XML_ERR_NOTATION_NOT_STARTED: 251 errmsg = "NOTATION: Name expected here\n"; 252 break; 253 case XML_ERR_NOTATION_NOT_FINISHED: 254 errmsg = "'>' required to close NOTATION declaration\n"; 255 break; 256 case XML_ERR_VALUE_REQUIRED: 257 errmsg = "Entity value required\n"; 258 break; 259 case XML_ERR_URI_FRAGMENT: 260 errmsg = "Fragment not allowed"; 261 break; 262 case XML_ERR_ATTLIST_NOT_STARTED: 263 errmsg = "'(' required to start ATTLIST enumeration\n"; 264 break; 265 case XML_ERR_NMTOKEN_REQUIRED: 266 errmsg = "NmToken expected in ATTLIST enumeration\n"; 267 break; 268 case XML_ERR_ATTLIST_NOT_FINISHED: 269 errmsg = "')' required to finish ATTLIST enumeration\n"; 270 break; 271 case XML_ERR_MIXED_NOT_STARTED: 272 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 273 break; 274 case XML_ERR_PCDATA_REQUIRED: 275 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 276 break; 277 case XML_ERR_ELEMCONTENT_NOT_STARTED: 278 errmsg = "ContentDecl : Name or 
'(' expected\n"; 279 break; 280 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 281 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 282 break; 283 case XML_ERR_PEREF_IN_INT_SUBSET: 284 errmsg = 285 "PEReference: forbidden within markup decl in internal subset\n"; 286 break; 287 case XML_ERR_GT_REQUIRED: 288 errmsg = "expected '>'\n"; 289 break; 290 case XML_ERR_CONDSEC_INVALID: 291 errmsg = "XML conditional section '[' expected\n"; 292 break; 293 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 294 errmsg = "Content error in the external subset\n"; 295 break; 296 case XML_ERR_CONDSEC_INVALID_KEYWORD: 297 errmsg = 298 "conditional section INCLUDE or IGNORE keyword expected\n"; 299 break; 300 case XML_ERR_CONDSEC_NOT_FINISHED: 301 errmsg = "XML conditional section not closed\n"; 302 break; 303 case XML_ERR_XMLDECL_NOT_STARTED: 304 errmsg = "Text declaration '<?xml' required\n"; 305 break; 306 case XML_ERR_XMLDECL_NOT_FINISHED: 307 errmsg = "parsing XML declaration: '?>' expected\n"; 308 break; 309 case XML_ERR_EXT_ENTITY_STANDALONE: 310 errmsg = "external parsed entities cannot be standalone\n"; 311 break; 312 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 313 errmsg = "EntityRef: expecting ';'\n"; 314 break; 315 case XML_ERR_DOCTYPE_NOT_FINISHED: 316 errmsg = "DOCTYPE improperly terminated\n"; 317 break; 318 case XML_ERR_LTSLASH_REQUIRED: 319 errmsg = "EndTag: '</' not found\n"; 320 break; 321 case XML_ERR_EQUAL_REQUIRED: 322 errmsg = "expected '='\n"; 323 break; 324 case XML_ERR_STRING_NOT_CLOSED: 325 errmsg = "String not closed expecting \" or '\n"; 326 break; 327 case XML_ERR_STRING_NOT_STARTED: 328 errmsg = "String not started expecting ' or \"\n"; 329 break; 330 case XML_ERR_ENCODING_NAME: 331 errmsg = "Invalid XML encoding name\n"; 332 break; 333 case XML_ERR_STANDALONE_VALUE: 334 errmsg = "standalone accepts only 'yes' or 'no'\n"; 335 break; 336 case XML_ERR_DOCUMENT_EMPTY: 337 errmsg = "Document is empty\n"; 338 break; 339 case XML_ERR_DOCUMENT_END: 340 errmsg = "Extra content at 
the end of the document\n"; 341 break; 342 case XML_ERR_NOT_WELL_BALANCED: 343 errmsg = "chunk is not well balanced\n"; 344 break; 345 case XML_ERR_EXTRA_CONTENT: 346 errmsg = "extra content at the end of well balanced chunk\n"; 347 break; 348 case XML_ERR_VERSION_MISSING: 349 errmsg = "Malformed declaration expecting version\n"; 350 break; 351#if 0 352 case: 353 errmsg = "\n"; 354 break; 355#endif 356 default: 357 errmsg = "Unregistered error message\n"; 358 } 359 ctxt->errNo = error; 360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 362 info); 363 ctxt->wellFormed = 0; 364 if (ctxt->recovery == 0) 365 ctxt->disableSAX = 1; 366} 367 368/** 369 * xmlFatalErrMsg: 370 * @ctxt: an XML parser context 371 * @error: the error number 372 * @msg: the error message 373 * 374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 375 */ 376static void 377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 378 const char *msg) 379{ 380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 381 (ctxt->instate == XML_PARSER_EOF)) 382 return; 383 ctxt->errNo = error; 384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 386 ctxt->wellFormed = 0; 387 if (ctxt->recovery == 0) 388 ctxt->disableSAX = 1; 389} 390 391/** 392 * xmlWarningMsg: 393 * @ctxt: an XML parser context 394 * @error: the error number 395 * @msg: the error message 396 * @str1: extra data 397 * @str2: extra data 398 * 399 * Handle a warning. 
400 */ 401static void 402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 403 const char *msg, const xmlChar *str1, const xmlChar *str2) 404{ 405 xmlStructuredErrorFunc schannel = NULL; 406 407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 408 (ctxt->instate == XML_PARSER_EOF)) 409 return; 410 ctxt->errNo = error; 411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 412 schannel = ctxt->sax->serror; 413 __xmlRaiseError(schannel, 414 (ctxt->sax) ? ctxt->sax->warning : NULL, 415 ctxt->userData, 416 ctxt, NULL, XML_FROM_PARSER, error, 417 XML_ERR_WARNING, NULL, 0, 418 (const char *) str1, (const char *) str2, NULL, 0, 0, 419 msg, (const char *) str1, (const char *) str2); 420} 421 422/** 423 * xmlValidityError: 424 * @ctxt: an XML parser context 425 * @error: the error number 426 * @msg: the error message 427 * @str1: extra data 428 * 429 * Handle a validity error. 430 */ 431static void 432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 433 const char *msg, const xmlChar *str1) 434{ 435 xmlStructuredErrorFunc schannel = NULL; 436 437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 438 (ctxt->instate == XML_PARSER_EOF)) 439 return; 440 ctxt->errNo = error; 441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 442 schannel = ctxt->sax->serror; 443 __xmlRaiseError(schannel, 444 ctxt->vctxt.error, ctxt->vctxt.userData, 445 ctxt, NULL, XML_FROM_DTD, error, 446 XML_ERR_ERROR, NULL, 0, (const char *) str1, 447 NULL, NULL, 0, 0, 448 msg, (const char *) str1); 449 ctxt->valid = 0; 450} 451 452/** 453 * xmlFatalErrMsgInt: 454 * @ctxt: an XML parser context 455 * @error: the error number 456 * @msg: the error message 457 * @val: an integer value 458 * 459 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 460 */ 461static void 462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 463 const char *msg, int val) 464{ 465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 466 (ctxt->instate == XML_PARSER_EOF)) 467 return; 468 ctxt->errNo = error; 469 __xmlRaiseError(NULL, NULL, NULL, 470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 472 ctxt->wellFormed = 0; 473 if (ctxt->recovery == 0) 474 ctxt->disableSAX = 1; 475} 476 477/** 478 * xmlFatalErrMsgStrIntStr: 479 * @ctxt: an XML parser context 480 * @error: the error number 481 * @msg: the error message 482 * @str1: an string info 483 * @val: an integer value 484 * @str2: an string info 485 * 486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 487 */ 488static void 489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 490 const char *msg, const xmlChar *str1, int val, 491 const xmlChar *str2) 492{ 493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 494 (ctxt->instate == XML_PARSER_EOF)) 495 return; 496 ctxt->errNo = error; 497 __xmlRaiseError(NULL, NULL, NULL, 498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 499 NULL, 0, (const char *) str1, (const char *) str2, 500 NULL, val, 0, msg, str1, val, str2); 501 ctxt->wellFormed = 0; 502 if (ctxt->recovery == 0) 503 ctxt->disableSAX = 1; 504} 505 506/** 507 * xmlFatalErrMsgStr: 508 * @ctxt: an XML parser context 509 * @error: the error number 510 * @msg: the error message 511 * @val: a string value 512 * 513 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 514 */ 515static void 516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 517 const char *msg, const xmlChar * val) 518{ 519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 520 (ctxt->instate == XML_PARSER_EOF)) 521 return; 522 ctxt->errNo = error; 523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 524 XML_FROM_PARSER, error, XML_ERR_FATAL, 525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 526 val); 527 ctxt->wellFormed = 0; 528 if (ctxt->recovery == 0) 529 ctxt->disableSAX = 1; 530} 531 532/** 533 * xmlErrMsgStr: 534 * @ctxt: an XML parser context 535 * @error: the error number 536 * @msg: the error message 537 * @val: a string value 538 * 539 * Handle a non fatal parser error 540 */ 541static void 542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 543 const char *msg, const xmlChar * val) 544{ 545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 546 (ctxt->instate == XML_PARSER_EOF)) 547 return; 548 ctxt->errNo = error; 549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 550 XML_FROM_PARSER, error, XML_ERR_ERROR, 551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 552 val); 553} 554 555/** 556 * xmlNsErr: 557 * @ctxt: an XML parser context 558 * @error: the error number 559 * @msg: the message 560 * @info1: extra information string 561 * @info2: extra information string 562 * 563 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 564 */ 565static void 566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 567 const char *msg, 568 const xmlChar * info1, const xmlChar * info2, 569 const xmlChar * info3) 570{ 571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 572 (ctxt->instate == XML_PARSER_EOF)) 573 return; 574 ctxt->errNo = error; 575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 576 XML_ERR_ERROR, NULL, 0, (const char *) info1, 577 (const char *) info2, (const char *) info3, 0, 0, msg, 578 info1, info2, info3); 579 ctxt->nsWellFormed = 0; 580} 581 582/************************************************************************ 583 * * 584 * SAX2 defaulted attributes handling * 585 * * 586 ************************************************************************/ 587 588/** 589 * xmlDetectSAX2: 590 * @ctxt: an XML parser context 591 * 592 * Do the SAX2 detection and specific intialization 593 */ 594static void 595xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 596 if (ctxt == NULL) return; 597#ifdef LIBXML_SAX1_ENABLED 598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 599 ((ctxt->sax->startElementNs != NULL) || 600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 601#else 602 ctxt->sax2 = 1; 603#endif /* LIBXML_SAX1_ENABLED */ 604 605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 608} 609 610typedef struct _xmlDefAttrs xmlDefAttrs; 611typedef xmlDefAttrs *xmlDefAttrsPtr; 612struct _xmlDefAttrs { 613 int nbAttrs; /* number of defaulted attributes on that element */ 614 int maxAttrs; /* the size of the array */ 615 const xmlChar *values[4]; /* array of localname/prefix/values */ 616}; 617 618/** 619 * xmlAddDefAttrs: 620 * @ctxt: an XML parser context 621 * @fullname: the element fullname 622 * @fullattr: the attribute fullname 623 * @value: the attribute 
value 624 * 625 * Add a defaulted attribute for an element 626 */ 627static void 628xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 629 const xmlChar *fullname, 630 const xmlChar *fullattr, 631 const xmlChar *value) { 632 xmlDefAttrsPtr defaults; 633 int len; 634 const xmlChar *name; 635 const xmlChar *prefix; 636 637 if (ctxt->attsDefault == NULL) { 638 ctxt->attsDefault = xmlHashCreate(10); 639 if (ctxt->attsDefault == NULL) 640 goto mem_error; 641 } 642 643 /* 644 * plit the element name into prefix:localname , the string found 645 * are within the DTD and hen not associated to namespace names. 646 */ 647 name = xmlSplitQName3(fullname, &len); 648 if (name == NULL) { 649 name = xmlDictLookup(ctxt->dict, fullname, -1); 650 prefix = NULL; 651 } else { 652 name = xmlDictLookup(ctxt->dict, name, -1); 653 prefix = xmlDictLookup(ctxt->dict, fullname, len); 654 } 655 656 /* 657 * make sure there is some storage 658 */ 659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 660 if (defaults == NULL) { 661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 662 12 * sizeof(const xmlChar *)); 663 if (defaults == NULL) 664 goto mem_error; 665 defaults->maxAttrs = 4; 666 defaults->nbAttrs = 0; 667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 668 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 671 if (defaults == NULL) 672 goto mem_error; 673 defaults->maxAttrs *= 2; 674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 675 } 676 677 /* 678 * plit the element name into prefix:localname , the string found 679 * are within the DTD and hen not associated to namespace names. 
680 */ 681 name = xmlSplitQName3(fullattr, &len); 682 if (name == NULL) { 683 name = xmlDictLookup(ctxt->dict, fullattr, -1); 684 prefix = NULL; 685 } else { 686 name = xmlDictLookup(ctxt->dict, name, -1); 687 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 688 } 689 690 defaults->values[4 * defaults->nbAttrs] = name; 691 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 692 /* intern the string and precompute the end */ 693 len = xmlStrlen(value); 694 value = xmlDictLookup(ctxt->dict, value, len); 695 defaults->values[4 * defaults->nbAttrs + 2] = value; 696 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 697 defaults->nbAttrs++; 698 699 return; 700 701mem_error: 702 xmlErrMemory(ctxt, NULL); 703 return; 704} 705 706/** 707 * xmlAddSpecialAttr: 708 * @ctxt: an XML parser context 709 * @fullname: the element fullname 710 * @fullattr: the attribute fullname 711 * @type: the attribute type 712 * 713 * Register that this attribute is not CDATA 714 */ 715static void 716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 717 const xmlChar *fullname, 718 const xmlChar *fullattr, 719 int type) 720{ 721 if (ctxt->attsSpecial == NULL) { 722 ctxt->attsSpecial = xmlHashCreate(10); 723 if (ctxt->attsSpecial == NULL) 724 goto mem_error; 725 } 726 727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 728 (void *) (long) type); 729 return; 730 731mem_error: 732 xmlErrMemory(ctxt, NULL); 733 return; 734} 735 736/** 737 * xmlCheckLanguageID: 738 * @lang: pointer to the string value 739 * 740 * Checks that the value conforms to the LanguageID production: 741 * 742 * NOTE: this is somewhat deprecated, those productions were removed from 743 * the XML Second edition. 
744 * 745 * [33] LanguageID ::= Langcode ('-' Subcode)* 746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 750 * [38] Subcode ::= ([a-z] | [A-Z])+ 751 * 752 * Returns 1 if correct 0 otherwise 753 **/ 754int 755xmlCheckLanguageID(const xmlChar * lang) 756{ 757 const xmlChar *cur = lang; 758 759 if (cur == NULL) 760 return (0); 761 if (((cur[0] == 'i') && (cur[1] == '-')) || 762 ((cur[0] == 'I') && (cur[1] == '-'))) { 763 /* 764 * IANA code 765 */ 766 cur += 2; 767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 768 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 769 cur++; 770 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 771 ((cur[0] == 'X') && (cur[1] == '-'))) { 772 /* 773 * User code 774 */ 775 cur += 2; 776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 777 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 778 cur++; 779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 781 /* 782 * ISO639 783 */ 784 cur++; 785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 786 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 787 cur++; 788 else 789 return (0); 790 } else 791 return (0); 792 while (cur[0] != 0) { /* non input consuming */ 793 if (cur[0] != '-') 794 return (0); 795 cur++; 796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 797 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 798 cur++; 799 else 800 return (0); 801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 802 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 803 cur++; 804 } 805 return (1); 806} 807 808/************************************************************************ 809 * * 810 * Parser stacks related functions and macros * 811 * * 812 ************************************************************************/ 813 814xmlEntityPtr 
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 815 const xmlChar ** str); 816 817#ifdef SAX2 818/** 819 * nsPush: 820 * @ctxt: an XML parser context 821 * @prefix: the namespace prefix or NULL 822 * @URL: the namespace name 823 * 824 * Pushes a new parser namespace on top of the ns stack 825 * 826 * Returns -1 in case of error, -2 if the namespace should be discarded 827 * and the index in the stack otherwise. 828 */ 829static int 830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 831{ 832 if (ctxt->options & XML_PARSE_NSCLEAN) { 833 int i; 834 for (i = 0;i < ctxt->nsNr;i += 2) { 835 if (ctxt->nsTab[i] == prefix) { 836 /* in scope */ 837 if (ctxt->nsTab[i + 1] == URL) 838 return(-2); 839 /* out of scope keep it */ 840 break; 841 } 842 } 843 } 844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 845 ctxt->nsMax = 10; 846 ctxt->nsNr = 0; 847 ctxt->nsTab = (const xmlChar **) 848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 849 if (ctxt->nsTab == NULL) { 850 xmlErrMemory(ctxt, NULL); 851 ctxt->nsMax = 0; 852 return (-1); 853 } 854 } else if (ctxt->nsNr >= ctxt->nsMax) { 855 ctxt->nsMax *= 2; 856 ctxt->nsTab = (const xmlChar **) 857 xmlRealloc((char *) ctxt->nsTab, 858 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 859 if (ctxt->nsTab == NULL) { 860 xmlErrMemory(ctxt, NULL); 861 ctxt->nsMax /= 2; 862 return (-1); 863 } 864 } 865 ctxt->nsTab[ctxt->nsNr++] = prefix; 866 ctxt->nsTab[ctxt->nsNr++] = URL; 867 return (ctxt->nsNr); 868} 869/** 870 * nsPop: 871 * @ctxt: an XML parser context 872 * @nr: the number to pop 873 * 874 * Pops the top @nr parser prefix/namespace from the ns stack 875 * 876 * Returns the number of namespaces removed 877 */ 878static int 879nsPop(xmlParserCtxtPtr ctxt, int nr) 880{ 881 int i; 882 883 if (ctxt->nsTab == NULL) return(0); 884 if (ctxt->nsNr < nr) { 885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 886 nr = ctxt->nsNr; 887 } 888 if (ctxt->nsNr <= 0) 889 return (0); 890 891 for (i = 0;i < nr;i++) { 892 
ctxt->nsNr--; 893 ctxt->nsTab[ctxt->nsNr] = NULL; 894 } 895 return(nr); 896} 897#endif 898 899static int 900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 901 const xmlChar **atts; 902 int *attallocs; 903 int maxatts; 904 905 if (ctxt->atts == NULL) { 906 maxatts = 55; /* allow for 10 attrs by default */ 907 atts = (const xmlChar **) 908 xmlMalloc(maxatts * sizeof(xmlChar *)); 909 if (atts == NULL) goto mem_error; 910 ctxt->atts = atts; 911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 912 if (attallocs == NULL) goto mem_error; 913 ctxt->attallocs = attallocs; 914 ctxt->maxatts = maxatts; 915 } else if (nr + 5 > ctxt->maxatts) { 916 maxatts = (nr + 5) * 2; 917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 918 maxatts * sizeof(const xmlChar *)); 919 if (atts == NULL) goto mem_error; 920 ctxt->atts = atts; 921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 922 (maxatts / 5) * sizeof(int)); 923 if (attallocs == NULL) goto mem_error; 924 ctxt->attallocs = attallocs; 925 ctxt->maxatts = maxatts; 926 } 927 return(ctxt->maxatts); 928mem_error: 929 xmlErrMemory(ctxt, NULL); 930 return(-1); 931} 932 933/** 934 * inputPush: 935 * @ctxt: an XML parser context 936 * @value: the parser input 937 * 938 * Pushes a new parser input on top of the input stack 939 * 940 * Returns 0 in case of error, the index in the stack otherwise 941 */ 942extern int 943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 944{ 945 if (ctxt->inputNr >= ctxt->inputMax) { 946 ctxt->inputMax *= 2; 947 ctxt->inputTab = 948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 949 ctxt->inputMax * 950 sizeof(ctxt->inputTab[0])); 951 if (ctxt->inputTab == NULL) { 952 xmlErrMemory(ctxt, NULL); 953 return (0); 954 } 955 } 956 ctxt->inputTab[ctxt->inputNr] = value; 957 ctxt->input = value; 958 return (ctxt->inputNr++); 959} 960/** 961 * inputPop: 962 * @ctxt: an XML parser context 963 * 964 * Pops the top parser input from the input stack 965 * 966 * Returns the input 
just removed 967 */ 968extern xmlParserInputPtr 969inputPop(xmlParserCtxtPtr ctxt) 970{ 971 xmlParserInputPtr ret; 972 973 if (ctxt->inputNr <= 0) 974 return (0); 975 ctxt->inputNr--; 976 if (ctxt->inputNr > 0) 977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 978 else 979 ctxt->input = NULL; 980 ret = ctxt->inputTab[ctxt->inputNr]; 981 ctxt->inputTab[ctxt->inputNr] = 0; 982 return (ret); 983} 984/** 985 * nodePush: 986 * @ctxt: an XML parser context 987 * @value: the element node 988 * 989 * Pushes a new element node on top of the node stack 990 * 991 * Returns 0 in case of error, the index in the stack otherwise 992 */ 993extern int 994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 995{ 996 if (ctxt->nodeNr >= ctxt->nodeMax) { 997 ctxt->nodeMax *= 2; 998 ctxt->nodeTab = 999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1000 ctxt->nodeMax * 1001 sizeof(ctxt->nodeTab[0])); 1002 if (ctxt->nodeTab == NULL) { 1003 xmlErrMemory(ctxt, NULL); 1004 return (0); 1005 } 1006 } 1007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 1008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1009 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 1010 xmlParserMaxDepth); 1011 ctxt->instate = XML_PARSER_EOF; 1012 return(0); 1013 } 1014 ctxt->nodeTab[ctxt->nodeNr] = value; 1015 ctxt->node = value; 1016 return (ctxt->nodeNr++); 1017} 1018/** 1019 * nodePop: 1020 * @ctxt: an XML parser context 1021 * 1022 * Pops the top element node from the node stack 1023 * 1024 * Returns the node just removed 1025 */ 1026extern xmlNodePtr 1027nodePop(xmlParserCtxtPtr ctxt) 1028{ 1029 xmlNodePtr ret; 1030 1031 if (ctxt->nodeNr <= 0) 1032 return (0); 1033 ctxt->nodeNr--; 1034 if (ctxt->nodeNr > 0) 1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1036 else 1037 ctxt->node = NULL; 1038 ret = ctxt->nodeTab[ctxt->nodeNr]; 1039 ctxt->nodeTab[ctxt->nodeNr] = 0; 1040 return (ret); 1041} 1042/** 1043 * nameNsPush: 1044 * @ctxt: an XML parser context 1045 * @value: the element name 1046 * 
@prefix: the element prefix 1047 * @URI: the element namespace name 1048 * 1049 * Pushes a new element name/prefix/URL on top of the name stack 1050 * 1051 * Returns -1 in case of error, the index in the stack otherwise 1052 */ 1053static int 1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1055 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1056{ 1057 if (ctxt->nameNr >= ctxt->nameMax) { 1058 const xmlChar * *tmp; 1059 void **tmp2; 1060 ctxt->nameMax *= 2; 1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1062 ctxt->nameMax * 1063 sizeof(ctxt->nameTab[0])); 1064 if (tmp == NULL) { 1065 ctxt->nameMax /= 2; 1066 goto mem_error; 1067 } 1068 ctxt->nameTab = tmp; 1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1070 ctxt->nameMax * 3 * 1071 sizeof(ctxt->pushTab[0])); 1072 if (tmp2 == NULL) { 1073 ctxt->nameMax /= 2; 1074 goto mem_error; 1075 } 1076 ctxt->pushTab = tmp2; 1077 } 1078 ctxt->nameTab[ctxt->nameNr] = value; 1079 ctxt->name = value; 1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1083 return (ctxt->nameNr++); 1084mem_error: 1085 xmlErrMemory(ctxt, NULL); 1086 return (-1); 1087} 1088/** 1089 * nameNsPop: 1090 * @ctxt: an XML parser context 1091 * 1092 * Pops the top element/prefix/URI name from the name stack 1093 * 1094 * Returns the name just removed 1095 */ 1096static const xmlChar * 1097nameNsPop(xmlParserCtxtPtr ctxt) 1098{ 1099 const xmlChar *ret; 1100 1101 if (ctxt->nameNr <= 0) 1102 return (0); 1103 ctxt->nameNr--; 1104 if (ctxt->nameNr > 0) 1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1106 else 1107 ctxt->name = NULL; 1108 ret = ctxt->nameTab[ctxt->nameNr]; 1109 ctxt->nameTab[ctxt->nameNr] = NULL; 1110 return (ret); 1111} 1112 1113/** 1114 * namePush: 1115 * @ctxt: an XML parser context 1116 * @value: the element name 1117 * 1118 * Pushes a new element name on top of the 
name stack 1119 * 1120 * Returns -1 in case of error, the index in the stack otherwise 1121 */ 1122extern int 1123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1124{ 1125 if (ctxt->nameNr >= ctxt->nameMax) { 1126 const xmlChar * *tmp; 1127 ctxt->nameMax *= 2; 1128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1129 ctxt->nameMax * 1130 sizeof(ctxt->nameTab[0])); 1131 if (tmp == NULL) { 1132 ctxt->nameMax /= 2; 1133 goto mem_error; 1134 } 1135 ctxt->nameTab = tmp; 1136 } 1137 ctxt->nameTab[ctxt->nameNr] = value; 1138 ctxt->name = value; 1139 return (ctxt->nameNr++); 1140mem_error: 1141 xmlErrMemory(ctxt, NULL); 1142 return (-1); 1143} 1144/** 1145 * namePop: 1146 * @ctxt: an XML parser context 1147 * 1148 * Pops the top element name from the name stack 1149 * 1150 * Returns the name just removed 1151 */ 1152extern const xmlChar * 1153namePop(xmlParserCtxtPtr ctxt) 1154{ 1155 const xmlChar *ret; 1156 1157 if (ctxt->nameNr <= 0) 1158 return (0); 1159 ctxt->nameNr--; 1160 if (ctxt->nameNr > 0) 1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1162 else 1163 ctxt->name = NULL; 1164 ret = ctxt->nameTab[ctxt->nameNr]; 1165 ctxt->nameTab[ctxt->nameNr] = 0; 1166 return (ret); 1167} 1168 1169static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1170 if (ctxt->spaceNr >= ctxt->spaceMax) { 1171 ctxt->spaceMax *= 2; 1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1174 if (ctxt->spaceTab == NULL) { 1175 xmlErrMemory(ctxt, NULL); 1176 return(0); 1177 } 1178 } 1179 ctxt->spaceTab[ctxt->spaceNr] = val; 1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1181 return(ctxt->spaceNr++); 1182} 1183 1184static int spacePop(xmlParserCtxtPtr ctxt) { 1185 int ret; 1186 if (ctxt->spaceNr <= 0) return(0); 1187 ctxt->spaceNr--; 1188 if (ctxt->spaceNr > 0) 1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1190 else 1191 ctxt->space = NULL; 1192 ret = ctxt->spaceTab[ctxt->spaceNr]; 1193 
ctxt->spaceTab[ctxt->spaceNr] = -1; 1194 return(ret); 1195} 1196 1197/* 1198 * Macros for accessing the content. Those should be used only by the parser, 1199 * and not exported. 1200 * 1201 * Dirty macros, i.e. one often need to make assumption on the context to 1202 * use them 1203 * 1204 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1205 * To be used with extreme caution since operations consuming 1206 * characters may move the input buffer to a different location ! 1207 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1208 * This should be used internally by the parser 1209 * only to compare to ASCII values otherwise it would break when 1210 * running with UTF-8 encoding. 1211 * RAW same as CUR but in the input buffer, bypass any token 1212 * extraction that may have been done 1213 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1214 * to compare on ASCII based substring. 1215 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1216 * strings without newlines within the parser. 1217 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1218 * defined char within the parser. 1219 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1220 * 1221 * NEXT Skip to the next character, this does the proper decoding 1222 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1223 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1224 * CUR_CHAR(l) returns the current unicode character (int), set l 1225 * to the number of xmlChars used for the encoding [0-5]. 
1226 * CUR_SCHAR same but operate on a string instead of the context 1227 * COPY_BUF copy the current unicode char to the target buffer, increment 1228 * the index 1229 * GROW, SHRINK handling of input buffers 1230 */ 1231 1232#define RAW (*ctxt->input->cur) 1233#define CUR (*ctxt->input->cur) 1234#define NXT(val) ctxt->input->cur[(val)] 1235#define CUR_PTR ctxt->input->cur 1236 1237#define CMP4( s, c1, c2, c3, c4 ) \ 1238 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1239 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1240#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1241 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1242#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1243 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1244#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1245 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1246#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1247 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1248#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1249 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1250 ((unsigned char *) s)[ 8 ] == c9 ) 1251#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1252 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1253 ((unsigned char *) s)[ 9 ] == c10 ) 1254 1255#define SKIP(val) do { \ 1256 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1257 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1258 if ((*ctxt->input->cur == 0) && \ 1259 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1260 xmlPopInput(ctxt); \ 1261 } while (0) 1262 1263#define SHRINK if ((ctxt->progressive == 0) && \ 1264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1266 xmlSHRINK (ctxt); 1267 1268static void xmlSHRINK (xmlParserCtxtPtr 
ctxt) { 1269 xmlParserInputShrink(ctxt->input); 1270 if ((*ctxt->input->cur == 0) && 1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1272 xmlPopInput(ctxt); 1273 } 1274 1275#define GROW if ((ctxt->progressive == 0) && \ 1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1277 xmlGROW (ctxt); 1278 1279static void xmlGROW (xmlParserCtxtPtr ctxt) { 1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1281 if ((*ctxt->input->cur == 0) && 1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1283 xmlPopInput(ctxt); 1284} 1285 1286#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1287 1288#define NEXT xmlNextChar(ctxt) 1289 1290#define NEXT1 { \ 1291 ctxt->input->col++; \ 1292 ctxt->input->cur++; \ 1293 ctxt->nbChars++; \ 1294 if (*ctxt->input->cur == 0) \ 1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1296 } 1297 1298#define NEXTL(l) do { \ 1299 if (*(ctxt->input->cur) == '\n') { \ 1300 ctxt->input->line++; ctxt->input->col = 1; \ 1301 } else ctxt->input->col++; \ 1302 ctxt->input->cur += l; \ 1303 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1304 } while (0) 1305 1306#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1307#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1308 1309#define COPY_BUF(l,b,i,v) \ 1310 if (l == 1) b[i++] = (xmlChar) v; \ 1311 else i += xmlCopyCharMultiByte(&b[i],v) 1312 1313/** 1314 * xmlSkipBlankChars: 1315 * @ctxt: the XML parser context 1316 * 1317 * skip all blanks character found at that point in the input streams. 1318 * It pops up finished entities in the process if allowable at that point. 1319 * 1320 * Returns the number of space chars skipped 1321 */ 1322 1323int 1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1325 int res = 0; 1326 1327 /* 1328 * It's Okay to use CUR/NEXT here since all the blanks are on 1329 * the ASCII range. 
1330 */ 1331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1332 const xmlChar *cur; 1333 /* 1334 * if we are in the document content, go really fast 1335 */ 1336 cur = ctxt->input->cur; 1337 while (IS_BLANK_CH(*cur)) { 1338 if (*cur == '\n') { 1339 ctxt->input->line++; ctxt->input->col = 1; 1340 } 1341 cur++; 1342 res++; 1343 if (*cur == 0) { 1344 ctxt->input->cur = cur; 1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1346 cur = ctxt->input->cur; 1347 } 1348 } 1349 ctxt->input->cur = cur; 1350 } else { 1351 int cur; 1352 do { 1353 cur = CUR; 1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 1355 NEXT; 1356 cur = CUR; 1357 res++; 1358 } 1359 while ((cur == 0) && (ctxt->inputNr > 1) && 1360 (ctxt->instate != XML_PARSER_COMMENT)) { 1361 xmlPopInput(ctxt); 1362 cur = CUR; 1363 } 1364 /* 1365 * Need to handle support of entities branching here 1366 */ 1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1369 } 1370 return(res); 1371} 1372 1373/************************************************************************ 1374 * * 1375 * Commodity functions to handle entities * 1376 * * 1377 ************************************************************************/ 1378 1379/** 1380 * xmlPopInput: 1381 * @ctxt: an XML parser context 1382 * 1383 * xmlPopInput: the current input pointed by ctxt->input came to an end 1384 * pop it and return the next char. 
1385 * 1386 * Returns the current xmlChar in the parser context 1387 */ 1388xmlChar 1389xmlPopInput(xmlParserCtxtPtr ctxt) { 1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 1391 if (xmlParserDebugEntities) 1392 xmlGenericError(xmlGenericErrorContext, 1393 "Popping input %d\n", ctxt->inputNr); 1394 xmlFreeInputStream(inputPop(ctxt)); 1395 if ((*ctxt->input->cur == 0) && 1396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1397 return(xmlPopInput(ctxt)); 1398 return(CUR); 1399} 1400 1401/** 1402 * xmlPushInput: 1403 * @ctxt: an XML parser context 1404 * @input: an XML parser input fragment (entity, XML fragment ...). 1405 * 1406 * xmlPushInput: switch to a new input stream which is stacked on top 1407 * of the previous one(s). 1408 */ 1409void 1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1411 if (input == NULL) return; 1412 1413 if (xmlParserDebugEntities) { 1414 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1415 xmlGenericError(xmlGenericErrorContext, 1416 "%s(%d): ", ctxt->input->filename, 1417 ctxt->input->line); 1418 xmlGenericError(xmlGenericErrorContext, 1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1420 } 1421 inputPush(ctxt, input); 1422 GROW; 1423} 1424 1425/** 1426 * xmlParseCharRef: 1427 * @ctxt: an XML parser context 1428 * 1429 * parse Reference declarations 1430 * 1431 * [66] CharRef ::= '&#' [0-9]+ ';' | 1432 * '&#x' [0-9a-fA-F]+ ';' 1433 * 1434 * [ WFC: Legal Character ] 1435 * Characters referred to using character references must match the 1436 * production for Char. 
1437 * 1438 * Returns the value parsed (as an int), 0 in case of error 1439 */ 1440int 1441xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1442 unsigned int val = 0; 1443 int count = 0; 1444 1445 /* 1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1447 */ 1448 if ((RAW == '&') && (NXT(1) == '#') && 1449 (NXT(2) == 'x')) { 1450 SKIP(3); 1451 GROW; 1452 while (RAW != ';') { /* loop blocked by count */ 1453 if (count++ > 20) { 1454 count = 0; 1455 GROW; 1456 } 1457 if ((RAW >= '0') && (RAW <= '9')) 1458 val = val * 16 + (CUR - '0'); 1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1460 val = val * 16 + (CUR - 'a') + 10; 1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1462 val = val * 16 + (CUR - 'A') + 10; 1463 else { 1464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1465 val = 0; 1466 break; 1467 } 1468 NEXT; 1469 count++; 1470 } 1471 if (RAW == ';') { 1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1473 ctxt->input->col++; 1474 ctxt->nbChars ++; 1475 ctxt->input->cur++; 1476 } 1477 } else if ((RAW == '&') && (NXT(1) == '#')) { 1478 SKIP(2); 1479 GROW; 1480 while (RAW != ';') { /* loop blocked by count */ 1481 if (count++ > 20) { 1482 count = 0; 1483 GROW; 1484 } 1485 if ((RAW >= '0') && (RAW <= '9')) 1486 val = val * 10 + (CUR - '0'); 1487 else { 1488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1489 val = 0; 1490 break; 1491 } 1492 NEXT; 1493 count++; 1494 } 1495 if (RAW == ';') { 1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1497 ctxt->input->col++; 1498 ctxt->nbChars ++; 1499 ctxt->input->cur++; 1500 } 1501 } else { 1502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1503 } 1504 1505 /* 1506 * [ WFC: Legal Character ] 1507 * Characters referred to using character references must match the 1508 * production for Char. 
1509 */ 1510 if (IS_CHAR(val)) { 1511 return(val); 1512 } else { 1513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1514 "xmlParseCharRef: invalid xmlChar value %d\n", 1515 val); 1516 } 1517 return(0); 1518} 1519 1520/** 1521 * xmlParseStringCharRef: 1522 * @ctxt: an XML parser context 1523 * @str: a pointer to an index in the string 1524 * 1525 * parse Reference declarations, variant parsing from a string rather 1526 * than an an input flow. 1527 * 1528 * [66] CharRef ::= '&#' [0-9]+ ';' | 1529 * '&#x' [0-9a-fA-F]+ ';' 1530 * 1531 * [ WFC: Legal Character ] 1532 * Characters referred to using character references must match the 1533 * production for Char. 1534 * 1535 * Returns the value parsed (as an int), 0 in case of error, str will be 1536 * updated to the current value of the index 1537 */ 1538static int 1539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1540 const xmlChar *ptr; 1541 xmlChar cur; 1542 int val = 0; 1543 1544 if ((str == NULL) || (*str == NULL)) return(0); 1545 ptr = *str; 1546 cur = *ptr; 1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1548 ptr += 3; 1549 cur = *ptr; 1550 while (cur != ';') { /* Non input consuming loop */ 1551 if ((cur >= '0') && (cur <= '9')) 1552 val = val * 16 + (cur - '0'); 1553 else if ((cur >= 'a') && (cur <= 'f')) 1554 val = val * 16 + (cur - 'a') + 10; 1555 else if ((cur >= 'A') && (cur <= 'F')) 1556 val = val * 16 + (cur - 'A') + 10; 1557 else { 1558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1559 val = 0; 1560 break; 1561 } 1562 ptr++; 1563 cur = *ptr; 1564 } 1565 if (cur == ';') 1566 ptr++; 1567 } else if ((cur == '&') && (ptr[1] == '#')){ 1568 ptr += 2; 1569 cur = *ptr; 1570 while (cur != ';') { /* Non input consuming loops */ 1571 if ((cur >= '0') && (cur <= '9')) 1572 val = val * 10 + (cur - '0'); 1573 else { 1574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1575 val = 0; 1576 break; 1577 } 1578 ptr++; 1579 cur = *ptr; 1580 } 1581 if (cur == ';') 1582 ptr++; 
1583 } else { 1584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1585 return(0); 1586 } 1587 *str = ptr; 1588 1589 /* 1590 * [ WFC: Legal Character ] 1591 * Characters referred to using character references must match the 1592 * production for Char. 1593 */ 1594 if (IS_CHAR(val)) { 1595 return(val); 1596 } else { 1597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1598 "xmlParseStringCharRef: invalid xmlChar value %d\n", 1599 val); 1600 } 1601 return(0); 1602} 1603 1604/** 1605 * xmlNewBlanksWrapperInputStream: 1606 * @ctxt: an XML parser context 1607 * @entity: an Entity pointer 1608 * 1609 * Create a new input stream for wrapping 1610 * blanks around a PEReference 1611 * 1612 * Returns the new input stream or NULL 1613 */ 1614 1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 1616 1617static xmlParserInputPtr 1618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1619 xmlParserInputPtr input; 1620 xmlChar *buffer; 1621 size_t length; 1622 if (entity == NULL) { 1623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 1624 "xmlNewBlanksWrapperInputStream entity\n"); 1625 return(NULL); 1626 } 1627 if (xmlParserDebugEntities) 1628 xmlGenericError(xmlGenericErrorContext, 1629 "new blanks wrapper for entity: %s\n", entity->name); 1630 input = xmlNewInputStream(ctxt); 1631 if (input == NULL) { 1632 return(NULL); 1633 } 1634 length = xmlStrlen(entity->name) + 5; 1635 buffer = xmlMallocAtomic(length); 1636 if (buffer == NULL) { 1637 xmlErrMemory(ctxt, NULL); 1638 return(NULL); 1639 } 1640 buffer [0] = ' '; 1641 buffer [1] = '%'; 1642 buffer [length-3] = ';'; 1643 buffer [length-2] = ' '; 1644 buffer [length-1] = 0; 1645 memcpy(buffer + 2, entity->name, length - 5); 1646 input->free = deallocblankswrapper; 1647 input->base = buffer; 1648 input->cur = buffer; 1649 input->length = length; 1650 input->end = &buffer[length]; 1651 return(input); 1652} 1653 1654/** 1655 * xmlParserHandlePEReference: 1656 * @ctxt: the parser context 1657 * 
1658 * [69] PEReference ::= '%' Name ';' 1659 * 1660 * [ WFC: No Recursion ] 1661 * A parsed entity must not contain a recursive 1662 * reference to itself, either directly or indirectly. 1663 * 1664 * [ WFC: Entity Declared ] 1665 * In a document without any DTD, a document with only an internal DTD 1666 * subset which contains no parameter entity references, or a document 1667 * with "standalone='yes'", ... ... The declaration of a parameter 1668 * entity must precede any reference to it... 1669 * 1670 * [ VC: Entity Declared ] 1671 * In a document with an external subset or external parameter entities 1672 * with "standalone='no'", ... ... The declaration of a parameter entity 1673 * must precede any reference to it... 1674 * 1675 * [ WFC: In DTD ] 1676 * Parameter-entity references may only appear in the DTD. 1677 * NOTE: misleading but this is handled. 1678 * 1679 * A PEReference may have been detected in the current input stream 1680 * the handling is done accordingly to 1681 * http://www.w3.org/TR/REC-xml#entproc 1682 * i.e. 
1683 * - Included in literal in entity values 1684 * - Included as Parameter Entity reference within DTDs 1685 */ 1686void 1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 1688 const xmlChar *name; 1689 xmlEntityPtr entity = NULL; 1690 xmlParserInputPtr input; 1691 1692 if (RAW != '%') return; 1693 switch(ctxt->instate) { 1694 case XML_PARSER_CDATA_SECTION: 1695 return; 1696 case XML_PARSER_COMMENT: 1697 return; 1698 case XML_PARSER_START_TAG: 1699 return; 1700 case XML_PARSER_END_TAG: 1701 return; 1702 case XML_PARSER_EOF: 1703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 1704 return; 1705 case XML_PARSER_PROLOG: 1706 case XML_PARSER_START: 1707 case XML_PARSER_MISC: 1708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 1709 return; 1710 case XML_PARSER_ENTITY_DECL: 1711 case XML_PARSER_CONTENT: 1712 case XML_PARSER_ATTRIBUTE_VALUE: 1713 case XML_PARSER_PI: 1714 case XML_PARSER_SYSTEM_LITERAL: 1715 case XML_PARSER_PUBLIC_LITERAL: 1716 /* we just ignore it there */ 1717 return; 1718 case XML_PARSER_EPILOG: 1719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 1720 return; 1721 case XML_PARSER_ENTITY_VALUE: 1722 /* 1723 * NOTE: in the case of entity values, we don't do the 1724 * substitution here since we need the literal 1725 * entity value to be able to save the internal 1726 * subset of the document. 1727 * This will be handled by xmlStringDecodeEntities 1728 */ 1729 return; 1730 case XML_PARSER_DTD: 1731 /* 1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 1733 * In the internal DTD subset, parameter-entity references 1734 * can occur only where markup declarations can occur, not 1735 * within markup declarations. 
1736 * In that case this is handled in xmlParseMarkupDecl 1737 */ 1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 1739 return; 1740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 1741 return; 1742 break; 1743 case XML_PARSER_IGNORE: 1744 return; 1745 } 1746 1747 NEXT; 1748 name = xmlParseName(ctxt); 1749 if (xmlParserDebugEntities) 1750 xmlGenericError(xmlGenericErrorContext, 1751 "PEReference: %s\n", name); 1752 if (name == NULL) { 1753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 1754 } else { 1755 if (RAW == ';') { 1756 NEXT; 1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 1759 if (entity == NULL) { 1760 1761 /* 1762 * [ WFC: Entity Declared ] 1763 * In a document without any DTD, a document with only an 1764 * internal DTD subset which contains no parameter entity 1765 * references, or a document with "standalone='yes'", ... 1766 * ... The declaration of a parameter entity must precede 1767 * any reference to it... 1768 */ 1769 if ((ctxt->standalone == 1) || 1770 ((ctxt->hasExternalSubset == 0) && 1771 (ctxt->hasPErefs == 0))) { 1772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 1773 "PEReference: %%%s; not found\n", name); 1774 } else { 1775 /* 1776 * [ VC: Entity Declared ] 1777 * In a document with an external subset or external 1778 * parameter entities with "standalone='no'", ... 1779 * ... The declaration of a parameter entity must precede 1780 * any reference to it... 
1781 */ 1782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 1784 "PEReference: %%%s; not found\n", 1785 name); 1786 } else 1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 1788 "PEReference: %%%s; not found\n", 1789 name, NULL); 1790 ctxt->valid = 0; 1791 } 1792 } else if (ctxt->input->free != deallocblankswrapper) { 1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 1794 xmlPushInput(ctxt, input); 1795 } else { 1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 1798 xmlChar start[4]; 1799 xmlCharEncoding enc; 1800 1801 /* 1802 * handle the extra spaces added before and after 1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE 1804 * this is done independently. 1805 */ 1806 input = xmlNewEntityInputStream(ctxt, entity); 1807 xmlPushInput(ctxt, input); 1808 1809 /* 1810 * Get the 4 first bytes and decode the charset 1811 * if enc != XML_CHAR_ENCODING_NONE 1812 * plug some encoding conversion routines. 1813 */ 1814 GROW 1815 if (entity->length >= 4) { 1816 start[0] = RAW; 1817 start[1] = NXT(1); 1818 start[2] = NXT(2); 1819 start[3] = NXT(3); 1820 enc = xmlDetectCharEncoding(start, 4); 1821 if (enc != XML_CHAR_ENCODING_NONE) { 1822 xmlSwitchEncoding(ctxt, enc); 1823 } 1824 } 1825 1826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 1827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 1828 (IS_BLANK_CH(NXT(5)))) { 1829 xmlParseTextDecl(ctxt); 1830 } 1831 } else { 1832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 1833 "PEReference: %s is not a parameter entity\n", 1834 name); 1835 } 1836 } 1837 } else { 1838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 1839 } 1840 } 1841} 1842 1843/* 1844 * Macro used to grow the current buffer. 
1845 */ 1846#define growBuffer(buffer) { \ 1847 buffer##_size *= 2; \ 1848 buffer = (xmlChar *) \ 1849 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1850 if (buffer == NULL) goto mem_error; \ 1851} 1852 1853/** 1854 * xmlStringLenDecodeEntities: 1855 * @ctxt: the parser context 1856 * @str: the input string 1857 * @len: the string length 1858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1859 * @end: an end marker xmlChar, 0 if none 1860 * @end2: an end marker xmlChar, 0 if none 1861 * @end3: an end marker xmlChar, 0 if none 1862 * 1863 * Takes a entity string content and process to do the adequate substitutions. 1864 * 1865 * [67] Reference ::= EntityRef | CharRef 1866 * 1867 * [69] PEReference ::= '%' Name ';' 1868 * 1869 * Returns A newly allocated string with the substitution done. The caller 1870 * must deallocate it ! 1871 */ 1872xmlChar * 1873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 1874 int what, xmlChar end, xmlChar end2, xmlChar end3) { 1875 xmlChar *buffer = NULL; 1876 int buffer_size = 0; 1877 1878 xmlChar *current = NULL; 1879 const xmlChar *last; 1880 xmlEntityPtr ent; 1881 int c,l; 1882 int nbchars = 0; 1883 1884 if ((str == NULL) || (len < 0)) 1885 return(NULL); 1886 last = str + len; 1887 1888 if (ctxt->depth > 40) { 1889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 1890 return(NULL); 1891 } 1892 1893 /* 1894 * allocate a translation buffer. 1895 */ 1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 1898 if (buffer == NULL) goto mem_error; 1899 1900 /* 1901 * OK loop until we reach one of the ending char or a size limit. 1902 * we are operating on already parsed values. 
1903 */ 1904 if (str < last) 1905 c = CUR_SCHAR(str, l); 1906 else 1907 c = 0; 1908 while ((c != 0) && (c != end) && /* non input consuming loop */ 1909 (c != end2) && (c != end3)) { 1910 1911 if (c == 0) break; 1912 if ((c == '&') && (str[1] == '#')) { 1913 int val = xmlParseStringCharRef(ctxt, &str); 1914 if (val != 0) { 1915 COPY_BUF(0,buffer,nbchars,val); 1916 } 1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1918 if (xmlParserDebugEntities) 1919 xmlGenericError(xmlGenericErrorContext, 1920 "String decoding Entity Reference: %.30s\n", 1921 str); 1922 ent = xmlParseStringEntityRef(ctxt, &str); 1923 if ((ent != NULL) && 1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1925 if (ent->content != NULL) { 1926 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1927 } else { 1928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 1929 "predefined entity has no content\n"); 1930 } 1931 } else if ((ent != NULL) && (ent->content != NULL)) { 1932 xmlChar *rep; 1933 1934 ctxt->depth++; 1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1936 0, 0, 0); 1937 ctxt->depth--; 1938 if (rep != NULL) { 1939 current = rep; 1940 while (*current != 0) { /* non input consuming loop */ 1941 buffer[nbchars++] = *current++; 1942 if (nbchars > 1943 buffer_size - XML_PARSER_BUFFER_SIZE) { 1944 growBuffer(buffer); 1945 } 1946 } 1947 xmlFree(rep); 1948 } 1949 } else if (ent != NULL) { 1950 int i = xmlStrlen(ent->name); 1951 const xmlChar *cur = ent->name; 1952 1953 buffer[nbchars++] = '&'; 1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1955 growBuffer(buffer); 1956 } 1957 for (;i > 0;i--) 1958 buffer[nbchars++] = *cur++; 1959 buffer[nbchars++] = ';'; 1960 } 1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1962 if (xmlParserDebugEntities) 1963 xmlGenericError(xmlGenericErrorContext, 1964 "String decoding PE Reference: %.30s\n", str); 1965 ent = xmlParseStringPEReference(ctxt, &str); 1966 if (ent != NULL) { 1967 xmlChar *rep; 1968 1969 ctxt->depth++; 
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1971 0, 0, 0); 1972 ctxt->depth--; 1973 if (rep != NULL) { 1974 current = rep; 1975 while (*current != 0) { /* non input consuming loop */ 1976 buffer[nbchars++] = *current++; 1977 if (nbchars > 1978 buffer_size - XML_PARSER_BUFFER_SIZE) { 1979 growBuffer(buffer); 1980 } 1981 } 1982 xmlFree(rep); 1983 } 1984 } 1985 } else { 1986 COPY_BUF(l,buffer,nbchars,c); 1987 str += l; 1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1989 growBuffer(buffer); 1990 } 1991 } 1992 if (str < last) 1993 c = CUR_SCHAR(str, l); 1994 else 1995 c = 0; 1996 } 1997 buffer[nbchars++] = 0; 1998 return(buffer); 1999 2000mem_error: 2001 xmlErrMemory(ctxt, NULL); 2002 return(NULL); 2003} 2004 2005/** 2006 * xmlStringDecodeEntities: 2007 * @ctxt: the parser context 2008 * @str: the input string 2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2010 * @end: an end marker xmlChar, 0 if none 2011 * @end2: an end marker xmlChar, 0 if none 2012 * @end3: an end marker xmlChar, 0 if none 2013 * 2014 * Takes a entity string content and process to do the adequate substitutions. 2015 * 2016 * [67] Reference ::= EntityRef | CharRef 2017 * 2018 * [69] PEReference ::= '%' Name ';' 2019 * 2020 * Returns A newly allocated string with the substitution done. The caller 2021 * must deallocate it ! 2022 */ 2023xmlChar * 2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2025 xmlChar end, xmlChar end2, xmlChar end3) { 2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2027 end, end2, end3)); 2028} 2029 2030/************************************************************************ 2031 * * 2032 * Commodity functions, cleanup needed ? 
* 2033 * * 2034 ************************************************************************/ 2035 2036/** 2037 * areBlanks: 2038 * @ctxt: an XML parser context 2039 * @str: a xmlChar * 2040 * @len: the size of @str 2041 * @blank_chars: we know the chars are blanks 2042 * 2043 * Is this a sequence of blank chars that one can ignore ? 2044 * 2045 * Returns 1 if ignorable 0 otherwise. 2046 */ 2047 2048static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2049 int blank_chars) { 2050 int i, ret; 2051 xmlNodePtr lastChild; 2052 2053 /* 2054 * Don't spend time trying to differentiate them, the same callback is 2055 * used ! 2056 */ 2057 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2058 return(0); 2059 2060 /* 2061 * Check for xml:space value. 2062 */ 2063 if (*(ctxt->space) == 1) 2064 return(0); 2065 2066 /* 2067 * Check that the string is made of blanks 2068 */ 2069 if (blank_chars == 0) { 2070 for (i = 0;i < len;i++) 2071 if (!(IS_BLANK_CH(str[i]))) return(0); 2072 } 2073 2074 /* 2075 * Look if the element is mixed content in the DTD if available 2076 */ 2077 if (ctxt->node == NULL) return(0); 2078 if (ctxt->myDoc != NULL) { 2079 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2080 if (ret == 0) return(1); 2081 if (ret == 1) return(0); 2082 } 2083 2084 /* 2085 * Otherwise, heuristic :-\ 2086 */ 2087 if (RAW != '<') return(0); 2088 if ((ctxt->node->children == NULL) && 2089 (RAW == '<') && (NXT(1) == '/')) return(0); 2090 2091 lastChild = xmlGetLastChild(ctxt->node); 2092 if (lastChild == NULL) { 2093 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2094 (ctxt->node->content != NULL)) return(0); 2095 } else if (xmlNodeIsText(lastChild)) 2096 return(0); 2097 else if ((ctxt->node->children != NULL) && 2098 (xmlNodeIsText(ctxt->node->children))) 2099 return(0); 2100 return(1); 2101} 2102 2103/************************************************************************ 2104 * * 2105 * Extra stuff for namespace support * 2106 * Relates to 
http://www.w3.org/TR/WD-xml-names * 2107 * * 2108 ************************************************************************/ 2109 2110/** 2111 * xmlSplitQName: 2112 * @ctxt: an XML parser context 2113 * @name: an XML parser context 2114 * @prefix: a xmlChar ** 2115 * 2116 * parse an UTF8 encoded XML qualified name string 2117 * 2118 * [NS 5] QName ::= (Prefix ':')? LocalPart 2119 * 2120 * [NS 6] Prefix ::= NCName 2121 * 2122 * [NS 7] LocalPart ::= NCName 2123 * 2124 * Returns the local part, and prefix is updated 2125 * to get the Prefix if any. 2126 */ 2127 2128xmlChar * 2129xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2130 xmlChar buf[XML_MAX_NAMELEN + 5]; 2131 xmlChar *buffer = NULL; 2132 int len = 0; 2133 int max = XML_MAX_NAMELEN; 2134 xmlChar *ret = NULL; 2135 const xmlChar *cur = name; 2136 int c; 2137 2138 *prefix = NULL; 2139 2140 if (cur == NULL) return(NULL); 2141 2142#ifndef XML_XML_NAMESPACE 2143 /* xml: prefix is not really a namespace */ 2144 if ((cur[0] == 'x') && (cur[1] == 'm') && 2145 (cur[2] == 'l') && (cur[3] == ':')) 2146 return(xmlStrdup(name)); 2147#endif 2148 2149 /* nasty but well=formed */ 2150 if (cur[0] == ':') 2151 return(xmlStrdup(name)); 2152 2153 c = *cur++; 2154 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2155 buf[len++] = c; 2156 c = *cur++; 2157 } 2158 if (len >= max) { 2159 /* 2160 * Okay someone managed to make a huge name, so he's ready to pay 2161 * for the processing speed. 
2162 */ 2163 max = len * 2; 2164 2165 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2166 if (buffer == NULL) { 2167 xmlErrMemory(ctxt, NULL); 2168 return(NULL); 2169 } 2170 memcpy(buffer, buf, len); 2171 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2172 if (len + 10 > max) { 2173 max *= 2; 2174 buffer = (xmlChar *) xmlRealloc(buffer, 2175 max * sizeof(xmlChar)); 2176 if (buffer == NULL) { 2177 xmlErrMemory(ctxt, NULL); 2178 return(NULL); 2179 } 2180 } 2181 buffer[len++] = c; 2182 c = *cur++; 2183 } 2184 buffer[len] = 0; 2185 } 2186 2187 /* nasty but well=formed 2188 if ((c == ':') && (*cur == 0)) { 2189 return(xmlStrdup(name)); 2190 } */ 2191 2192 if (buffer == NULL) 2193 ret = xmlStrndup(buf, len); 2194 else { 2195 ret = buffer; 2196 buffer = NULL; 2197 max = XML_MAX_NAMELEN; 2198 } 2199 2200 2201 if (c == ':') { 2202 c = *cur; 2203 *prefix = ret; 2204 if (c == 0) { 2205 return(xmlStrndup(BAD_CAST "", 0)); 2206 } 2207 len = 0; 2208 2209 /* 2210 * Check that the first character is proper to start 2211 * a new name 2212 */ 2213 if (!(((c >= 0x61) && (c <= 0x7A)) || 2214 ((c >= 0x41) && (c <= 0x5A)) || 2215 (c == '_') || (c == ':'))) { 2216 int l; 2217 int first = CUR_SCHAR(cur, l); 2218 2219 if (!IS_LETTER(first) && (first != '_')) { 2220 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2221 "Name %s is not XML Namespace compliant\n", 2222 name); 2223 } 2224 } 2225 cur++; 2226 2227 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2228 buf[len++] = c; 2229 c = *cur++; 2230 } 2231 if (len >= max) { 2232 /* 2233 * Okay someone managed to make a huge name, so he's ready to pay 2234 * for the processing speed. 
2235 */ 2236 max = len * 2; 2237 2238 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2239 if (buffer == NULL) { 2240 xmlErrMemory(ctxt, NULL); 2241 return(NULL); 2242 } 2243 memcpy(buffer, buf, len); 2244 while (c != 0) { /* tested bigname2.xml */ 2245 if (len + 10 > max) { 2246 max *= 2; 2247 buffer = (xmlChar *) xmlRealloc(buffer, 2248 max * sizeof(xmlChar)); 2249 if (buffer == NULL) { 2250 xmlErrMemory(ctxt, NULL); 2251 return(NULL); 2252 } 2253 } 2254 buffer[len++] = c; 2255 c = *cur++; 2256 } 2257 buffer[len] = 0; 2258 } 2259 2260 if (buffer == NULL) 2261 ret = xmlStrndup(buf, len); 2262 else { 2263 ret = buffer; 2264 } 2265 } 2266 2267 return(ret); 2268} 2269 2270/************************************************************************ 2271 * * 2272 * The parser itself * 2273 * Relates to http://www.w3.org/TR/REC-xml * 2274 * * 2275 ************************************************************************/ 2276 2277static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2278static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2279 int *len, int *alloc, int normalize); 2280 2281/** 2282 * xmlParseName: 2283 * @ctxt: an XML parser context 2284 * 2285 * parse an XML name. 2286 * 2287 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 2288 * CombiningChar | Extender 2289 * 2290 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2291 * 2292 * [6] Names ::= Name (S Name)* 2293 * 2294 * Returns the Name parsed or NULL 2295 */ 2296 2297const xmlChar * 2298xmlParseName(xmlParserCtxtPtr ctxt) { 2299 const xmlChar *in; 2300 const xmlChar *ret; 2301 int count = 0; 2302 2303 GROW; 2304 2305 /* 2306 * Accelerator for simple ASCII names 2307 */ 2308 in = ctxt->input->cur; 2309 if (((*in >= 0x61) && (*in <= 0x7A)) || 2310 ((*in >= 0x41) && (*in <= 0x5A)) || 2311 (*in == '_') || (*in == ':')) { 2312 in++; 2313 while (((*in >= 0x61) && (*in <= 0x7A)) || 2314 ((*in >= 0x41) && (*in <= 0x5A)) || 2315 ((*in >= 0x30) && (*in <= 0x39)) || 2316 (*in == '_') || (*in == '-') || 2317 (*in == ':') || (*in == '.')) 2318 in++; 2319 if ((*in > 0) && (*in < 0x80)) { 2320 count = in - ctxt->input->cur; 2321 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2322 ctxt->input->cur = in; 2323 ctxt->nbChars += count; 2324 ctxt->input->col += count; 2325 if (ret == NULL) 2326 xmlErrMemory(ctxt, NULL); 2327 return(ret); 2328 } 2329 } 2330 return(xmlParseNameComplex(ctxt)); 2331} 2332 2333/** 2334 * xmlParseNameAndCompare: 2335 * @ctxt: an XML parser context 2336 * 2337 * parse an XML name and compares for match 2338 * (specialized for endtag parsing) 2339 * 2340 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2341 * and the name for mismatch 2342 */ 2343 2344static const xmlChar * 2345xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2346 register const xmlChar *cmp = other; 2347 register const xmlChar *in; 2348 const xmlChar *ret; 2349 2350 GROW; 2351 2352 in = ctxt->input->cur; 2353 while (*in != 0 && *in == *cmp) { 2354 ++in; 2355 ++cmp; 2356 } 2357 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2358 /* success */ 2359 ctxt->input->cur = in; 2360 return (const xmlChar*) 1; 2361 } 2362 /* failure (or end of input buffer), check with full function */ 2363 ret = 
xmlParseName (ctxt); 2364 /* strings coming from the dictionnary direct compare possible */ 2365 if (ret == other) { 2366 return (const xmlChar*) 1; 2367 } 2368 return ret; 2369} 2370 2371static const xmlChar * 2372xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2373 int len = 0, l; 2374 int c; 2375 int count = 0; 2376 2377 /* 2378 * Handler for more complex cases 2379 */ 2380 GROW; 2381 c = CUR_CHAR(l); 2382 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2383 (!IS_LETTER(c) && (c != '_') && 2384 (c != ':'))) { 2385 return(NULL); 2386 } 2387 2388 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2389 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2390 (c == '.') || (c == '-') || 2391 (c == '_') || (c == ':') || 2392 (IS_COMBINING(c)) || 2393 (IS_EXTENDER(c)))) { 2394 if (count++ > 100) { 2395 count = 0; 2396 GROW; 2397 } 2398 len += l; 2399 NEXTL(l); 2400 c = CUR_CHAR(l); 2401 } 2402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2403} 2404 2405/** 2406 * xmlParseStringName: 2407 * @ctxt: an XML parser context 2408 * @str: a pointer to the string pointer (IN/OUT) 2409 * 2410 * parse an XML name. 2411 * 2412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2413 * CombiningChar | Extender 2414 * 2415 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2416 * 2417 * [6] Names ::= Name (S Name)* 2418 * 2419 * Returns the Name parsed or NULL. The @str pointer 2420 * is updated to the current location in the string. 
2421 */ 2422 2423static xmlChar * 2424xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2425 xmlChar buf[XML_MAX_NAMELEN + 5]; 2426 const xmlChar *cur = *str; 2427 int len = 0, l; 2428 int c; 2429 2430 c = CUR_SCHAR(cur, l); 2431 if (!IS_LETTER(c) && (c != '_') && 2432 (c != ':')) { 2433 return(NULL); 2434 } 2435 2436 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2437 (c == '.') || (c == '-') || 2438 (c == '_') || (c == ':') || 2439 (IS_COMBINING(c)) || 2440 (IS_EXTENDER(c))) { 2441 COPY_BUF(l,buf,len,c); 2442 cur += l; 2443 c = CUR_SCHAR(cur, l); 2444 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2445 /* 2446 * Okay someone managed to make a huge name, so he's ready to pay 2447 * for the processing speed. 2448 */ 2449 xmlChar *buffer; 2450 int max = len * 2; 2451 2452 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2453 if (buffer == NULL) { 2454 xmlErrMemory(ctxt, NULL); 2455 return(NULL); 2456 } 2457 memcpy(buffer, buf, len); 2458 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2459 /* test bigentname.xml */ 2460 (c == '.') || (c == '-') || 2461 (c == '_') || (c == ':') || 2462 (IS_COMBINING(c)) || 2463 (IS_EXTENDER(c))) { 2464 if (len + 10 > max) { 2465 max *= 2; 2466 buffer = (xmlChar *) xmlRealloc(buffer, 2467 max * sizeof(xmlChar)); 2468 if (buffer == NULL) { 2469 xmlErrMemory(ctxt, NULL); 2470 return(NULL); 2471 } 2472 } 2473 COPY_BUF(l,buffer,len,c); 2474 cur += l; 2475 c = CUR_SCHAR(cur, l); 2476 } 2477 buffer[len] = 0; 2478 *str = cur; 2479 return(buffer); 2480 } 2481 } 2482 *str = cur; 2483 return(xmlStrndup(buf, len)); 2484} 2485 2486/** 2487 * xmlParseNmtoken: 2488 * @ctxt: an XML parser context 2489 * 2490 * parse an XML Nmtoken. 
2491 * 2492 * [7] Nmtoken ::= (NameChar)+ 2493 * 2494 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2495 * 2496 * Returns the Nmtoken parsed or NULL 2497 */ 2498 2499xmlChar * 2500xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2501 xmlChar buf[XML_MAX_NAMELEN + 5]; 2502 int len = 0, l; 2503 int c; 2504 int count = 0; 2505 2506 GROW; 2507 c = CUR_CHAR(l); 2508 2509 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2510 (c == '.') || (c == '-') || 2511 (c == '_') || (c == ':') || 2512 (IS_COMBINING(c)) || 2513 (IS_EXTENDER(c))) { 2514 if (count++ > 100) { 2515 count = 0; 2516 GROW; 2517 } 2518 COPY_BUF(l,buf,len,c); 2519 NEXTL(l); 2520 c = CUR_CHAR(l); 2521 if (len >= XML_MAX_NAMELEN) { 2522 /* 2523 * Okay someone managed to make a huge token, so he's ready to pay 2524 * for the processing speed. 2525 */ 2526 xmlChar *buffer; 2527 int max = len * 2; 2528 2529 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2530 if (buffer == NULL) { 2531 xmlErrMemory(ctxt, NULL); 2532 return(NULL); 2533 } 2534 memcpy(buffer, buf, len); 2535 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2536 (c == '.') || (c == '-') || 2537 (c == '_') || (c == ':') || 2538 (IS_COMBINING(c)) || 2539 (IS_EXTENDER(c))) { 2540 if (count++ > 100) { 2541 count = 0; 2542 GROW; 2543 } 2544 if (len + 10 > max) { 2545 max *= 2; 2546 buffer = (xmlChar *) xmlRealloc(buffer, 2547 max * sizeof(xmlChar)); 2548 if (buffer == NULL) { 2549 xmlErrMemory(ctxt, NULL); 2550 return(NULL); 2551 } 2552 } 2553 COPY_BUF(l,buffer,len,c); 2554 NEXTL(l); 2555 c = CUR_CHAR(l); 2556 } 2557 buffer[len] = 0; 2558 return(buffer); 2559 } 2560 } 2561 if (len == 0) 2562 return(NULL); 2563 return(xmlStrndup(buf, len)); 2564} 2565 2566/** 2567 * xmlParseEntityValue: 2568 * @ctxt: an XML parser context 2569 * @orig: if non-NULL store a copy of the original entity value 2570 * 2571 * parse a value for ENTITY declarations 2572 * 2573 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2574 * 
"'" ([^%&'] | PEReference | Reference)* "'" 2575 * 2576 * Returns the EntityValue parsed with reference substituted or NULL 2577 */ 2578 2579xmlChar * 2580xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2581 xmlChar *buf = NULL; 2582 int len = 0; 2583 int size = XML_PARSER_BUFFER_SIZE; 2584 int c, l; 2585 xmlChar stop; 2586 xmlChar *ret = NULL; 2587 const xmlChar *cur = NULL; 2588 xmlParserInputPtr input; 2589 2590 if (RAW == '"') stop = '"'; 2591 else if (RAW == '\'') stop = '\''; 2592 else { 2593 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 2594 return(NULL); 2595 } 2596 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 2597 if (buf == NULL) { 2598 xmlErrMemory(ctxt, NULL); 2599 return(NULL); 2600 } 2601 2602 /* 2603 * The content of the entity definition is copied in a buffer. 2604 */ 2605 2606 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2607 input = ctxt->input; 2608 GROW; 2609 NEXT; 2610 c = CUR_CHAR(l); 2611 /* 2612 * NOTE: 4.4.5 Included in Literal 2613 * When a parameter entity reference appears in a literal entity 2614 * value, ... a single or double quote character in the replacement 2615 * text is always treated as a normal data character and will not 2616 * terminate the literal. 2617 * In practice it means we stop the loop only when back at parsing 2618 * the initial entity and the quote is found 2619 */ 2620 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2621 (ctxt->input != input))) { 2622 if (len + 5 >= size) { 2623 size *= 2; 2624 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2625 if (buf == NULL) { 2626 xmlErrMemory(ctxt, NULL); 2627 return(NULL); 2628 } 2629 } 2630 COPY_BUF(l,buf,len,c); 2631 NEXTL(l); 2632 /* 2633 * Pop-up of finished entities. 
2634 */ 2635 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2636 xmlPopInput(ctxt); 2637 2638 GROW; 2639 c = CUR_CHAR(l); 2640 if (c == 0) { 2641 GROW; 2642 c = CUR_CHAR(l); 2643 } 2644 } 2645 buf[len] = 0; 2646 2647 /* 2648 * Raise problem w.r.t. '&' and '%' being used in non-entities 2649 * reference constructs. Note Charref will be handled in 2650 * xmlStringDecodeEntities() 2651 */ 2652 cur = buf; 2653 while (*cur != 0) { /* non input consuming */ 2654 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2655 xmlChar *name; 2656 xmlChar tmp = *cur; 2657 2658 cur++; 2659 name = xmlParseStringName(ctxt, &cur); 2660 if ((name == NULL) || (*cur != ';')) { 2661 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 2662 "EntityValue: '%c' forbidden except for entities references\n", 2663 tmp); 2664 } 2665 if ((tmp == '%') && (ctxt->inSubset == 1) && 2666 (ctxt->inputNr == 1)) { 2667 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 2668 } 2669 if (name != NULL) 2670 xmlFree(name); 2671 if (*cur == 0) 2672 break; 2673 } 2674 cur++; 2675 } 2676 2677 /* 2678 * Then PEReference entities are substituted. 2679 */ 2680 if (c != stop) { 2681 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 2682 xmlFree(buf); 2683 } else { 2684 NEXT; 2685 /* 2686 * NOTE: 4.4.7 Bypassed 2687 * When a general entity reference appears in the EntityValue in 2688 * an entity declaration, it is bypassed and left as is. 2689 * so XML_SUBSTITUTE_REF is not set here. 
2690 */ 2691 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2692 0, 0, 0); 2693 if (orig != NULL) 2694 *orig = buf; 2695 else 2696 xmlFree(buf); 2697 } 2698 2699 return(ret); 2700} 2701 2702/** 2703 * xmlParseAttValueComplex: 2704 * @ctxt: an XML parser context 2705 * @len: the resulting attribute len 2706 * @normalize: wether to apply the inner normalization 2707 * 2708 * parse a value for an attribute, this is the fallback function 2709 * of xmlParseAttValue() when the attribute parsing requires handling 2710 * of non-ASCII characters, or normalization compaction. 2711 * 2712 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2713 */ 2714static xmlChar * 2715xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 2716 xmlChar limit = 0; 2717 xmlChar *buf = NULL; 2718 int len = 0; 2719 int buf_size = 0; 2720 int c, l, in_space = 0; 2721 xmlChar *current = NULL; 2722 xmlEntityPtr ent; 2723 2724 if (NXT(0) == '"') { 2725 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2726 limit = '"'; 2727 NEXT; 2728 } else if (NXT(0) == '\'') { 2729 limit = '\''; 2730 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2731 NEXT; 2732 } else { 2733 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 2734 return(NULL); 2735 } 2736 2737 /* 2738 * allocate a translation buffer. 2739 */ 2740 buf_size = XML_PARSER_BUFFER_SIZE; 2741 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 2742 if (buf == NULL) goto mem_error; 2743 2744 /* 2745 * OK loop until we reach one of the ending char or a size limit. 
2746 */ 2747 c = CUR_CHAR(l); 2748 while ((NXT(0) != limit) && /* checked */ 2749 (c != '<')) { 2750 if (c == 0) break; 2751 if (c == '&') { 2752 in_space = 0; 2753 if (NXT(1) == '#') { 2754 int val = xmlParseCharRef(ctxt); 2755 2756 if (val == '&') { 2757 if (ctxt->replaceEntities) { 2758 if (len > buf_size - 10) { 2759 growBuffer(buf); 2760 } 2761 buf[len++] = '&'; 2762 } else { 2763 /* 2764 * The reparsing will be done in xmlStringGetNodeList() 2765 * called by the attribute() function in SAX.c 2766 */ 2767 if (len > buf_size - 10) { 2768 growBuffer(buf); 2769 } 2770 buf[len++] = '&'; 2771 buf[len++] = '#'; 2772 buf[len++] = '3'; 2773 buf[len++] = '8'; 2774 buf[len++] = ';'; 2775 } 2776 } else { 2777 if (len > buf_size - 10) { 2778 growBuffer(buf); 2779 } 2780 len += xmlCopyChar(0, &buf[len], val); 2781 } 2782 } else { 2783 ent = xmlParseEntityRef(ctxt); 2784 if ((ent != NULL) && 2785 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2786 if (len > buf_size - 10) { 2787 growBuffer(buf); 2788 } 2789 if ((ctxt->replaceEntities == 0) && 2790 (ent->content[0] == '&')) { 2791 buf[len++] = '&'; 2792 buf[len++] = '#'; 2793 buf[len++] = '3'; 2794 buf[len++] = '8'; 2795 buf[len++] = ';'; 2796 } else { 2797 buf[len++] = ent->content[0]; 2798 } 2799 } else if ((ent != NULL) && 2800 (ctxt->replaceEntities != 0)) { 2801 xmlChar *rep; 2802 2803 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2804 rep = xmlStringDecodeEntities(ctxt, ent->content, 2805 XML_SUBSTITUTE_REF, 2806 0, 0, 0); 2807 if (rep != NULL) { 2808 current = rep; 2809 while (*current != 0) { /* non input consuming */ 2810 buf[len++] = *current++; 2811 if (len > buf_size - 10) { 2812 growBuffer(buf); 2813 } 2814 } 2815 xmlFree(rep); 2816 } 2817 } else { 2818 if (len > buf_size - 10) { 2819 growBuffer(buf); 2820 } 2821 if (ent->content != NULL) 2822 buf[len++] = ent->content[0]; 2823 } 2824 } else if (ent != NULL) { 2825 int i = xmlStrlen(ent->name); 2826 const xmlChar *cur = ent->name; 2827 2828 /* 2829 * 
This may look absurd but is needed to detect 2830 * entities problems 2831 */ 2832 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2833 (ent->content != NULL)) { 2834 xmlChar *rep; 2835 rep = xmlStringDecodeEntities(ctxt, ent->content, 2836 XML_SUBSTITUTE_REF, 0, 0, 0); 2837 if (rep != NULL) 2838 xmlFree(rep); 2839 } 2840 2841 /* 2842 * Just output the reference 2843 */ 2844 buf[len++] = '&'; 2845 if (len > buf_size - i - 10) { 2846 growBuffer(buf); 2847 } 2848 for (;i > 0;i--) 2849 buf[len++] = *cur++; 2850 buf[len++] = ';'; 2851 } 2852 } 2853 } else { 2854 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2855 if ((len != 0) || (!normalize)) { 2856 if ((!normalize) || (!in_space)) { 2857 COPY_BUF(l,buf,len,0x20); 2858 if (len > buf_size - 10) { 2859 growBuffer(buf); 2860 } 2861 } 2862 in_space = 1; 2863 } 2864 } else { 2865 in_space = 0; 2866 COPY_BUF(l,buf,len,c); 2867 if (len > buf_size - 10) { 2868 growBuffer(buf); 2869 } 2870 } 2871 NEXTL(l); 2872 } 2873 GROW; 2874 c = CUR_CHAR(l); 2875 } 2876 if ((in_space) && (normalize)) { 2877 while (buf[len - 1] == 0x20) len--; 2878 } 2879 buf[len] = 0; 2880 if (RAW == '<') { 2881 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 2882 } else if (RAW != limit) { 2883 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 2884 "AttValue: ' expected\n"); 2885 } else 2886 NEXT; 2887 if (attlen != NULL) *attlen = len; 2888 return(buf); 2889 2890mem_error: 2891 xmlErrMemory(ctxt, NULL); 2892 return(NULL); 2893} 2894 2895/** 2896 * xmlParseAttValue: 2897 * @ctxt: an XML parser context 2898 * 2899 * parse a value for an attribute 2900 * Note: the parser won't do substitution of entities here, this 2901 * will be handled later in xmlStringGetNodeList 2902 * 2903 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2904 * "'" ([^<&'] | Reference)* "'" 2905 * 2906 * 3.3.3 Attribute-Value Normalization: 2907 * Before the value of an attribute is passed to the application or 2908 * checked for validity, the XML 
processor must normalize it as follows: 2909 * - a character reference is processed by appending the referenced 2910 * character to the attribute value 2911 * - an entity reference is processed by recursively processing the 2912 * replacement text of the entity 2913 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2914 * appending #x20 to the normalized value, except that only a single 2915 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2916 * parsed entity or the literal entity value of an internal parsed entity 2917 * - other characters are processed by appending them to the normalized value 2918 * If the declared value is not CDATA, then the XML processor must further 2919 * process the normalized attribute value by discarding any leading and 2920 * trailing space (#x20) characters, and by replacing sequences of space 2921 * (#x20) characters by a single space (#x20) character. 2922 * All attributes for which no declaration has been read should be treated 2923 * by a non-validating parser as if declared CDATA. 2924 * 2925 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
2926 */ 2927 2928 2929xmlChar * 2930xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2931 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 2932} 2933 2934/** 2935 * xmlParseSystemLiteral: 2936 * @ctxt: an XML parser context 2937 * 2938 * parse an XML Literal 2939 * 2940 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2941 * 2942 * Returns the SystemLiteral parsed or NULL 2943 */ 2944 2945xmlChar * 2946xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2947 xmlChar *buf = NULL; 2948 int len = 0; 2949 int size = XML_PARSER_BUFFER_SIZE; 2950 int cur, l; 2951 xmlChar stop; 2952 int state = ctxt->instate; 2953 int count = 0; 2954 2955 SHRINK; 2956 if (RAW == '"') { 2957 NEXT; 2958 stop = '"'; 2959 } else if (RAW == '\'') { 2960 NEXT; 2961 stop = '\''; 2962 } else { 2963 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 2964 return(NULL); 2965 } 2966 2967 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 2968 if (buf == NULL) { 2969 xmlErrMemory(ctxt, NULL); 2970 return(NULL); 2971 } 2972 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 2973 cur = CUR_CHAR(l); 2974 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 2975 if (len + 5 >= size) { 2976 size *= 2; 2977 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2978 if (buf == NULL) { 2979 xmlErrMemory(ctxt, NULL); 2980 ctxt->instate = (xmlParserInputState) state; 2981 return(NULL); 2982 } 2983 } 2984 count++; 2985 if (count > 50) { 2986 GROW; 2987 count = 0; 2988 } 2989 COPY_BUF(l,buf,len,cur); 2990 NEXTL(l); 2991 cur = CUR_CHAR(l); 2992 if (cur == 0) { 2993 GROW; 2994 SHRINK; 2995 cur = CUR_CHAR(l); 2996 } 2997 } 2998 buf[len] = 0; 2999 ctxt->instate = (xmlParserInputState) state; 3000 if (!IS_CHAR(cur)) { 3001 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3002 } else { 3003 NEXT; 3004 } 3005 return(buf); 3006} 3007 3008/** 3009 * xmlParsePubidLiteral: 3010 * @ctxt: an XML parser context 3011 * 3012 * parse an XML public literal 3013 * 3014 * [12] PubidLiteral ::= '"' PubidChar* '"' 
| "'" (PubidChar - "'")* "'" 3015 * 3016 * Returns the PubidLiteral parsed or NULL. 3017 */ 3018 3019xmlChar * 3020xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3021 xmlChar *buf = NULL; 3022 int len = 0; 3023 int size = XML_PARSER_BUFFER_SIZE; 3024 xmlChar cur; 3025 xmlChar stop; 3026 int count = 0; 3027 xmlParserInputState oldstate = ctxt->instate; 3028 3029 SHRINK; 3030 if (RAW == '"') { 3031 NEXT; 3032 stop = '"'; 3033 } else if (RAW == '\'') { 3034 NEXT; 3035 stop = '\''; 3036 } else { 3037 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3038 return(NULL); 3039 } 3040 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3041 if (buf == NULL) { 3042 xmlErrMemory(ctxt, NULL); 3043 return(NULL); 3044 } 3045 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3046 cur = CUR; 3047 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3048 if (len + 1 >= size) { 3049 size *= 2; 3050 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3051 if (buf == NULL) { 3052 xmlErrMemory(ctxt, NULL); 3053 return(NULL); 3054 } 3055 } 3056 buf[len++] = cur; 3057 count++; 3058 if (count > 50) { 3059 GROW; 3060 count = 0; 3061 } 3062 NEXT; 3063 cur = CUR; 3064 if (cur == 0) { 3065 GROW; 3066 SHRINK; 3067 cur = CUR; 3068 } 3069 } 3070 buf[len] = 0; 3071 if (cur != stop) { 3072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3073 } else { 3074 NEXT; 3075 } 3076 ctxt->instate = oldstate; 3077 return(buf); 3078} 3079 3080void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3081/** 3082 * xmlParseCharData: 3083 * @ctxt: an XML parser context 3084 * @cdata: int indicating whether we are within a CDATA section 3085 * 3086 * parse a CharData section. 3087 * if we are within a CDATA section ']]>' marks an end of section. 
3088 * 3089 * The right angle bracket (>) may be represented using the string ">", 3090 * and must, for compatibility, be escaped using ">" or a character 3091 * reference when it appears in the string "]]>" in content, when that 3092 * string is not marking the end of a CDATA section. 3093 * 3094 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 3095 */ 3096 3097void 3098xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 3099 const xmlChar *in; 3100 int nbchar = 0; 3101 int line = ctxt->input->line; 3102 int col = ctxt->input->col; 3103 3104 SHRINK; 3105 GROW; 3106 /* 3107 * Accelerated common case where input don't need to be 3108 * modified before passing it to the handler. 3109 */ 3110 if (!cdata) { 3111 in = ctxt->input->cur; 3112 do { 3113get_more_space: 3114 while (*in == 0x20) in++; 3115 if (*in == 0xA) { 3116 ctxt->input->line++; 3117 in++; 3118 while (*in == 0xA) { 3119 ctxt->input->line++; 3120 in++; 3121 } 3122 goto get_more_space; 3123 } 3124 if (*in == '<') { 3125 nbchar = in - ctxt->input->cur; 3126 if (nbchar > 0) { 3127 const xmlChar *tmp = ctxt->input->cur; 3128 ctxt->input->cur = in; 3129 3130 if (ctxt->sax->ignorableWhitespace != 3131 ctxt->sax->characters) { 3132 if (areBlanks(ctxt, tmp, nbchar, 1)) { 3133 ctxt->sax->ignorableWhitespace(ctxt->userData, 3134 tmp, nbchar); 3135 } else if (ctxt->sax->characters != NULL) 3136 ctxt->sax->characters(ctxt->userData, 3137 tmp, nbchar); 3138 } else if (ctxt->sax->characters != NULL) { 3139 ctxt->sax->characters(ctxt->userData, 3140 tmp, nbchar); 3141 } 3142 } 3143 return; 3144 } 3145get_more: 3146 while (((*in > ']') && (*in <= 0x7F)) || 3147 ((*in > '&') && (*in < '<')) || 3148 ((*in > '<') && (*in < ']')) || 3149 ((*in >= 0x20) && (*in < '&')) || 3150 (*in == 0x09)) 3151 in++; 3152 if (*in == 0xA) { 3153 ctxt->input->line++; 3154 in++; 3155 while (*in == 0xA) { 3156 ctxt->input->line++; 3157 in++; 3158 } 3159 goto get_more; 3160 } 3161 if (*in == ']') { 3162 if ((in[1] == ']') && (in[2] == '>')) { 
3163 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3164 ctxt->input->cur = in; 3165 return; 3166 } 3167 in++; 3168 goto get_more; 3169 } 3170 nbchar = in - ctxt->input->cur; 3171 if (nbchar > 0) { 3172 if ((ctxt->sax->ignorableWhitespace != 3173 ctxt->sax->characters) && 3174 (IS_BLANK_CH(*ctxt->input->cur))) { 3175 const xmlChar *tmp = ctxt->input->cur; 3176 ctxt->input->cur = in; 3177 3178 if (areBlanks(ctxt, tmp, nbchar, 0)) { 3179 ctxt->sax->ignorableWhitespace(ctxt->userData, 3180 tmp, nbchar); 3181 } else if (ctxt->sax->characters != NULL) 3182 ctxt->sax->characters(ctxt->userData, 3183 tmp, nbchar); 3184 line = ctxt->input->line; 3185 col = ctxt->input->col; 3186 } else { 3187 if (ctxt->sax->characters != NULL) 3188 ctxt->sax->characters(ctxt->userData, 3189 ctxt->input->cur, nbchar); 3190 line = ctxt->input->line; 3191 col = ctxt->input->col; 3192 } 3193 } 3194 ctxt->input->cur = in; 3195 if (*in == 0xD) { 3196 in++; 3197 if (*in == 0xA) { 3198 ctxt->input->cur = in; 3199 in++; 3200 ctxt->input->line++; 3201 continue; /* while */ 3202 } 3203 in--; 3204 } 3205 if (*in == '<') { 3206 return; 3207 } 3208 if (*in == '&') { 3209 return; 3210 } 3211 SHRINK; 3212 GROW; 3213 in = ctxt->input->cur; 3214 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 3215 nbchar = 0; 3216 } 3217 ctxt->input->line = line; 3218 ctxt->input->col = col; 3219 xmlParseCharDataComplex(ctxt, cdata); 3220} 3221 3222/** 3223 * xmlParseCharDataComplex: 3224 * @ctxt: an XML parser context 3225 * @cdata: int indicating whether we are within a CDATA section 3226 * 3227 * parse a CharData section.this is the fallback function 3228 * of xmlParseCharData() when the parsing requires handling 3229 * of non-ASCII characters. 
3230 */ 3231void 3232xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3233 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3234 int nbchar = 0; 3235 int cur, l; 3236 int count = 0; 3237 3238 SHRINK; 3239 GROW; 3240 cur = CUR_CHAR(l); 3241 while ((cur != '<') && /* checked */ 3242 (cur != '&') && 3243 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3244 if ((cur == ']') && (NXT(1) == ']') && 3245 (NXT(2) == '>')) { 3246 if (cdata) break; 3247 else { 3248 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3249 } 3250 } 3251 COPY_BUF(l,buf,nbchar,cur); 3252 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3253 buf[nbchar] = 0; 3254 3255 /* 3256 * OK the segment is to be consumed as chars. 3257 */ 3258 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3259 if (areBlanks(ctxt, buf, nbchar, 0)) { 3260 if (ctxt->sax->ignorableWhitespace != NULL) 3261 ctxt->sax->ignorableWhitespace(ctxt->userData, 3262 buf, nbchar); 3263 } else { 3264 if (ctxt->sax->characters != NULL) 3265 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3266 } 3267 } 3268 nbchar = 0; 3269 } 3270 count++; 3271 if (count > 50) { 3272 GROW; 3273 count = 0; 3274 } 3275 NEXTL(l); 3276 cur = CUR_CHAR(l); 3277 } 3278 if (nbchar != 0) { 3279 buf[nbchar] = 0; 3280 /* 3281 * OK the segment is to be consumed as chars. 
3282 */ 3283 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3284 if (areBlanks(ctxt, buf, nbchar, 0)) { 3285 if (ctxt->sax->ignorableWhitespace != NULL) 3286 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3287 } else { 3288 if (ctxt->sax->characters != NULL) 3289 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3290 } 3291 } 3292 } 3293} 3294 3295/** 3296 * xmlParseExternalID: 3297 * @ctxt: an XML parser context 3298 * @publicID: a xmlChar** receiving PubidLiteral 3299 * @strict: indicate whether we should restrict parsing to only 3300 * production [75], see NOTE below 3301 * 3302 * Parse an External ID or a Public ID 3303 * 3304 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3305 * 'PUBLIC' S PubidLiteral S SystemLiteral 3306 * 3307 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3308 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3309 * 3310 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3311 * 3312 * Returns the function returns SystemLiteral and in the second 3313 * case publicID receives PubidLiteral, is strict is off 3314 * it is possible to return NULL and have publicID set. 
3315 */ 3316 3317xmlChar * 3318xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3319 xmlChar *URI = NULL; 3320 3321 SHRINK; 3322 3323 *publicID = NULL; 3324 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3325 SKIP(6); 3326 if (!IS_BLANK_CH(CUR)) { 3327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3328 "Space required after 'SYSTEM'\n"); 3329 } 3330 SKIP_BLANKS; 3331 URI = xmlParseSystemLiteral(ctxt); 3332 if (URI == NULL) { 3333 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3334 } 3335 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3336 SKIP(6); 3337 if (!IS_BLANK_CH(CUR)) { 3338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3339 "Space required after 'PUBLIC'\n"); 3340 } 3341 SKIP_BLANKS; 3342 *publicID = xmlParsePubidLiteral(ctxt); 3343 if (*publicID == NULL) { 3344 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3345 } 3346 if (strict) { 3347 /* 3348 * We don't handle [83] so "S SystemLiteral" is required. 3349 */ 3350 if (!IS_BLANK_CH(CUR)) { 3351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3352 "Space required after the Public Identifier\n"); 3353 } 3354 } else { 3355 /* 3356 * We handle [83] so we return immediately, if 3357 * "S SystemLiteral" is not detected. From a purely parsing 3358 * point of view that's a nice mess. 3359 */ 3360 const xmlChar *ptr; 3361 GROW; 3362 3363 ptr = CUR_PTR; 3364 if (!IS_BLANK_CH(*ptr)) return(NULL); 3365 3366 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3367 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3368 } 3369 SKIP_BLANKS; 3370 URI = xmlParseSystemLiteral(ctxt); 3371 if (URI == NULL) { 3372 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3373 } 3374 } 3375 return(URI); 3376} 3377 3378/** 3379 * xmlParseComment: 3380 * @ctxt: an XML parser context 3381 * 3382 * Skip an XML (SGML) comment <!-- .... --> 3383 * The spec says that "For compatibility, the string "--" (double-hyphen) 3384 * must not occur within comments. 
" 3385 * 3386 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3387 */ 3388void 3389xmlParseComment(xmlParserCtxtPtr ctxt) { 3390 xmlChar *buf = NULL; 3391 int len; 3392 int size = XML_PARSER_BUFFER_SIZE; 3393 int q, ql; 3394 int r, rl; 3395 int cur, l; 3396 xmlParserInputState state; 3397 xmlParserInputPtr input = ctxt->input; 3398 int count = 0; 3399 3400 /* 3401 * Check that there is a comment right here. 3402 */ 3403 if ((RAW != '<') || (NXT(1) != '!') || 3404 (NXT(2) != '-') || (NXT(3) != '-')) return; 3405 3406 state = ctxt->instate; 3407 ctxt->instate = XML_PARSER_COMMENT; 3408 SHRINK; 3409 SKIP(4); 3410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3411 if (buf == NULL) { 3412 xmlErrMemory(ctxt, NULL); 3413 ctxt->instate = state; 3414 return; 3415 } 3416 q = CUR_CHAR(ql); 3417 if (q == 0) 3418 goto not_terminated; 3419 NEXTL(ql); 3420 r = CUR_CHAR(rl); 3421 if (r == 0) 3422 goto not_terminated; 3423 NEXTL(rl); 3424 cur = CUR_CHAR(l); 3425 if (cur == 0) 3426 goto not_terminated; 3427 len = 0; 3428 while (IS_CHAR(cur) && /* checked */ 3429 ((cur != '>') || 3430 (r != '-') || (q != '-'))) { 3431 if ((r == '-') && (q == '-')) { 3432 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 3433 } 3434 if (len + 5 >= size) { 3435 size *= 2; 3436 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3437 if (buf == NULL) { 3438 xmlErrMemory(ctxt, NULL); 3439 ctxt->instate = state; 3440 return; 3441 } 3442 } 3443 COPY_BUF(ql,buf,len,q); 3444 q = r; 3445 ql = rl; 3446 r = cur; 3447 rl = l; 3448 3449 count++; 3450 if (count > 50) { 3451 GROW; 3452 count = 0; 3453 } 3454 NEXTL(l); 3455 cur = CUR_CHAR(l); 3456 if (cur == 0) { 3457 SHRINK; 3458 GROW; 3459 cur = CUR_CHAR(l); 3460 } 3461 } 3462 buf[len] = 0; 3463 if (!IS_CHAR(cur)) { 3464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3465 "Comment not terminated \n<!--%.50s\n", buf); 3466 xmlFree(buf); 3467 } else { 3468 if (input != ctxt->input) { 3469 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 3470 "Comment doesn't start and stop in the same entity\n"); 3471 } 3472 NEXT; 3473 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3474 (!ctxt->disableSAX)) 3475 ctxt->sax->comment(ctxt->userData, buf); 3476 xmlFree(buf); 3477 } 3478 ctxt->instate = state; 3479 return; 3480not_terminated: 3481 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3482 "Comment not terminated\n", NULL); 3483 xmlFree(buf); 3484} 3485 3486/** 3487 * xmlParsePITarget: 3488 * @ctxt: an XML parser context 3489 * 3490 * parse the name of a PI 3491 * 3492 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3493 * 3494 * Returns the PITarget name or NULL 3495 */ 3496 3497const xmlChar * 3498xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3499 const xmlChar *name; 3500 3501 name = xmlParseName(ctxt); 3502 if ((name != NULL) && 3503 ((name[0] == 'x') || (name[0] == 'X')) && 3504 ((name[1] == 'm') || (name[1] == 'M')) && 3505 ((name[2] == 'l') || (name[2] == 'L'))) { 3506 int i; 3507 if ((name[0] == 'x') && (name[1] == 'm') && 3508 (name[2] == 'l') && (name[3] == 0)) { 3509 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3510 "XML declaration allowed only at the start of the document\n"); 3511 return(name); 3512 } else if (name[3] == 0) { 3513 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 3514 return(name); 3515 } 3516 for (i = 0;;i++) { 3517 if (xmlW3CPIs[i] == NULL) break; 3518 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3519 return(name); 3520 } 3521 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3522 "xmlParsePITarget: invalid name prefix 'xml'\n", 3523 NULL, NULL); 3524 } 3525 return(name); 3526} 3527 3528#ifdef LIBXML_CATALOG_ENABLED 3529/** 3530 * xmlParseCatalogPI: 3531 * @ctxt: an XML parser context 3532 * @catalog: the PI value string 3533 * 3534 * parse an XML Catalog Processing Instruction. 
3535 * 3536 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3537 * 3538 * Occurs only if allowed by the user and if happening in the Misc 3539 * part of the document before any doctype informations 3540 * This will add the given catalog to the parsing context in order 3541 * to be used if there is a resolution need further down in the document 3542 */ 3543 3544static void 3545xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3546 xmlChar *URL = NULL; 3547 const xmlChar *tmp, *base; 3548 xmlChar marker; 3549 3550 tmp = catalog; 3551 while (IS_BLANK_CH(*tmp)) tmp++; 3552 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3553 goto error; 3554 tmp += 7; 3555 while (IS_BLANK_CH(*tmp)) tmp++; 3556 if (*tmp != '=') { 3557 return; 3558 } 3559 tmp++; 3560 while (IS_BLANK_CH(*tmp)) tmp++; 3561 marker = *tmp; 3562 if ((marker != '\'') && (marker != '"')) 3563 goto error; 3564 tmp++; 3565 base = tmp; 3566 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3567 if (*tmp == 0) 3568 goto error; 3569 URL = xmlStrndup(base, tmp - base); 3570 tmp++; 3571 while (IS_BLANK_CH(*tmp)) tmp++; 3572 if (*tmp != 0) 3573 goto error; 3574 3575 if (URL != NULL) { 3576 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3577 xmlFree(URL); 3578 } 3579 return; 3580 3581error: 3582 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 3583 "Catalog PI syntax error: %s\n", 3584 catalog, NULL); 3585 if (URL != NULL) 3586 xmlFree(URL); 3587} 3588#endif 3589 3590/** 3591 * xmlParsePI: 3592 * @ctxt: an XML parser context 3593 * 3594 * parse an XML Processing Instruction. 3595 * 3596 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3597 * 3598 * The processing is transfered to SAX once parsed. 
3599 */ 3600 3601void 3602xmlParsePI(xmlParserCtxtPtr ctxt) { 3603 xmlChar *buf = NULL; 3604 int len = 0; 3605 int size = XML_PARSER_BUFFER_SIZE; 3606 int cur, l; 3607 const xmlChar *target; 3608 xmlParserInputState state; 3609 int count = 0; 3610 3611 if ((RAW == '<') && (NXT(1) == '?')) { 3612 xmlParserInputPtr input = ctxt->input; 3613 state = ctxt->instate; 3614 ctxt->instate = XML_PARSER_PI; 3615 /* 3616 * this is a Processing Instruction. 3617 */ 3618 SKIP(2); 3619 SHRINK; 3620 3621 /* 3622 * Parse the target name and check for special support like 3623 * namespace. 3624 */ 3625 target = xmlParsePITarget(ctxt); 3626 if (target != NULL) { 3627 if ((RAW == '?') && (NXT(1) == '>')) { 3628 if (input != ctxt->input) { 3629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 3630 "PI declaration doesn't start and stop in the same entity\n"); 3631 } 3632 SKIP(2); 3633 3634 /* 3635 * SAX: PI detected. 3636 */ 3637 if ((ctxt->sax) && (!ctxt->disableSAX) && 3638 (ctxt->sax->processingInstruction != NULL)) 3639 ctxt->sax->processingInstruction(ctxt->userData, 3640 target, NULL); 3641 ctxt->instate = state; 3642 return; 3643 } 3644 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3645 if (buf == NULL) { 3646 xmlErrMemory(ctxt, NULL); 3647 ctxt->instate = state; 3648 return; 3649 } 3650 cur = CUR; 3651 if (!IS_BLANK(cur)) { 3652 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 3653 "ParsePI: PI %s space expected\n", target); 3654 } 3655 SKIP_BLANKS; 3656 cur = CUR_CHAR(l); 3657 while (IS_CHAR(cur) && /* checked */ 3658 ((cur != '?') || (NXT(1) != '>'))) { 3659 if (len + 5 >= size) { 3660 size *= 2; 3661 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3662 if (buf == NULL) { 3663 xmlErrMemory(ctxt, NULL); 3664 ctxt->instate = state; 3665 return; 3666 } 3667 } 3668 count++; 3669 if (count > 50) { 3670 GROW; 3671 count = 0; 3672 } 3673 COPY_BUF(l,buf,len,cur); 3674 NEXTL(l); 3675 cur = CUR_CHAR(l); 3676 if (cur == 0) { 3677 SHRINK; 3678 GROW; 3679 cur = 
CUR_CHAR(l); 3680 } 3681 } 3682 buf[len] = 0; 3683 if (cur != '?') { 3684 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 3685 "ParsePI: PI %s never end ...\n", target); 3686 } else { 3687 if (input != ctxt->input) { 3688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3689 "PI declaration doesn't start and stop in the same entity\n"); 3690 } 3691 SKIP(2); 3692 3693#ifdef LIBXML_CATALOG_ENABLED 3694 if (((state == XML_PARSER_MISC) || 3695 (state == XML_PARSER_START)) && 3696 (xmlStrEqual(target, XML_CATALOG_PI))) { 3697 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3698 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3699 (allow == XML_CATA_ALLOW_ALL)) 3700 xmlParseCatalogPI(ctxt, buf); 3701 } 3702#endif 3703 3704 3705 /* 3706 * SAX: PI detected. 3707 */ 3708 if ((ctxt->sax) && (!ctxt->disableSAX) && 3709 (ctxt->sax->processingInstruction != NULL)) 3710 ctxt->sax->processingInstruction(ctxt->userData, 3711 target, buf); 3712 } 3713 xmlFree(buf); 3714 } else { 3715 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 3716 } 3717 ctxt->instate = state; 3718 } 3719} 3720 3721/** 3722 * xmlParseNotationDecl: 3723 * @ctxt: an XML parser context 3724 * 3725 * parse a notation declaration 3726 * 3727 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3728 * 3729 * Hence there is actually 3 choices: 3730 * 'PUBLIC' S PubidLiteral 3731 * 'PUBLIC' S PubidLiteral S SystemLiteral 3732 * and 'SYSTEM' S SystemLiteral 3733 * 3734 * See the NOTE on xmlParseExternalID(). 
3735 */ 3736 3737void 3738xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3739 const xmlChar *name; 3740 xmlChar *Pubid; 3741 xmlChar *Systemid; 3742 3743 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 3744 xmlParserInputPtr input = ctxt->input; 3745 SHRINK; 3746 SKIP(10); 3747 if (!IS_BLANK_CH(CUR)) { 3748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3749 "Space required after '<!NOTATION'\n"); 3750 return; 3751 } 3752 SKIP_BLANKS; 3753 3754 name = xmlParseName(ctxt); 3755 if (name == NULL) { 3756 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 3757 return; 3758 } 3759 if (!IS_BLANK_CH(CUR)) { 3760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3761 "Space required after the NOTATION name'\n"); 3762 return; 3763 } 3764 SKIP_BLANKS; 3765 3766 /* 3767 * Parse the IDs. 3768 */ 3769 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3770 SKIP_BLANKS; 3771 3772 if (RAW == '>') { 3773 if (input != ctxt->input) { 3774 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3775 "Notation declaration doesn't start and stop in the same entity\n"); 3776 } 3777 NEXT; 3778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3779 (ctxt->sax->notationDecl != NULL)) 3780 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3781 } else { 3782 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 3783 } 3784 if (Systemid != NULL) xmlFree(Systemid); 3785 if (Pubid != NULL) xmlFree(Pubid); 3786 } 3787} 3788 3789/** 3790 * xmlParseEntityDecl: 3791 * @ctxt: an XML parser context 3792 * 3793 * parse <!ENTITY declarations 3794 * 3795 * [70] EntityDecl ::= GEDecl | PEDecl 3796 * 3797 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3798 * 3799 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3800 * 3801 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
3802 * 3803 * [74] PEDef ::= EntityValue | ExternalID 3804 * 3805 * [76] NDataDecl ::= S 'NDATA' S Name 3806 * 3807 * [ VC: Notation Declared ] 3808 * The Name must match the declared name of a notation. 3809 */ 3810 3811void 3812xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3813 const xmlChar *name = NULL; 3814 xmlChar *value = NULL; 3815 xmlChar *URI = NULL, *literal = NULL; 3816 const xmlChar *ndata = NULL; 3817 int isParameter = 0; 3818 xmlChar *orig = NULL; 3819 int skipped; 3820 3821 GROW; 3822 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 3823 xmlParserInputPtr input = ctxt->input; 3824 SHRINK; 3825 SKIP(8); 3826 skipped = SKIP_BLANKS; 3827 if (skipped == 0) { 3828 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3829 "Space required after '<!ENTITY'\n"); 3830 } 3831 3832 if (RAW == '%') { 3833 NEXT; 3834 skipped = SKIP_BLANKS; 3835 if (skipped == 0) { 3836 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3837 "Space required after '%'\n"); 3838 } 3839 isParameter = 1; 3840 } 3841 3842 name = xmlParseName(ctxt); 3843 if (name == NULL) { 3844 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 3845 "xmlParseEntityDecl: no name\n"); 3846 return; 3847 } 3848 skipped = SKIP_BLANKS; 3849 if (skipped == 0) { 3850 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3851 "Space required after the entity name\n"); 3852 } 3853 3854 ctxt->instate = XML_PARSER_ENTITY_DECL; 3855 /* 3856 * handle the various case of definitions... 
3857 */ 3858 if (isParameter) { 3859 if ((RAW == '"') || (RAW == '\'')) { 3860 value = xmlParseEntityValue(ctxt, &orig); 3861 if (value) { 3862 if ((ctxt->sax != NULL) && 3863 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3864 ctxt->sax->entityDecl(ctxt->userData, name, 3865 XML_INTERNAL_PARAMETER_ENTITY, 3866 NULL, NULL, value); 3867 } 3868 } else { 3869 URI = xmlParseExternalID(ctxt, &literal, 1); 3870 if ((URI == NULL) && (literal == NULL)) { 3871 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 3872 } 3873 if (URI) { 3874 xmlURIPtr uri; 3875 3876 uri = xmlParseURI((const char *) URI); 3877 if (uri == NULL) { 3878 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 3879 "Invalid URI: %s\n", URI); 3880 /* 3881 * This really ought to be a well formedness error 3882 * but the XML Core WG decided otherwise c.f. issue 3883 * E26 of the XML erratas. 3884 */ 3885 } else { 3886 if (uri->fragment != NULL) { 3887 /* 3888 * Okay this is foolish to block those but not 3889 * invalid URIs. 3890 */ 3891 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 3892 } else { 3893 if ((ctxt->sax != NULL) && 3894 (!ctxt->disableSAX) && 3895 (ctxt->sax->entityDecl != NULL)) 3896 ctxt->sax->entityDecl(ctxt->userData, name, 3897 XML_EXTERNAL_PARAMETER_ENTITY, 3898 literal, URI, NULL); 3899 } 3900 xmlFreeURI(uri); 3901 } 3902 } 3903 } 3904 } else { 3905 if ((RAW == '"') || (RAW == '\'')) { 3906 value = xmlParseEntityValue(ctxt, &orig); 3907 if ((ctxt->sax != NULL) && 3908 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3909 ctxt->sax->entityDecl(ctxt->userData, name, 3910 XML_INTERNAL_GENERAL_ENTITY, 3911 NULL, NULL, value); 3912 /* 3913 * For expat compatibility in SAX mode. 
3914 */ 3915 if ((ctxt->myDoc == NULL) || 3916 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 3917 if (ctxt->myDoc == NULL) { 3918 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3919 } 3920 if (ctxt->myDoc->intSubset == NULL) 3921 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3922 BAD_CAST "fake", NULL, NULL); 3923 3924 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 3925 NULL, NULL, value); 3926 } 3927 } else { 3928 URI = xmlParseExternalID(ctxt, &literal, 1); 3929 if ((URI == NULL) && (literal == NULL)) { 3930 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 3931 } 3932 if (URI) { 3933 xmlURIPtr uri; 3934 3935 uri = xmlParseURI((const char *)URI); 3936 if (uri == NULL) { 3937 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 3938 "Invalid URI: %s\n", URI); 3939 /* 3940 * This really ought to be a well formedness error 3941 * but the XML Core WG decided otherwise c.f. issue 3942 * E26 of the XML erratas. 3943 */ 3944 } else { 3945 if (uri->fragment != NULL) { 3946 /* 3947 * Okay this is foolish to block those but not 3948 * invalid URIs. 
3949 */ 3950 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 3951 } 3952 xmlFreeURI(uri); 3953 } 3954 } 3955 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 3956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3957 "Space required before 'NDATA'\n"); 3958 } 3959 SKIP_BLANKS; 3960 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 3961 SKIP(5); 3962 if (!IS_BLANK_CH(CUR)) { 3963 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3964 "Space required after 'NDATA'\n"); 3965 } 3966 SKIP_BLANKS; 3967 ndata = xmlParseName(ctxt); 3968 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3969 (ctxt->sax->unparsedEntityDecl != NULL)) 3970 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3971 literal, URI, ndata); 3972 } else { 3973 if ((ctxt->sax != NULL) && 3974 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3975 ctxt->sax->entityDecl(ctxt->userData, name, 3976 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3977 literal, URI, NULL); 3978 /* 3979 * For expat compatibility in SAX mode. 3980 * assuming the entity repalcement was asked for 3981 */ 3982 if ((ctxt->replaceEntities != 0) && 3983 ((ctxt->myDoc == NULL) || 3984 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 3985 if (ctxt->myDoc == NULL) { 3986 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 3987 } 3988 3989 if (ctxt->myDoc->intSubset == NULL) 3990 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 3991 BAD_CAST "fake", NULL, NULL); 3992 xmlSAX2EntityDecl(ctxt, name, 3993 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3994 literal, URI, NULL); 3995 } 3996 } 3997 } 3998 } 3999 SKIP_BLANKS; 4000 if (RAW != '>') { 4001 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4002 "xmlParseEntityDecl: entity %s not terminated\n", name); 4003 } else { 4004 if (input != ctxt->input) { 4005 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4006 "Entity declaration doesn't start and stop in the same entity\n"); 4007 } 4008 NEXT; 4009 } 4010 if (orig != NULL) { 4011 /* 4012 * Ugly mechanism to save the raw entity value. 
4013 */ 4014 xmlEntityPtr cur = NULL; 4015 4016 if (isParameter) { 4017 if ((ctxt->sax != NULL) && 4018 (ctxt->sax->getParameterEntity != NULL)) 4019 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4020 } else { 4021 if ((ctxt->sax != NULL) && 4022 (ctxt->sax->getEntity != NULL)) 4023 cur = ctxt->sax->getEntity(ctxt->userData, name); 4024 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4025 cur = xmlSAX2GetEntity(ctxt, name); 4026 } 4027 } 4028 if (cur != NULL) { 4029 if (cur->orig != NULL) 4030 xmlFree(orig); 4031 else 4032 cur->orig = orig; 4033 } else 4034 xmlFree(orig); 4035 } 4036 if (value != NULL) xmlFree(value); 4037 if (URI != NULL) xmlFree(URI); 4038 if (literal != NULL) xmlFree(literal); 4039 } 4040} 4041 4042/** 4043 * xmlParseDefaultDecl: 4044 * @ctxt: an XML parser context 4045 * @value: Receive a possible fixed default value for the attribute 4046 * 4047 * Parse an attribute default declaration 4048 * 4049 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4050 * 4051 * [ VC: Required Attribute ] 4052 * if the default declaration is the keyword #REQUIRED, then the 4053 * attribute must be specified for all elements of the type in the 4054 * attribute-list declaration. 4055 * 4056 * [ VC: Attribute Default Legal ] 4057 * The declared default value must meet the lexical constraints of 4058 * the declared attribute type c.f. xmlValidateAttributeDecl() 4059 * 4060 * [ VC: Fixed Attribute Default ] 4061 * if an attribute has a default value declared with the #FIXED 4062 * keyword, instances of that attribute must match the default value. 4063 * 4064 * [ WFC: No < in Attribute Values ] 4065 * handled in xmlParseAttValue() 4066 * 4067 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4068 * or XML_ATTRIBUTE_FIXED. 
4069 */ 4070 4071int 4072xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4073 int val; 4074 xmlChar *ret; 4075 4076 *value = NULL; 4077 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4078 SKIP(9); 4079 return(XML_ATTRIBUTE_REQUIRED); 4080 } 4081 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4082 SKIP(8); 4083 return(XML_ATTRIBUTE_IMPLIED); 4084 } 4085 val = XML_ATTRIBUTE_NONE; 4086 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4087 SKIP(6); 4088 val = XML_ATTRIBUTE_FIXED; 4089 if (!IS_BLANK_CH(CUR)) { 4090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4091 "Space required after '#FIXED'\n"); 4092 } 4093 SKIP_BLANKS; 4094 } 4095 ret = xmlParseAttValue(ctxt); 4096 ctxt->instate = XML_PARSER_DTD; 4097 if (ret == NULL) { 4098 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4099 "Attribute default value declaration error\n"); 4100 } else 4101 *value = ret; 4102 return(val); 4103} 4104 4105/** 4106 * xmlParseNotationType: 4107 * @ctxt: an XML parser context 4108 * 4109 * parse an Notation attribute type. 4110 * 4111 * Note: the leading 'NOTATION' S part has already being parsed... 4112 * 4113 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4114 * 4115 * [ VC: Notation Attributes ] 4116 * Values of this type must match one of the notation names included 4117 * in the declaration; all notation names in the declaration must be declared. 
4118 * 4119 * Returns: the notation attribute tree built while parsing 4120 */ 4121 4122xmlEnumerationPtr 4123xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4124 const xmlChar *name; 4125 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4126 4127 if (RAW != '(') { 4128 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4129 return(NULL); 4130 } 4131 SHRINK; 4132 do { 4133 NEXT; 4134 SKIP_BLANKS; 4135 name = xmlParseName(ctxt); 4136 if (name == NULL) { 4137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4138 "Name expected in NOTATION declaration\n"); 4139 return(ret); 4140 } 4141 cur = xmlCreateEnumeration(name); 4142 if (cur == NULL) return(ret); 4143 if (last == NULL) ret = last = cur; 4144 else { 4145 last->next = cur; 4146 last = cur; 4147 } 4148 SKIP_BLANKS; 4149 } while (RAW == '|'); 4150 if (RAW != ')') { 4151 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4152 if ((last != NULL) && (last != ret)) 4153 xmlFreeEnumeration(last); 4154 return(ret); 4155 } 4156 NEXT; 4157 return(ret); 4158} 4159 4160/** 4161 * xmlParseEnumerationType: 4162 * @ctxt: an XML parser context 4163 * 4164 * parse an Enumeration attribute type. 4165 * 4166 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 4167 * 4168 * [ VC: Enumeration ] 4169 * Values of this type must match one of the Nmtoken tokens in 4170 * the declaration 4171 * 4172 * Returns: the enumeration attribute tree built while parsing 4173 */ 4174 4175xmlEnumerationPtr 4176xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 4177 xmlChar *name; 4178 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4179 4180 if (RAW != '(') { 4181 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 4182 return(NULL); 4183 } 4184 SHRINK; 4185 do { 4186 NEXT; 4187 SKIP_BLANKS; 4188 name = xmlParseNmtoken(ctxt); 4189 if (name == NULL) { 4190 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 4191 return(ret); 4192 } 4193 cur = xmlCreateEnumeration(name); 4194 xmlFree(name); 4195 if (cur == NULL) return(ret); 4196 if (last == NULL) ret = last = cur; 4197 else { 4198 last->next = cur; 4199 last = cur; 4200 } 4201 SKIP_BLANKS; 4202 } while (RAW == '|'); 4203 if (RAW != ')') { 4204 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 4205 return(ret); 4206 } 4207 NEXT; 4208 return(ret); 4209} 4210 4211/** 4212 * xmlParseEnumeratedType: 4213 * @ctxt: an XML parser context 4214 * @tree: the enumeration tree built while parsing 4215 * 4216 * parse an Enumerated attribute type. 4217 * 4218 * [57] EnumeratedType ::= NotationType | Enumeration 4219 * 4220 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 4221 * 4222 * 4223 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4224 */ 4225 4226int 4227xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4228 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4229 SKIP(8); 4230 if (!IS_BLANK_CH(CUR)) { 4231 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4232 "Space required after 'NOTATION'\n"); 4233 return(0); 4234 } 4235 SKIP_BLANKS; 4236 *tree = xmlParseNotationType(ctxt); 4237 if (*tree == NULL) return(0); 4238 return(XML_ATTRIBUTE_NOTATION); 4239 } 4240 *tree = xmlParseEnumerationType(ctxt); 4241 if (*tree == NULL) return(0); 4242 return(XML_ATTRIBUTE_ENUMERATION); 4243} 4244 4245/** 4246 * xmlParseAttributeType: 4247 * @ctxt: an XML parser context 4248 * @tree: the enumeration tree built while parsing 4249 * 4250 * parse the Attribute list def for an element 4251 * 4252 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4253 * 4254 * [55] StringType ::= 'CDATA' 4255 * 4256 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4257 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4258 * 4259 * Validity constraints for attribute values syntax are checked in 4260 * xmlValidateAttributeValue() 4261 * 4262 * [ VC: ID ] 4263 * Values of type ID must match the Name production. A name must not 4264 * appear more than once in an XML document as a value of this type; 4265 * i.e., ID values must uniquely identify the elements which bear them. 4266 * 4267 * [ VC: One ID per Element Type ] 4268 * No element type may have more than one ID attribute specified. 4269 * 4270 * [ VC: ID Attribute Default ] 4271 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4272 * 4273 * [ VC: IDREF ] 4274 * Values of type IDREF must match the Name production, and values 4275 * of type IDREFS must match Names; each IDREF Name must match the value 4276 * of an ID attribute on some element in the XML document; i.e. 
IDREF 4277 * values must match the value of some ID attribute. 4278 * 4279 * [ VC: Entity Name ] 4280 * Values of type ENTITY must match the Name production, values 4281 * of type ENTITIES must match Names; each Entity Name must match the 4282 * name of an unparsed entity declared in the DTD. 4283 * 4284 * [ VC: Name Token ] 4285 * Values of type NMTOKEN must match the Nmtoken production; values 4286 * of type NMTOKENS must match Nmtokens. 4287 * 4288 * Returns the attribute type 4289 */ 4290int 4291xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4292 SHRINK; 4293 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 4294 SKIP(5); 4295 return(XML_ATTRIBUTE_CDATA); 4296 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 4297 SKIP(6); 4298 return(XML_ATTRIBUTE_IDREFS); 4299 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 4300 SKIP(5); 4301 return(XML_ATTRIBUTE_IDREF); 4302 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4303 SKIP(2); 4304 return(XML_ATTRIBUTE_ID); 4305 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 4306 SKIP(6); 4307 return(XML_ATTRIBUTE_ENTITY); 4308 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 4309 SKIP(8); 4310 return(XML_ATTRIBUTE_ENTITIES); 4311 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 4312 SKIP(8); 4313 return(XML_ATTRIBUTE_NMTOKENS); 4314 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 4315 SKIP(7); 4316 return(XML_ATTRIBUTE_NMTOKEN); 4317 } 4318 return(xmlParseEnumeratedType(ctxt, tree)); 4319} 4320 4321/** 4322 * xmlParseAttributeListDecl: 4323 * @ctxt: an XML parser context 4324 * 4325 * : parse the Attribute list def for an element 4326 * 4327 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 4328 * 4329 * [53] AttDef ::= S Name S AttType S DefaultDecl 4330 * 4331 */ 4332void 4333xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4334 const xmlChar *elemName; 4335 const xmlChar *attrName; 4336 xmlEnumerationPtr tree; 4337 4338 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 4339 xmlParserInputPtr input = ctxt->input; 4340 4341 SKIP(9); 4342 if (!IS_BLANK_CH(CUR)) { 4343 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4344 "Space required after '<!ATTLIST'\n"); 4345 } 4346 SKIP_BLANKS; 4347 elemName = xmlParseName(ctxt); 4348 if (elemName == NULL) { 4349 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4350 "ATTLIST: no name for Element\n"); 4351 return; 4352 } 4353 SKIP_BLANKS; 4354 GROW; 4355 while (RAW != '>') { 4356 const xmlChar *check = CUR_PTR; 4357 int type; 4358 int def; 4359 xmlChar *defaultValue = NULL; 4360 4361 GROW; 4362 tree = NULL; 4363 attrName = xmlParseName(ctxt); 4364 if (attrName == NULL) { 4365 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4366 "ATTLIST: no name for Attribute\n"); 4367 break; 4368 } 4369 GROW; 4370 if (!IS_BLANK_CH(CUR)) { 4371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4372 "Space required after the attribute name\n"); 4373 if (defaultValue != NULL) 4374 xmlFree(defaultValue); 4375 break; 4376 } 4377 SKIP_BLANKS; 4378 4379 type = xmlParseAttributeType(ctxt, &tree); 4380 if (type <= 0) { 4381 if (defaultValue != NULL) 4382 xmlFree(defaultValue); 4383 break; 4384 } 4385 4386 GROW; 4387 if (!IS_BLANK_CH(CUR)) { 4388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4389 "Space required after the attribute type\n"); 4390 if (defaultValue != NULL) 4391 xmlFree(defaultValue); 4392 if (tree != NULL) 4393 xmlFreeEnumeration(tree); 4394 break; 4395 } 4396 SKIP_BLANKS; 4397 4398 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4399 if (def <= 0) { 4400 if (defaultValue != NULL) 4401 xmlFree(defaultValue); 4402 if (tree != NULL) 4403 xmlFreeEnumeration(tree); 4404 break; 4405 } 4406 4407 GROW; 4408 if (RAW != '>') { 
4409 if (!IS_BLANK_CH(CUR)) { 4410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4411 "Space required after the attribute default value\n"); 4412 if (defaultValue != NULL) 4413 xmlFree(defaultValue); 4414 if (tree != NULL) 4415 xmlFreeEnumeration(tree); 4416 break; 4417 } 4418 SKIP_BLANKS; 4419 } 4420 if (check == CUR_PTR) { 4421 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 4422 "in xmlParseAttributeListDecl\n"); 4423 if (defaultValue != NULL) 4424 xmlFree(defaultValue); 4425 if (tree != NULL) 4426 xmlFreeEnumeration(tree); 4427 break; 4428 } 4429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4430 (ctxt->sax->attributeDecl != NULL)) 4431 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4432 type, def, defaultValue, tree); 4433 else if (tree != NULL) 4434 xmlFreeEnumeration(tree); 4435 4436 if ((ctxt->sax2) && (defaultValue != NULL) && 4437 (def != XML_ATTRIBUTE_IMPLIED) && 4438 (def != XML_ATTRIBUTE_REQUIRED)) { 4439 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 4440 } 4441 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { 4442 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 4443 } 4444 if (defaultValue != NULL) 4445 xmlFree(defaultValue); 4446 GROW; 4447 } 4448 if (RAW == '>') { 4449 if (input != ctxt->input) { 4450 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4451 "Attribute list declaration doesn't start and stop in the same entity\n"); 4452 } 4453 NEXT; 4454 } 4455 } 4456} 4457 4458/** 4459 * xmlParseElementMixedContentDecl: 4460 * @ctxt: an XML parser context 4461 * @inputchk: the input used for the current entity, needed for boundary checks 4462 * 4463 * parse the declaration for a Mixed Element content 4464 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4465 * 4466 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4467 * '(' S? '#PCDATA' S? 
')' 4468 * 4469 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4470 * 4471 * [ VC: No Duplicate Types ] 4472 * The same name must not appear more than once in a single 4473 * mixed-content declaration. 4474 * 4475 * returns: the list of the xmlElementContentPtr describing the element choices 4476 */ 4477xmlElementContentPtr 4478xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 4479 xmlElementContentPtr ret = NULL, cur = NULL, n; 4480 const xmlChar *elem = NULL; 4481 4482 GROW; 4483 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 4484 SKIP(7); 4485 SKIP_BLANKS; 4486 SHRINK; 4487 if (RAW == ')') { 4488 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4489 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4490"Element content declaration doesn't start and stop in the same entity\n", 4491 NULL); 4492 } 4493 NEXT; 4494 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4495 if (RAW == '*') { 4496 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4497 NEXT; 4498 } 4499 return(ret); 4500 } 4501 if ((RAW == '(') || (RAW == '|')) { 4502 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4503 if (ret == NULL) return(NULL); 4504 } 4505 while (RAW == '|') { 4506 NEXT; 4507 if (elem == NULL) { 4508 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4509 if (ret == NULL) return(NULL); 4510 ret->c1 = cur; 4511 if (cur != NULL) 4512 cur->parent = ret; 4513 cur = ret; 4514 } else { 4515 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4516 if (n == NULL) return(NULL); 4517 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4518 if (n->c1 != NULL) 4519 n->c1->parent = n; 4520 cur->c2 = n; 4521 if (n != NULL) 4522 n->parent = cur; 4523 cur = n; 4524 } 4525 SKIP_BLANKS; 4526 elem = xmlParseName(ctxt); 4527 if (elem == NULL) { 4528 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4529 "xmlParseElementMixedContentDecl : Name expected\n"); 4530 xmlFreeElementContent(cur); 4531 return(NULL); 4532 } 
4533 SKIP_BLANKS; 4534 GROW; 4535 } 4536 if ((RAW == ')') && (NXT(1) == '*')) { 4537 if (elem != NULL) { 4538 cur->c2 = xmlNewElementContent(elem, 4539 XML_ELEMENT_CONTENT_ELEMENT); 4540 if (cur->c2 != NULL) 4541 cur->c2->parent = cur; 4542 } 4543 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4544 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4545 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4546"Element content declaration doesn't start and stop in the same entity\n", 4547 NULL); 4548 } 4549 SKIP(2); 4550 } else { 4551 xmlFreeElementContent(ret); 4552 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 4553 return(NULL); 4554 } 4555 4556 } else { 4557 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 4558 } 4559 return(ret); 4560} 4561 4562/** 4563 * xmlParseElementChildrenContentDecl: 4564 * @ctxt: an XML parser context 4565 * @inputchk: the input used for the current entity, needed for boundary checks 4566 * 4567 * parse the declaration for a Mixed Element content 4568 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4569 * 4570 * 4571 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4572 * 4573 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4574 * 4575 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4576 * 4577 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4578 * 4579 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4580 * TODO Parameter-entity replacement text must be properly nested 4581 * with parenthesized groups. That is to say, if either of the 4582 * opening or closing parentheses in a choice, seq, or Mixed 4583 * construct is contained in the replacement text for a parameter 4584 * entity, both must be contained in the same replacement text. 
For 4585 * interoperability, if a parameter-entity reference appears in a 4586 * choice, seq, or Mixed construct, its replacement text should not 4587 * be empty, and neither the first nor last non-blank character of 4588 * the replacement text should be a connector (| or ,). 4589 * 4590 * Returns the tree of xmlElementContentPtr describing the element 4591 * hierarchy. 4592 */ 4593xmlElementContentPtr 4594xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 4595 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4596 const xmlChar *elem; 4597 xmlChar type = 0; 4598 4599 SKIP_BLANKS; 4600 GROW; 4601 if (RAW == '(') { 4602 int inputid = ctxt->input->id; 4603 4604 /* Recurse on first child */ 4605 NEXT; 4606 SKIP_BLANKS; 4607 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 4608 SKIP_BLANKS; 4609 GROW; 4610 } else { 4611 elem = xmlParseName(ctxt); 4612 if (elem == NULL) { 4613 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 4614 return(NULL); 4615 } 4616 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4617 if (cur == NULL) { 4618 xmlErrMemory(ctxt, NULL); 4619 return(NULL); 4620 } 4621 GROW; 4622 if (RAW == '?') { 4623 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4624 NEXT; 4625 } else if (RAW == '*') { 4626 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4627 NEXT; 4628 } else if (RAW == '+') { 4629 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4630 NEXT; 4631 } else { 4632 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4633 } 4634 GROW; 4635 } 4636 SKIP_BLANKS; 4637 SHRINK; 4638 while (RAW != ')') { 4639 /* 4640 * Each loop we parse one separator and one element. 
4641 */ 4642 if (RAW == ',') { 4643 if (type == 0) type = CUR; 4644 4645 /* 4646 * Detect "Name | Name , Name" error 4647 */ 4648 else if (type != CUR) { 4649 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4650 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4651 type); 4652 if ((last != NULL) && (last != ret)) 4653 xmlFreeElementContent(last); 4654 if (ret != NULL) 4655 xmlFreeElementContent(ret); 4656 return(NULL); 4657 } 4658 NEXT; 4659 4660 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4661 if (op == NULL) { 4662 if ((last != NULL) && (last != ret)) 4663 xmlFreeElementContent(last); 4664 xmlFreeElementContent(ret); 4665 return(NULL); 4666 } 4667 if (last == NULL) { 4668 op->c1 = ret; 4669 if (ret != NULL) 4670 ret->parent = op; 4671 ret = cur = op; 4672 } else { 4673 cur->c2 = op; 4674 if (op != NULL) 4675 op->parent = cur; 4676 op->c1 = last; 4677 if (last != NULL) 4678 last->parent = op; 4679 cur =op; 4680 last = NULL; 4681 } 4682 } else if (RAW == '|') { 4683 if (type == 0) type = CUR; 4684 4685 /* 4686 * Detect "Name , Name | Name" error 4687 */ 4688 else if (type != CUR) { 4689 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4690 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4691 type); 4692 if ((last != NULL) && (last != ret)) 4693 xmlFreeElementContent(last); 4694 if (ret != NULL) 4695 xmlFreeElementContent(ret); 4696 return(NULL); 4697 } 4698 NEXT; 4699 4700 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4701 if (op == NULL) { 4702 if ((last != NULL) && (last != ret)) 4703 xmlFreeElementContent(last); 4704 if (ret != NULL) 4705 xmlFreeElementContent(ret); 4706 return(NULL); 4707 } 4708 if (last == NULL) { 4709 op->c1 = ret; 4710 if (ret != NULL) 4711 ret->parent = op; 4712 ret = cur = op; 4713 } else { 4714 cur->c2 = op; 4715 if (op != NULL) 4716 op->parent = cur; 4717 op->c1 = last; 4718 if (last != NULL) 4719 last->parent = op; 4720 cur =op; 4721 last = NULL; 4722 } 4723 } else { 4724 xmlFatalErr(ctxt, 
XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 4725 if (ret != NULL) 4726 xmlFreeElementContent(ret); 4727 return(NULL); 4728 } 4729 GROW; 4730 SKIP_BLANKS; 4731 GROW; 4732 if (RAW == '(') { 4733 int inputid = ctxt->input->id; 4734 /* Recurse on second child */ 4735 NEXT; 4736 SKIP_BLANKS; 4737 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 4738 SKIP_BLANKS; 4739 } else { 4740 elem = xmlParseName(ctxt); 4741 if (elem == NULL) { 4742 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 4743 if (ret != NULL) 4744 xmlFreeElementContent(ret); 4745 return(NULL); 4746 } 4747 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4748 if (RAW == '?') { 4749 last->ocur = XML_ELEMENT_CONTENT_OPT; 4750 NEXT; 4751 } else if (RAW == '*') { 4752 last->ocur = XML_ELEMENT_CONTENT_MULT; 4753 NEXT; 4754 } else if (RAW == '+') { 4755 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4756 NEXT; 4757 } else { 4758 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4759 } 4760 } 4761 SKIP_BLANKS; 4762 GROW; 4763 } 4764 if ((cur != NULL) && (last != NULL)) { 4765 cur->c2 = last; 4766 if (last != NULL) 4767 last->parent = cur; 4768 } 4769 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4770 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4771"Element content declaration doesn't start and stop in the same entity\n", 4772 NULL); 4773 } 4774 NEXT; 4775 if (RAW == '?') { 4776 if (ret != NULL) 4777 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4778 NEXT; 4779 } else if (RAW == '*') { 4780 if (ret != NULL) { 4781 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4782 cur = ret; 4783 /* 4784 * Some normalization: 4785 * (a | b* | c?)* == (a | b | c)* 4786 */ 4787 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4788 if ((cur->c1 != NULL) && 4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4792 if ((cur->c2 != NULL) && 4793 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4794 (cur->c2->ocur == 
XML_ELEMENT_CONTENT_MULT))) 4795 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4796 cur = cur->c2; 4797 } 4798 } 4799 NEXT; 4800 } else if (RAW == '+') { 4801 if (ret != NULL) { 4802 int found = 0; 4803 4804 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4805 /* 4806 * Some normalization: 4807 * (a | b*)+ == (a | b)* 4808 * (a | b?)+ == (a | b)* 4809 */ 4810 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4811 if ((cur->c1 != NULL) && 4812 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4813 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 4814 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4815 found = 1; 4816 } 4817 if ((cur->c2 != NULL) && 4818 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4819 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 4820 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4821 found = 1; 4822 } 4823 cur = cur->c2; 4824 } 4825 if (found) 4826 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4827 } 4828 NEXT; 4829 } 4830 return(ret); 4831} 4832 4833/** 4834 * xmlParseElementContentDecl: 4835 * @ctxt: an XML parser context 4836 * @name: the name of the element being defined. 
4837 * @result: the Element Content pointer will be stored here if any 4838 * 4839 * parse the declaration for an Element content either Mixed or Children, 4840 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4841 * 4842 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4843 * 4844 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4845 */ 4846 4847int 4848xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 4849 xmlElementContentPtr *result) { 4850 4851 xmlElementContentPtr tree = NULL; 4852 int inputid = ctxt->input->id; 4853 int res; 4854 4855 *result = NULL; 4856 4857 if (RAW != '(') { 4858 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 4859 "xmlParseElementContentDecl : %s '(' expected\n", name); 4860 return(-1); 4861 } 4862 NEXT; 4863 GROW; 4864 SKIP_BLANKS; 4865 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 4866 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 4867 res = XML_ELEMENT_TYPE_MIXED; 4868 } else { 4869 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 4870 res = XML_ELEMENT_TYPE_ELEMENT; 4871 } 4872 SKIP_BLANKS; 4873 *result = tree; 4874 return(res); 4875} 4876 4877/** 4878 * xmlParseElementDecl: 4879 * @ctxt: an XML parser context 4880 * 4881 * parse an Element declaration. 4882 * 4883 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4884 * 4885 * [ VC: Unique Element Type Declaration ] 4886 * No element type may be declared more than once 4887 * 4888 * Returns the type of the element, or -1 in case of error 4889 */ 4890int 4891xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4892 const xmlChar *name; 4893 int ret = -1; 4894 xmlElementContentPtr content = NULL; 4895 4896 GROW; 4897 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 4898 xmlParserInputPtr input = ctxt->input; 4899 4900 SKIP(9); 4901 if (!IS_BLANK_CH(CUR)) { 4902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4903 "Space required after 'ELEMENT'\n"); 4904 } 4905 SKIP_BLANKS; 4906 name = xmlParseName(ctxt); 4907 if (name == NULL) { 4908 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4909 "xmlParseElementDecl: no name for Element\n"); 4910 return(-1); 4911 } 4912 while ((RAW == 0) && (ctxt->inputNr > 1)) 4913 xmlPopInput(ctxt); 4914 if (!IS_BLANK_CH(CUR)) { 4915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4916 "Space required after the element name\n"); 4917 } 4918 SKIP_BLANKS; 4919 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 4920 SKIP(5); 4921 /* 4922 * Element must always be empty. 4923 */ 4924 ret = XML_ELEMENT_TYPE_EMPTY; 4925 } else if ((RAW == 'A') && (NXT(1) == 'N') && 4926 (NXT(2) == 'Y')) { 4927 SKIP(3); 4928 /* 4929 * Element is a generic container. 4930 */ 4931 ret = XML_ELEMENT_TYPE_ANY; 4932 } else if (RAW == '(') { 4933 ret = xmlParseElementContentDecl(ctxt, name, &content); 4934 } else { 4935 /* 4936 * [ WFC: PEs in Internal Subset ] error handling. 4937 */ 4938 if ((RAW == '%') && (ctxt->external == 0) && 4939 (ctxt->inputNr == 1)) { 4940 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 4941 "PEReference: forbidden within markup decl in internal subset\n"); 4942 } else { 4943 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 4944 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4945 } 4946 return(-1); 4947 } 4948 4949 SKIP_BLANKS; 4950 /* 4951 * Pop-up of finished entities. 
4952 */ 4953 while ((RAW == 0) && (ctxt->inputNr > 1)) 4954 xmlPopInput(ctxt); 4955 SKIP_BLANKS; 4956 4957 if (RAW != '>') { 4958 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 4959 } else { 4960 if (input != ctxt->input) { 4961 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4962 "Element declaration doesn't start and stop in the same entity\n"); 4963 } 4964 4965 NEXT; 4966 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4967 (ctxt->sax->elementDecl != NULL)) 4968 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4969 content); 4970 } 4971 if (content != NULL) { 4972 xmlFreeElementContent(content); 4973 } 4974 } 4975 return(ret); 4976} 4977 4978/** 4979 * xmlParseConditionalSections 4980 * @ctxt: an XML parser context 4981 * 4982 * [61] conditionalSect ::= includeSect | ignoreSect 4983 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4984 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4985 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4986 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4987 */ 4988 4989static void 4990xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4991 SKIP(3); 4992 SKIP_BLANKS; 4993 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 4994 SKIP(7); 4995 SKIP_BLANKS; 4996 if (RAW != '[') { 4997 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 4998 } else { 4999 NEXT; 5000 } 5001 if (xmlParserDebugEntities) { 5002 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5003 xmlGenericError(xmlGenericErrorContext, 5004 "%s(%d): ", ctxt->input->filename, 5005 ctxt->input->line); 5006 xmlGenericError(xmlGenericErrorContext, 5007 "Entering INCLUDE Conditional Section\n"); 5008 } 5009 5010 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5011 (NXT(2) != '>'))) { 5012 const xmlChar *check = CUR_PTR; 5013 unsigned int cons = ctxt->input->consumed; 5014 5015 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5016 xmlParseConditionalSections(ctxt); 
5017 } else if (IS_BLANK_CH(CUR)) { 5018 NEXT; 5019 } else if (RAW == '%') { 5020 xmlParsePEReference(ctxt); 5021 } else 5022 xmlParseMarkupDecl(ctxt); 5023 5024 /* 5025 * Pop-up of finished entities. 5026 */ 5027 while ((RAW == 0) && (ctxt->inputNr > 1)) 5028 xmlPopInput(ctxt); 5029 5030 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5031 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5032 break; 5033 } 5034 } 5035 if (xmlParserDebugEntities) { 5036 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5037 xmlGenericError(xmlGenericErrorContext, 5038 "%s(%d): ", ctxt->input->filename, 5039 ctxt->input->line); 5040 xmlGenericError(xmlGenericErrorContext, 5041 "Leaving INCLUDE Conditional Section\n"); 5042 } 5043 5044 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5045 int state; 5046 xmlParserInputState instate; 5047 int depth = 0; 5048 5049 SKIP(6); 5050 SKIP_BLANKS; 5051 if (RAW != '[') { 5052 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5053 } else { 5054 NEXT; 5055 } 5056 if (xmlParserDebugEntities) { 5057 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5058 xmlGenericError(xmlGenericErrorContext, 5059 "%s(%d): ", ctxt->input->filename, 5060 ctxt->input->line); 5061 xmlGenericError(xmlGenericErrorContext, 5062 "Entering IGNORE Conditional Section\n"); 5063 } 5064 5065 /* 5066 * Parse up to the end of the conditional section 5067 * But disable SAX event generating DTD building in the meantime 5068 */ 5069 state = ctxt->disableSAX; 5070 instate = ctxt->instate; 5071 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5072 ctxt->instate = XML_PARSER_IGNORE; 5073 5074 while ((depth >= 0) && (RAW != 0)) { 5075 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5076 depth++; 5077 SKIP(3); 5078 continue; 5079 } 5080 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5081 if (--depth >= 0) SKIP(3); 5082 continue; 5083 } 5084 NEXT; 5085 continue; 5086 } 5087 5088 ctxt->disableSAX = state; 5089 ctxt->instate = 
instate; 5090 5091 if (xmlParserDebugEntities) { 5092 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5093 xmlGenericError(xmlGenericErrorContext, 5094 "%s(%d): ", ctxt->input->filename, 5095 ctxt->input->line); 5096 xmlGenericError(xmlGenericErrorContext, 5097 "Leaving IGNORE Conditional Section\n"); 5098 } 5099 5100 } else { 5101 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5102 } 5103 5104 if (RAW == 0) 5105 SHRINK; 5106 5107 if (RAW == 0) { 5108 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5109 } else { 5110 SKIP(3); 5111 } 5112} 5113 5114/** 5115 * xmlParseMarkupDecl: 5116 * @ctxt: an XML parser context 5117 * 5118 * parse Markup declarations 5119 * 5120 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5121 * NotationDecl | PI | Comment 5122 * 5123 * [ VC: Proper Declaration/PE Nesting ] 5124 * Parameter-entity replacement text must be properly nested with 5125 * markup declarations. That is to say, if either the first character 5126 * or the last character of a markup declaration (markupdecl above) is 5127 * contained in the replacement text for a parameter-entity reference, 5128 * both must be contained in the same replacement text. 5129 * 5130 * [ WFC: PEs in Internal Subset ] 5131 * In the internal DTD subset, parameter-entity references can occur 5132 * only where markup declarations can occur, not within markup declarations. 5133 * (This does not apply to references that occur in external parameter 5134 * entities or to the external subset.) 5135 */ 5136void 5137xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5138 GROW; 5139 xmlParseElementDecl(ctxt); 5140 xmlParseAttributeListDecl(ctxt); 5141 xmlParseEntityDecl(ctxt); 5142 xmlParseNotationDecl(ctxt); 5143 xmlParsePI(ctxt); 5144 xmlParseComment(ctxt); 5145 /* 5146 * This is only for internal subset. 
On external entities, 5147 * the replacement is done before parsing stage 5148 */ 5149 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5150 xmlParsePEReference(ctxt); 5151 5152 /* 5153 * Conditional sections are allowed from entities included 5154 * by PE References in the internal subset. 5155 */ 5156 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5157 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5158 xmlParseConditionalSections(ctxt); 5159 } 5160 } 5161 5162 ctxt->instate = XML_PARSER_DTD; 5163} 5164 5165/** 5166 * xmlParseTextDecl: 5167 * @ctxt: an XML parser context 5168 * 5169 * parse an XML declaration header for external entities 5170 * 5171 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5172 * 5173 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5174 */ 5175 5176void 5177xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5178 xmlChar *version; 5179 const xmlChar *encoding; 5180 5181 /* 5182 * We know that '<?xml' is here. 5183 */ 5184 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5185 SKIP(5); 5186 } else { 5187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5188 return; 5189 } 5190 5191 if (!IS_BLANK_CH(CUR)) { 5192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5193 "Space needed after '<?xml'\n"); 5194 } 5195 SKIP_BLANKS; 5196 5197 /* 5198 * We may have the VersionInfo here. 
5199 */ 5200 version = xmlParseVersionInfo(ctxt); 5201 if (version == NULL) 5202 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5203 else { 5204 if (!IS_BLANK_CH(CUR)) { 5205 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5206 "Space needed here\n"); 5207 } 5208 } 5209 ctxt->input->version = version; 5210 5211 /* 5212 * We must have the encoding declaration 5213 */ 5214 encoding = xmlParseEncodingDecl(ctxt); 5215 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5216 /* 5217 * The XML REC instructs us to stop parsing right here 5218 */ 5219 return; 5220 } 5221 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 5222 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 5223 "Missing encoding in text declaration\n"); 5224 } 5225 5226 SKIP_BLANKS; 5227 if ((RAW == '?') && (NXT(1) == '>')) { 5228 SKIP(2); 5229 } else if (RAW == '>') { 5230 /* Deprecated old WD ... */ 5231 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5232 NEXT; 5233 } else { 5234 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5235 MOVETO_ENDTAG(CUR_PTR); 5236 NEXT; 5237 } 5238} 5239 5240/** 5241 * xmlParseExternalSubset: 5242 * @ctxt: an XML parser context 5243 * @ExternalID: the external identifier 5244 * @SystemID: the system identifier (or URL) 5245 * 5246 * parse Markup declarations from an external subset 5247 * 5248 * [30] extSubset ::= textDecl? 
extSubsetDecl 5249 * 5250 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5251 */ 5252void 5253xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5254 const xmlChar *SystemID) { 5255 xmlDetectSAX2(ctxt); 5256 GROW; 5257 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 5258 xmlParseTextDecl(ctxt); 5259 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5260 /* 5261 * The XML REC instructs us to stop parsing right here 5262 */ 5263 ctxt->instate = XML_PARSER_EOF; 5264 return; 5265 } 5266 } 5267 if (ctxt->myDoc == NULL) { 5268 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5269 } 5270 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5271 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5272 5273 ctxt->instate = XML_PARSER_DTD; 5274 ctxt->external = 1; 5275 while (((RAW == '<') && (NXT(1) == '?')) || 5276 ((RAW == '<') && (NXT(1) == '!')) || 5277 (RAW == '%') || IS_BLANK_CH(CUR)) { 5278 const xmlChar *check = CUR_PTR; 5279 unsigned int cons = ctxt->input->consumed; 5280 5281 GROW; 5282 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5283 xmlParseConditionalSections(ctxt); 5284 } else if (IS_BLANK_CH(CUR)) { 5285 NEXT; 5286 } else if (RAW == '%') { 5287 xmlParsePEReference(ctxt); 5288 } else 5289 xmlParseMarkupDecl(ctxt); 5290 5291 /* 5292 * Pop-up of finished entities. 
5293 */ 5294 while ((RAW == 0) && (ctxt->inputNr > 1)) 5295 xmlPopInput(ctxt); 5296 5297 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5298 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5299 break; 5300 } 5301 } 5302 5303 if (RAW != 0) { 5304 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5305 } 5306 5307} 5308 5309/** 5310 * xmlParseReference: 5311 * @ctxt: an XML parser context 5312 * 5313 * parse and handle entity references in content, depending on the SAX 5314 * interface, this may end-up in a call to character() if this is a 5315 * CharRef, a predefined entity, if there is no reference() callback. 5316 * or if the parser was asked to switch to that mode. 5317 * 5318 * [67] Reference ::= EntityRef | CharRef 5319 */ 5320void 5321xmlParseReference(xmlParserCtxtPtr ctxt) { 5322 xmlEntityPtr ent; 5323 xmlChar *val; 5324 if (RAW != '&') return; 5325 5326 if (NXT(1) == '#') { 5327 int i = 0; 5328 xmlChar out[10]; 5329 int hex = NXT(2); 5330 int value = xmlParseCharRef(ctxt); 5331 5332 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5333 /* 5334 * So we are using non-UTF-8 buffers 5335 * Check that the char fit on 8bits, if not 5336 * generate a CharRef. 
5337 */ 5338 if (value <= 0xFF) { 5339 out[0] = value; 5340 out[1] = 0; 5341 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5342 (!ctxt->disableSAX)) 5343 ctxt->sax->characters(ctxt->userData, out, 1); 5344 } else { 5345 if ((hex == 'x') || (hex == 'X')) 5346 snprintf((char *)out, sizeof(out), "#x%X", value); 5347 else 5348 snprintf((char *)out, sizeof(out), "#%d", value); 5349 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5350 (!ctxt->disableSAX)) 5351 ctxt->sax->reference(ctxt->userData, out); 5352 } 5353 } else { 5354 /* 5355 * Just encode the value in UTF-8 5356 */ 5357 COPY_BUF(0 ,out, i, value); 5358 out[i] = 0; 5359 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5360 (!ctxt->disableSAX)) 5361 ctxt->sax->characters(ctxt->userData, out, i); 5362 } 5363 } else { 5364 ent = xmlParseEntityRef(ctxt); 5365 if (ent == NULL) return; 5366 if (!ctxt->wellFormed) 5367 return; 5368 if ((ent->name != NULL) && 5369 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5370 xmlNodePtr list = NULL; 5371 xmlParserErrors ret = XML_ERR_OK; 5372 5373 5374 /* 5375 * The first reference to the entity trigger a parsing phase 5376 * where the ent->children is filled with the result from 5377 * the parsing. 5378 */ 5379 if (ent->children == NULL) { 5380 xmlChar *value; 5381 value = ent->content; 5382 5383 /* 5384 * Check that this entity is well formed 5385 */ 5386 if ((value != NULL) && (value[0] != 0) && 5387 (value[1] == 0) && (value[0] == '<') && 5388 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5389 /* 5390 * DONE: get definite answer on this !!! 5391 * Lots of entity decls are used to declare a single 5392 * char 5393 * <!ENTITY lt "<"> 5394 * Which seems to be valid since 5395 * 2.4: The ampersand character (&) and the left angle 5396 * bracket (<) may appear in their literal form only 5397 * when used ... 
They are also legal within the literal 5398 * entity value of an internal entity declaration;i 5399 * see "4.3.2 Well-Formed Parsed Entities". 5400 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5401 * Looking at the OASIS test suite and James Clark 5402 * tests, this is broken. However the XML REC uses 5403 * it. Is the XML REC not well-formed ???? 5404 * This is a hack to avoid this problem 5405 * 5406 * ANSWER: since lt gt amp .. are already defined, 5407 * this is a redefinition and hence the fact that the 5408 * content is not well balanced is not a Wf error, this 5409 * is lousy but acceptable. 5410 */ 5411 list = xmlNewDocText(ctxt->myDoc, value); 5412 if (list != NULL) { 5413 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5414 (ent->children == NULL)) { 5415 ent->children = list; 5416 ent->last = list; 5417 ent->owner = 1; 5418 list->parent = (xmlNodePtr) ent; 5419 } else { 5420 xmlFreeNodeList(list); 5421 } 5422 } else if (list != NULL) { 5423 xmlFreeNodeList(list); 5424 } 5425 } else { 5426 /* 5427 * 4.3.2: An internal general parsed entity is well-formed 5428 * if its replacement text matches the production labeled 5429 * content. 5430 */ 5431 5432 void *user_data; 5433 /* 5434 * This is a bit hackish but this seems the best 5435 * way to make sure both SAX and DOM entity support 5436 * behaves okay. 
5437 */ 5438 if (ctxt->userData == ctxt) 5439 user_data = NULL; 5440 else 5441 user_data = ctxt->userData; 5442 5443 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5444 ctxt->depth++; 5445 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 5446 value, user_data, &list); 5447 ctxt->depth--; 5448 } else if (ent->etype == 5449 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5450 ctxt->depth++; 5451 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5452 ctxt->sax, user_data, ctxt->depth, 5453 ent->URI, ent->ExternalID, &list); 5454 ctxt->depth--; 5455 } else { 5456 ret = XML_ERR_ENTITY_PE_INTERNAL; 5457 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 5458 "invalid entity type found\n", NULL); 5459 } 5460 if (ret == XML_ERR_ENTITY_LOOP) { 5461 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 5462 return; 5463 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 5464 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5465 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5466 (ent->children == NULL)) { 5467 ent->children = list; 5468 if (ctxt->replaceEntities) { 5469 /* 5470 * Prune it directly in the generated document 5471 * except for single text nodes. 
5472 */ 5473 if ((list->type == XML_TEXT_NODE) && 5474 (list->next == NULL)) { 5475 list->parent = (xmlNodePtr) ent; 5476 list = NULL; 5477 ent->owner = 1; 5478 } else { 5479 ent->owner = 0; 5480 while (list != NULL) { 5481 list->parent = (xmlNodePtr) ctxt->node; 5482 list->doc = ctxt->myDoc; 5483 if (list->next == NULL) 5484 ent->last = list; 5485 list = list->next; 5486 } 5487 list = ent->children; 5488#ifdef LIBXML_LEGACY_ENABLED 5489 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5490 xmlAddEntityReference(ent, list, NULL); 5491#endif /* LIBXML_LEGACY_ENABLED */ 5492 } 5493 } else { 5494 ent->owner = 1; 5495 while (list != NULL) { 5496 list->parent = (xmlNodePtr) ent; 5497 if (list->next == NULL) 5498 ent->last = list; 5499 list = list->next; 5500 } 5501 } 5502 } else { 5503 xmlFreeNodeList(list); 5504 list = NULL; 5505 } 5506 } else if ((ret != XML_ERR_OK) && 5507 (ret != XML_WAR_UNDECLARED_ENTITY)) { 5508 xmlFatalErr(ctxt, ret, NULL); 5509 } else if (list != NULL) { 5510 xmlFreeNodeList(list); 5511 list = NULL; 5512 } 5513 } 5514 } 5515 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5516 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5517 /* 5518 * Create a node. 
5519 */ 5520 ctxt->sax->reference(ctxt->userData, ent->name); 5521 return; 5522 } else if (ctxt->replaceEntities) { 5523 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5524 /* 5525 * Seems we are generating the DOM content, do 5526 * a simple tree copy for all references except the first 5527 * In the first occurrence list contains the replacement 5528 */ 5529 if ((list == NULL) && (ent->owner == 0)) { 5530 xmlNodePtr nw = NULL, cur, firstChild = NULL; 5531 cur = ent->children; 5532 while (cur != NULL) { 5533 nw = xmlCopyNode(cur, 1); 5534 if (nw != NULL) { 5535 nw->_private = cur->_private; 5536 if (firstChild == NULL){ 5537 firstChild = nw; 5538 } 5539 xmlAddChild(ctxt->node, nw); 5540 } 5541 if (cur == ent->last) 5542 break; 5543 cur = cur->next; 5544 } 5545#ifdef LIBXML_LEGACY_ENABLED 5546 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5547 xmlAddEntityReference(ent, firstChild, nw); 5548#endif /* LIBXML_LEGACY_ENABLED */ 5549 } else if (list == NULL) { 5550 xmlNodePtr nw = NULL, cur, next, last, 5551 firstChild = NULL; 5552 /* 5553 * Copy the entity child list and make it the new 5554 * entity child list. The goal is to make sure any 5555 * ID or REF referenced will be the one from the 5556 * document content and not the entity copy. 
5557 */ 5558 cur = ent->children; 5559 ent->children = NULL; 5560 last = ent->last; 5561 ent->last = NULL; 5562 while (cur != NULL) { 5563 next = cur->next; 5564 cur->next = NULL; 5565 cur->parent = NULL; 5566 nw = xmlCopyNode(cur, 1); 5567 if (nw != NULL) { 5568 nw->_private = cur->_private; 5569 if (firstChild == NULL){ 5570 firstChild = cur; 5571 } 5572 xmlAddChild((xmlNodePtr) ent, nw); 5573 xmlAddChild(ctxt->node, cur); 5574 } 5575 if (cur == last) 5576 break; 5577 cur = next; 5578 } 5579 ent->owner = 1; 5580#ifdef LIBXML_LEGACY_ENABLED 5581 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5582 xmlAddEntityReference(ent, firstChild, nw); 5583#endif /* LIBXML_LEGACY_ENABLED */ 5584 } else { 5585 /* 5586 * the name change is to avoid coalescing of the 5587 * node with a possible previous text one which 5588 * would make ent->children a dangling pointer 5589 */ 5590 if (ent->children->type == XML_TEXT_NODE) 5591 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 5592 if ((ent->last != ent->children) && 5593 (ent->last->type == XML_TEXT_NODE)) 5594 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 5595 xmlAddChildList(ctxt->node, ent->children); 5596 } 5597 5598 /* 5599 * This is to avoid a nasty side effect, see 5600 * characters() in SAX.c 5601 */ 5602 ctxt->nodemem = 0; 5603 ctxt->nodelen = 0; 5604 return; 5605 } else { 5606 /* 5607 * Probably running in SAX mode 5608 */ 5609 xmlParserInputPtr input; 5610 5611 input = xmlNewEntityInputStream(ctxt, ent); 5612 xmlPushInput(ctxt, input); 5613 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5614 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 5615 (IS_BLANK_CH(NXT(5)))) { 5616 xmlParseTextDecl(ctxt); 5617 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5618 /* 5619 * The XML REC instructs us to stop parsing right here 5620 */ 5621 ctxt->instate = XML_PARSER_EOF; 5622 return; 5623 } 5624 if (input->standalone == 1) { 5625 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE, 5626 NULL); 5627 } 5628 } 5629 
return; 5630 } 5631 } 5632 } else { 5633 val = ent->content; 5634 if (val == NULL) return; 5635 /* 5636 * inline the entity. 5637 */ 5638 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5639 (!ctxt->disableSAX)) 5640 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5641 } 5642 } 5643} 5644 5645/** 5646 * xmlParseEntityRef: 5647 * @ctxt: an XML parser context 5648 * 5649 * parse ENTITY references declarations 5650 * 5651 * [68] EntityRef ::= '&' Name ';' 5652 * 5653 * [ WFC: Entity Declared ] 5654 * In a document without any DTD, a document with only an internal DTD 5655 * subset which contains no parameter entity references, or a document 5656 * with "standalone='yes'", the Name given in the entity reference 5657 * must match that in an entity declaration, except that well-formed 5658 * documents need not declare any of the following entities: amp, lt, 5659 * gt, apos, quot. The declaration of a parameter entity must precede 5660 * any reference to it. Similarly, the declaration of a general entity 5661 * must precede any reference to it which appears in a default value in an 5662 * attribute-list declaration. Note that if entities are declared in the 5663 * external subset or in external parameter entities, a non-validating 5664 * processor is not obligated to read and process their declarations; 5665 * for such documents, the rule that an entity must be declared is a 5666 * well-formedness constraint only if standalone='yes'. 5667 * 5668 * [ WFC: Parsed Entity ] 5669 * An entity reference must not contain the name of an unparsed entity 5670 * 5671 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5672 */ 5673xmlEntityPtr 5674xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5675 const xmlChar *name; 5676 xmlEntityPtr ent = NULL; 5677 5678 GROW; 5679 5680 if (RAW == '&') { 5681 NEXT; 5682 name = xmlParseName(ctxt); 5683 if (name == NULL) { 5684 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5685 "xmlParseEntityRef: no name\n"); 5686 } else { 5687 if (RAW == ';') { 5688 NEXT; 5689 /* 5690 * Ask first SAX for entity resolution, otherwise try the 5691 * predefined set. 5692 */ 5693 if (ctxt->sax != NULL) { 5694 if (ctxt->sax->getEntity != NULL) 5695 ent = ctxt->sax->getEntity(ctxt->userData, name); 5696 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 5697 ent = xmlGetPredefinedEntity(name); 5698 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 5699 (ctxt->userData==ctxt)) { 5700 ent = xmlSAX2GetEntity(ctxt, name); 5701 } 5702 } 5703 /* 5704 * [ WFC: Entity Declared ] 5705 * In a document without any DTD, a document with only an 5706 * internal DTD subset which contains no parameter entity 5707 * references, or a document with "standalone='yes'", the 5708 * Name given in the entity reference must match that in an 5709 * entity declaration, except that well-formed documents 5710 * need not declare any of the following entities: amp, lt, 5711 * gt, apos, quot. 5712 * The declaration of a parameter entity must precede any 5713 * reference to it. 5714 * Similarly, the declaration of a general entity must 5715 * precede any reference to it which appears in a default 5716 * value in an attribute-list declaration. Note that if 5717 * entities are declared in the external subset or in 5718 * external parameter entities, a non-validating processor 5719 * is not obligated to read and process their declarations; 5720 * for such documents, the rule that an entity must be 5721 * declared is a well-formedness constraint only if 5722 * standalone='yes'. 
5723 */ 5724 if (ent == NULL) { 5725 if ((ctxt->standalone == 1) || 5726 ((ctxt->hasExternalSubset == 0) && 5727 (ctxt->hasPErefs == 0))) { 5728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 5729 "Entity '%s' not defined\n", name); 5730 } else { 5731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 5732 "Entity '%s' not defined\n", name); 5733 } 5734 ctxt->valid = 0; 5735 } 5736 5737 /* 5738 * [ WFC: Parsed Entity ] 5739 * An entity reference must not contain the name of an 5740 * unparsed entity 5741 */ 5742 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5743 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 5744 "Entity reference to unparsed entity %s\n", name); 5745 } 5746 5747 /* 5748 * [ WFC: No External Entity References ] 5749 * Attribute values cannot contain direct or indirect 5750 * entity references to external entities. 5751 */ 5752 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5753 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5754 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 5755 "Attribute references external entity '%s'\n", name); 5756 } 5757 /* 5758 * [ WFC: No < in Attribute Values ] 5759 * The replacement text of any entity referred to directly or 5760 * indirectly in an attribute value (other than "<") must 5761 * not contain a <. 5762 */ 5763 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5764 (ent != NULL) && 5765 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5766 (ent->content != NULL) && 5767 (xmlStrchr(ent->content, '<'))) { 5768 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 5769 "'<' in entity '%s' is not allowed in attributes values\n", name); 5770 } 5771 5772 /* 5773 * Internal check, no parameter entities here ... 
5774 */ 5775 else { 5776 switch (ent->etype) { 5777 case XML_INTERNAL_PARAMETER_ENTITY: 5778 case XML_EXTERNAL_PARAMETER_ENTITY: 5779 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 5780 "Attempt to reference the parameter entity '%s'\n", 5781 name); 5782 break; 5783 default: 5784 break; 5785 } 5786 } 5787 5788 /* 5789 * [ WFC: No Recursion ] 5790 * A parsed entity must not contain a recursive reference 5791 * to itself, either directly or indirectly. 5792 * Done somewhere else 5793 */ 5794 5795 } else { 5796 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 5797 } 5798 } 5799 } 5800 return(ent); 5801} 5802 5803/** 5804 * xmlParseStringEntityRef: 5805 * @ctxt: an XML parser context 5806 * @str: a pointer to an index in the string 5807 * 5808 * parse ENTITY references declarations, but this version parses it from 5809 * a string value. 5810 * 5811 * [68] EntityRef ::= '&' Name ';' 5812 * 5813 * [ WFC: Entity Declared ] 5814 * In a document without any DTD, a document with only an internal DTD 5815 * subset which contains no parameter entity references, or a document 5816 * with "standalone='yes'", the Name given in the entity reference 5817 * must match that in an entity declaration, except that well-formed 5818 * documents need not declare any of the following entities: amp, lt, 5819 * gt, apos, quot. The declaration of a parameter entity must precede 5820 * any reference to it. Similarly, the declaration of a general entity 5821 * must precede any reference to it which appears in a default value in an 5822 * attribute-list declaration. Note that if entities are declared in the 5823 * external subset or in external parameter entities, a non-validating 5824 * processor is not obligated to read and process their declarations; 5825 * for such documents, the rule that an entity must be declared is a 5826 * well-formedness constraint only if standalone='yes'. 
5827 * 5828 * [ WFC: Parsed Entity ] 5829 * An entity reference must not contain the name of an unparsed entity 5830 * 5831 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5832 * is updated to the current location in the string. 5833 */ 5834xmlEntityPtr 5835xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5836 xmlChar *name; 5837 const xmlChar *ptr; 5838 xmlChar cur; 5839 xmlEntityPtr ent = NULL; 5840 5841 if ((str == NULL) || (*str == NULL)) 5842 return(NULL); 5843 ptr = *str; 5844 cur = *ptr; 5845 if (cur == '&') { 5846 ptr++; 5847 cur = *ptr; 5848 name = xmlParseStringName(ctxt, &ptr); 5849 if (name == NULL) { 5850 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5851 "xmlParseStringEntityRef: no name\n"); 5852 } else { 5853 if (*ptr == ';') { 5854 ptr++; 5855 /* 5856 * Ask first SAX for entity resolution, otherwise try the 5857 * predefined set. 5858 */ 5859 if (ctxt->sax != NULL) { 5860 if (ctxt->sax->getEntity != NULL) 5861 ent = ctxt->sax->getEntity(ctxt->userData, name); 5862 if (ent == NULL) 5863 ent = xmlGetPredefinedEntity(name); 5864 if ((ent == NULL) && (ctxt->userData==ctxt)) { 5865 ent = xmlSAX2GetEntity(ctxt, name); 5866 } 5867 } 5868 /* 5869 * [ WFC: Entity Declared ] 5870 * In a document without any DTD, a document with only an 5871 * internal DTD subset which contains no parameter entity 5872 * references, or a document with "standalone='yes'", the 5873 * Name given in the entity reference must match that in an 5874 * entity declaration, except that well-formed documents 5875 * need not declare any of the following entities: amp, lt, 5876 * gt, apos, quot. 5877 * The declaration of a parameter entity must precede any 5878 * reference to it. 5879 * Similarly, the declaration of a general entity must 5880 * precede any reference to it which appears in a default 5881 * value in an attribute-list declaration. 
Note that if 5882 * entities are declared in the external subset or in 5883 * external parameter entities, a non-validating processor 5884 * is not obligated to read and process their declarations; 5885 * for such documents, the rule that an entity must be 5886 * declared is a well-formedness constraint only if 5887 * standalone='yes'. 5888 */ 5889 if (ent == NULL) { 5890 if ((ctxt->standalone == 1) || 5891 ((ctxt->hasExternalSubset == 0) && 5892 (ctxt->hasPErefs == 0))) { 5893 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 5894 "Entity '%s' not defined\n", name); 5895 } else { 5896 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 5897 "Entity '%s' not defined\n", 5898 name); 5899 } 5900 /* TODO ? check regressions ctxt->valid = 0; */ 5901 } 5902 5903 /* 5904 * [ WFC: Parsed Entity ] 5905 * An entity reference must not contain the name of an 5906 * unparsed entity 5907 */ 5908 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5909 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 5910 "Entity reference to unparsed entity %s\n", name); 5911 } 5912 5913 /* 5914 * [ WFC: No External Entity References ] 5915 * Attribute values cannot contain direct or indirect 5916 * entity references to external entities. 5917 */ 5918 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5919 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5920 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 5921 "Attribute references external entity '%s'\n", name); 5922 } 5923 /* 5924 * [ WFC: No < in Attribute Values ] 5925 * The replacement text of any entity referred to directly or 5926 * indirectly in an attribute value (other than "<") must 5927 * not contain a <. 
5928 */ 5929 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5930 (ent != NULL) && 5931 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5932 (ent->content != NULL) && 5933 (xmlStrchr(ent->content, '<'))) { 5934 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 5935 "'<' in entity '%s' is not allowed in attributes values\n", 5936 name); 5937 } 5938 5939 /* 5940 * Internal check, no parameter entities here ... 5941 */ 5942 else { 5943 switch (ent->etype) { 5944 case XML_INTERNAL_PARAMETER_ENTITY: 5945 case XML_EXTERNAL_PARAMETER_ENTITY: 5946 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 5947 "Attempt to reference the parameter entity '%s'\n", 5948 name); 5949 break; 5950 default: 5951 break; 5952 } 5953 } 5954 5955 /* 5956 * [ WFC: No Recursion ] 5957 * A parsed entity must not contain a recursive reference 5958 * to itself, either directly or indirectly. 5959 * Done somewhere else 5960 */ 5961 5962 } else { 5963 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 5964 } 5965 xmlFree(name); 5966 } 5967 } 5968 *str = ptr; 5969 return(ent); 5970} 5971 5972/** 5973 * xmlParsePEReference: 5974 * @ctxt: an XML parser context 5975 * 5976 * parse PEReference declarations 5977 * The entity content is handled directly by pushing it's content as 5978 * a new input stream. 5979 * 5980 * [69] PEReference ::= '%' Name ';' 5981 * 5982 * [ WFC: No Recursion ] 5983 * A parsed entity must not contain a recursive 5984 * reference to itself, either directly or indirectly. 5985 * 5986 * [ WFC: Entity Declared ] 5987 * In a document without any DTD, a document with only an internal DTD 5988 * subset which contains no parameter entity references, or a document 5989 * with "standalone='yes'", ... ... The declaration of a parameter 5990 * entity must precede any reference to it... 5991 * 5992 * [ VC: Entity Declared ] 5993 * In a document with an external subset or external parameter entities 5994 * with "standalone='no'", ... ... 
The declaration of a parameter entity 5995 * must precede any reference to it... 5996 * 5997 * [ WFC: In DTD ] 5998 * Parameter-entity references may only appear in the DTD. 5999 * NOTE: misleading but this is handled. 6000 */ 6001void 6002xmlParsePEReference(xmlParserCtxtPtr ctxt) 6003{ 6004 const xmlChar *name; 6005 xmlEntityPtr entity = NULL; 6006 xmlParserInputPtr input; 6007 6008 if (RAW == '%') { 6009 NEXT; 6010 name = xmlParseName(ctxt); 6011 if (name == NULL) { 6012 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6013 "xmlParsePEReference: no name\n"); 6014 } else { 6015 if (RAW == ';') { 6016 NEXT; 6017 if ((ctxt->sax != NULL) && 6018 (ctxt->sax->getParameterEntity != NULL)) 6019 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6020 name); 6021 if (entity == NULL) { 6022 /* 6023 * [ WFC: Entity Declared ] 6024 * In a document without any DTD, a document with only an 6025 * internal DTD subset which contains no parameter entity 6026 * references, or a document with "standalone='yes'", ... 6027 * ... The declaration of a parameter entity must precede 6028 * any reference to it... 6029 */ 6030 if ((ctxt->standalone == 1) || 6031 ((ctxt->hasExternalSubset == 0) && 6032 (ctxt->hasPErefs == 0))) { 6033 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6034 "PEReference: %%%s; not found\n", 6035 name); 6036 } else { 6037 /* 6038 * [ VC: Entity Declared ] 6039 * In a document with an external subset or external 6040 * parameter entities with "standalone='no'", ... 6041 * ... The declaration of a parameter entity must 6042 * precede any reference to it... 
6043 */ 6044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6045 "PEReference: %%%s; not found\n", 6046 name, NULL); 6047 ctxt->valid = 0; 6048 } 6049 } else { 6050 /* 6051 * Internal checking in case the entity quest barfed 6052 */ 6053 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6054 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6055 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6056 "Internal: %%%s; is not a parameter entity\n", 6057 name, NULL); 6058 } else if (ctxt->input->free != deallocblankswrapper) { 6059 input = 6060 xmlNewBlanksWrapperInputStream(ctxt, entity); 6061 xmlPushInput(ctxt, input); 6062 } else { 6063 /* 6064 * TODO !!! 6065 * handle the extra spaces added before and after 6066 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6067 */ 6068 input = xmlNewEntityInputStream(ctxt, entity); 6069 xmlPushInput(ctxt, input); 6070 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6071 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6072 (IS_BLANK_CH(NXT(5)))) { 6073 xmlParseTextDecl(ctxt); 6074 if (ctxt->errNo == 6075 XML_ERR_UNSUPPORTED_ENCODING) { 6076 /* 6077 * The XML REC instructs us to stop parsing 6078 * right here 6079 */ 6080 ctxt->instate = XML_PARSER_EOF; 6081 return; 6082 } 6083 } 6084 } 6085 } 6086 ctxt->hasPErefs = 1; 6087 } else { 6088 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6089 } 6090 } 6091 } 6092} 6093 6094/** 6095 * xmlParseStringPEReference: 6096 * @ctxt: an XML parser context 6097 * @str: a pointer to an index in the string 6098 * 6099 * parse PEReference declarations 6100 * 6101 * [69] PEReference ::= '%' Name ';' 6102 * 6103 * [ WFC: No Recursion ] 6104 * A parsed entity must not contain a recursive 6105 * reference to itself, either directly or indirectly. 6106 * 6107 * [ WFC: Entity Declared ] 6108 * In a document without any DTD, a document with only an internal DTD 6109 * subset which contains no parameter entity references, or a document 6110 * with "standalone='yes'", ... ... 
The declaration of a parameter 6111 * entity must precede any reference to it... 6112 * 6113 * [ VC: Entity Declared ] 6114 * In a document with an external subset or external parameter entities 6115 * with "standalone='no'", ... ... The declaration of a parameter entity 6116 * must precede any reference to it... 6117 * 6118 * [ WFC: In DTD ] 6119 * Parameter-entity references may only appear in the DTD. 6120 * NOTE: misleading but this is handled. 6121 * 6122 * Returns the string of the entity content. 6123 * str is updated to the current value of the index 6124 */ 6125xmlEntityPtr 6126xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6127 const xmlChar *ptr; 6128 xmlChar cur; 6129 xmlChar *name; 6130 xmlEntityPtr entity = NULL; 6131 6132 if ((str == NULL) || (*str == NULL)) return(NULL); 6133 ptr = *str; 6134 cur = *ptr; 6135 if (cur == '%') { 6136 ptr++; 6137 cur = *ptr; 6138 name = xmlParseStringName(ctxt, &ptr); 6139 if (name == NULL) { 6140 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6141 "xmlParseStringPEReference: no name\n"); 6142 } else { 6143 cur = *ptr; 6144 if (cur == ';') { 6145 ptr++; 6146 cur = *ptr; 6147 if ((ctxt->sax != NULL) && 6148 (ctxt->sax->getParameterEntity != NULL)) 6149 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6150 name); 6151 if (entity == NULL) { 6152 /* 6153 * [ WFC: Entity Declared ] 6154 * In a document without any DTD, a document with only an 6155 * internal DTD subset which contains no parameter entity 6156 * references, or a document with "standalone='yes'", ... 6157 * ... The declaration of a parameter entity must precede 6158 * any reference to it... 
6159 */ 6160 if ((ctxt->standalone == 1) || 6161 ((ctxt->hasExternalSubset == 0) && 6162 (ctxt->hasPErefs == 0))) { 6163 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6164 "PEReference: %%%s; not found\n", name); 6165 } else { 6166 /* 6167 * [ VC: Entity Declared ] 6168 * In a document with an external subset or external 6169 * parameter entities with "standalone='no'", ... 6170 * ... The declaration of a parameter entity must 6171 * precede any reference to it... 6172 */ 6173 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6174 "PEReference: %%%s; not found\n", 6175 name, NULL); 6176 ctxt->valid = 0; 6177 } 6178 } else { 6179 /* 6180 * Internal checking in case the entity quest barfed 6181 */ 6182 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6183 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6184 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6185 "%%%s; is not a parameter entity\n", 6186 name, NULL); 6187 } 6188 } 6189 ctxt->hasPErefs = 1; 6190 } else { 6191 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6192 } 6193 xmlFree(name); 6194 } 6195 } 6196 *str = ptr; 6197 return(entity); 6198} 6199 6200/** 6201 * xmlParseDocTypeDecl: 6202 * @ctxt: an XML parser context 6203 * 6204 * parse a DOCTYPE declaration 6205 * 6206 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6207 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6208 * 6209 * [ VC: Root Element Type ] 6210 * The Name in the document type declaration must match the element 6211 * type of the root element. 6212 */ 6213 6214void 6215xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6216 const xmlChar *name = NULL; 6217 xmlChar *ExternalID = NULL; 6218 xmlChar *URI = NULL; 6219 6220 /* 6221 * We know that '<!DOCTYPE' has been detected. 6222 */ 6223 SKIP(9); 6224 6225 SKIP_BLANKS; 6226 6227 /* 6228 * Parse the DOCTYPE name. 
6229 */ 6230 name = xmlParseName(ctxt); 6231 if (name == NULL) { 6232 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6233 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6234 } 6235 ctxt->intSubName = name; 6236 6237 SKIP_BLANKS; 6238 6239 /* 6240 * Check for SystemID and ExternalID 6241 */ 6242 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6243 6244 if ((URI != NULL) || (ExternalID != NULL)) { 6245 ctxt->hasExternalSubset = 1; 6246 } 6247 ctxt->extSubURI = URI; 6248 ctxt->extSubSystem = ExternalID; 6249 6250 SKIP_BLANKS; 6251 6252 /* 6253 * Create and update the internal subset. 6254 */ 6255 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6256 (!ctxt->disableSAX)) 6257 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6258 6259 /* 6260 * Is there any internal subset declarations ? 6261 * they are handled separately in xmlParseInternalSubset() 6262 */ 6263 if (RAW == '[') 6264 return; 6265 6266 /* 6267 * We should be at the end of the DOCTYPE declaration. 6268 */ 6269 if (RAW != '>') { 6270 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6271 } 6272 NEXT; 6273} 6274 6275/** 6276 * xmlParseInternalSubset: 6277 * @ctxt: an XML parser context 6278 * 6279 * parse the internal subset declaration 6280 * 6281 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6282 */ 6283 6284static void 6285xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6286 /* 6287 * Is there any DTD definition ? 6288 */ 6289 if (RAW == '[') { 6290 ctxt->instate = XML_PARSER_DTD; 6291 NEXT; 6292 /* 6293 * Parse the succession of Markup declarations and 6294 * PEReferences. 6295 * Subsequence (markupdecl | PEReference | S)* 6296 */ 6297 while (RAW != ']') { 6298 const xmlChar *check = CUR_PTR; 6299 unsigned int cons = ctxt->input->consumed; 6300 6301 SKIP_BLANKS; 6302 xmlParseMarkupDecl(ctxt); 6303 xmlParsePEReference(ctxt); 6304 6305 /* 6306 * Pop-up of finished entities. 
6307 */ 6308 while ((RAW == 0) && (ctxt->inputNr > 1)) 6309 xmlPopInput(ctxt); 6310 6311 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6313 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6314 break; 6315 } 6316 } 6317 if (RAW == ']') { 6318 NEXT; 6319 SKIP_BLANKS; 6320 } 6321 } 6322 6323 /* 6324 * We should be at the end of the DOCTYPE declaration. 6325 */ 6326 if (RAW != '>') { 6327 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6328 } 6329 NEXT; 6330} 6331 6332#ifdef LIBXML_SAX1_ENABLED 6333/** 6334 * xmlParseAttribute: 6335 * @ctxt: an XML parser context 6336 * @value: a xmlChar ** used to store the value of the attribute 6337 * 6338 * parse an attribute 6339 * 6340 * [41] Attribute ::= Name Eq AttValue 6341 * 6342 * [ WFC: No External Entity References ] 6343 * Attribute values cannot contain direct or indirect entity references 6344 * to external entities. 6345 * 6346 * [ WFC: No < in Attribute Values ] 6347 * The replacement text of any entity referred to directly or indirectly in 6348 * an attribute value (other than "<") must not contain a <. 6349 * 6350 * [ VC: Attribute Value Type ] 6351 * The attribute must have been declared; the value must be of the type 6352 * declared for it. 6353 * 6354 * [25] Eq ::= S? '=' S? 6355 * 6356 * With namespace: 6357 * 6358 * [NS 11] Attribute ::= QName Eq AttValue 6359 * 6360 * Also the case QName == xmlns:??? is handled independently as a namespace 6361 * definition. 6362 * 6363 * Returns the attribute name, and the value in *value. 
6364 */ 6365 6366const xmlChar * 6367xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6368 const xmlChar *name; 6369 xmlChar *val; 6370 6371 *value = NULL; 6372 GROW; 6373 name = xmlParseName(ctxt); 6374 if (name == NULL) { 6375 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6376 "error parsing attribute name\n"); 6377 return(NULL); 6378 } 6379 6380 /* 6381 * read the value 6382 */ 6383 SKIP_BLANKS; 6384 if (RAW == '=') { 6385 NEXT; 6386 SKIP_BLANKS; 6387 val = xmlParseAttValue(ctxt); 6388 ctxt->instate = XML_PARSER_CONTENT; 6389 } else { 6390 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6391 "Specification mandate value for attribute %s\n", name); 6392 return(NULL); 6393 } 6394 6395 /* 6396 * Check that xml:lang conforms to the specification 6397 * No more registered as an error, just generate a warning now 6398 * since this was deprecated in XML second edition 6399 */ 6400 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6401 if (!xmlCheckLanguageID(val)) { 6402 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 6403 "Malformed value for xml:lang : %s\n", 6404 val, NULL); 6405 } 6406 } 6407 6408 /* 6409 * Check that xml:space conforms to the specification 6410 */ 6411 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6412 if (xmlStrEqual(val, BAD_CAST "default")) 6413 *(ctxt->space) = 0; 6414 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6415 *(ctxt->space) = 1; 6416 else { 6417 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6418"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 6419 val); 6420 } 6421 } 6422 6423 *value = val; 6424 return(name); 6425} 6426 6427/** 6428 * xmlParseStartTag: 6429 * @ctxt: an XML parser context 6430 * 6431 * parse a start of tag either for rule element or 6432 * EmptyElement. In both case we don't parse the tag closing chars. 6433 * 6434 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 6435 * 6436 * [ WFC: Unique Att Spec ] 6437 * No attribute name may appear more than once in the same start-tag or 6438 * empty-element tag. 6439 * 6440 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6441 * 6442 * [ WFC: Unique Att Spec ] 6443 * No attribute name may appear more than once in the same start-tag or 6444 * empty-element tag. 6445 * 6446 * With namespace: 6447 * 6448 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6449 * 6450 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6451 * 6452 * Returns the element name parsed 6453 */ 6454 6455const xmlChar * 6456xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6457 const xmlChar *name; 6458 const xmlChar *attname; 6459 xmlChar *attvalue; 6460 const xmlChar **atts = ctxt->atts; 6461 int nbatts = 0; 6462 int maxatts = ctxt->maxatts; 6463 int i; 6464 6465 if (RAW != '<') return(NULL); 6466 NEXT1; 6467 6468 name = xmlParseName(ctxt); 6469 if (name == NULL) { 6470 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6471 "xmlParseStartTag: invalid element name\n"); 6472 return(NULL); 6473 } 6474 6475 /* 6476 * Now parse the attributes, it ends up with the ending 6477 * 6478 * (S Attribute)* S? 6479 */ 6480 SKIP_BLANKS; 6481 GROW; 6482 6483 while ((RAW != '>') && 6484 ((RAW != '/') || (NXT(1) != '>')) && 6485 (IS_BYTE_CHAR(RAW))) { 6486 const xmlChar *q = CUR_PTR; 6487 unsigned int cons = ctxt->input->consumed; 6488 6489 attname = xmlParseAttribute(ctxt, &attvalue); 6490 if ((attname != NULL) && (attvalue != NULL)) { 6491 /* 6492 * [ WFC: Unique Att Spec ] 6493 * No attribute name may appear more than once in the same 6494 * start-tag or empty-element tag. 
6495 */ 6496 for (i = 0; i < nbatts;i += 2) { 6497 if (xmlStrEqual(atts[i], attname)) { 6498 xmlErrAttributeDup(ctxt, NULL, attname); 6499 xmlFree(attvalue); 6500 goto failed; 6501 } 6502 } 6503 /* 6504 * Add the pair to atts 6505 */ 6506 if (atts == NULL) { 6507 maxatts = 22; /* allow for 10 attrs by default */ 6508 atts = (const xmlChar **) 6509 xmlMalloc(maxatts * sizeof(xmlChar *)); 6510 if (atts == NULL) { 6511 xmlErrMemory(ctxt, NULL); 6512 if (attvalue != NULL) 6513 xmlFree(attvalue); 6514 goto failed; 6515 } 6516 ctxt->atts = atts; 6517 ctxt->maxatts = maxatts; 6518 } else if (nbatts + 4 > maxatts) { 6519 const xmlChar **n; 6520 6521 maxatts *= 2; 6522 n = (const xmlChar **) xmlRealloc((void *) atts, 6523 maxatts * sizeof(const xmlChar *)); 6524 if (n == NULL) { 6525 xmlErrMemory(ctxt, NULL); 6526 if (attvalue != NULL) 6527 xmlFree(attvalue); 6528 goto failed; 6529 } 6530 atts = n; 6531 ctxt->atts = atts; 6532 ctxt->maxatts = maxatts; 6533 } 6534 atts[nbatts++] = attname; 6535 atts[nbatts++] = attvalue; 6536 atts[nbatts] = NULL; 6537 atts[nbatts + 1] = NULL; 6538 } else { 6539 if (attvalue != NULL) 6540 xmlFree(attvalue); 6541 } 6542 6543failed: 6544 6545 GROW 6546 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6547 break; 6548 if (!IS_BLANK_CH(RAW)) { 6549 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6550 "attributes construct error\n"); 6551 } 6552 SKIP_BLANKS; 6553 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 6554 (attname == NULL) && (attvalue == NULL)) { 6555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 6556 "xmlParseStartTag: problem parsing attributes\n"); 6557 break; 6558 } 6559 SHRINK; 6560 GROW; 6561 } 6562 6563 /* 6564 * SAX: Start of Element ! 
6565 */ 6566 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6567 (!ctxt->disableSAX)) { 6568 if (nbatts > 0) 6569 ctxt->sax->startElement(ctxt->userData, name, atts); 6570 else 6571 ctxt->sax->startElement(ctxt->userData, name, NULL); 6572 } 6573 6574 if (atts != NULL) { 6575 /* Free only the content strings */ 6576 for (i = 1;i < nbatts;i+=2) 6577 if (atts[i] != NULL) 6578 xmlFree((xmlChar *) atts[i]); 6579 } 6580 return(name); 6581} 6582 6583/** 6584 * xmlParseEndTag1: 6585 * @ctxt: an XML parser context 6586 * @line: line of the start tag 6587 * @nsNr: number of namespaces on the start tag 6588 * 6589 * parse an end of tag 6590 * 6591 * [42] ETag ::= '</' Name S? '>' 6592 * 6593 * With namespace 6594 * 6595 * [NS 9] ETag ::= '</' QName S? '>' 6596 */ 6597 6598static void 6599xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 6600 const xmlChar *name; 6601 6602 GROW; 6603 if ((RAW != '<') || (NXT(1) != '/')) { 6604 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 6605 "xmlParseEndTag: '</' not found\n"); 6606 return; 6607 } 6608 SKIP(2); 6609 6610 name = xmlParseNameAndCompare(ctxt,ctxt->name); 6611 6612 /* 6613 * We should definitely be at the ending "S? '>'" part 6614 */ 6615 GROW; 6616 SKIP_BLANKS; 6617 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 6618 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6619 } else 6620 NEXT1; 6621 6622 /* 6623 * [ WFC: Element Type Match ] 6624 * The Name in an element's end-tag must match the element type in the 6625 * start-tag. 
6626 * 6627 */ 6628 if (name != (xmlChar*)1) { 6629 if (name == NULL) name = BAD_CAST "unparseable"; 6630 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 6631 "Opening and ending tag mismatch: %s line %d and %s\n", 6632 ctxt->name, line, name); 6633 } 6634 6635 /* 6636 * SAX: End of Tag 6637 */ 6638 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6639 (!ctxt->disableSAX)) 6640 ctxt->sax->endElement(ctxt->userData, ctxt->name); 6641 6642 namePop(ctxt); 6643 spacePop(ctxt); 6644 return; 6645} 6646 6647/** 6648 * xmlParseEndTag: 6649 * @ctxt: an XML parser context 6650 * 6651 * parse an end of tag 6652 * 6653 * [42] ETag ::= '</' Name S? '>' 6654 * 6655 * With namespace 6656 * 6657 * [NS 9] ETag ::= '</' QName S? '>' 6658 */ 6659 6660void 6661xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6662 xmlParseEndTag1(ctxt, 0); 6663} 6664#endif /* LIBXML_SAX1_ENABLED */ 6665 6666/************************************************************************ 6667 * * 6668 * SAX 2 specific operations * 6669 * * 6670 ************************************************************************/ 6671 6672static const xmlChar * 6673xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 6674 int len = 0, l; 6675 int c; 6676 int count = 0; 6677 6678 /* 6679 * Handler for more complex cases 6680 */ 6681 GROW; 6682 c = CUR_CHAR(l); 6683 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 6684 (!IS_LETTER(c) && (c != '_'))) { 6685 return(NULL); 6686 } 6687 6688 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 6689 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 6690 (c == '.') || (c == '-') || (c == '_') || 6691 (IS_COMBINING(c)) || 6692 (IS_EXTENDER(c)))) { 6693 if (count++ > 100) { 6694 count = 0; 6695 GROW; 6696 } 6697 len += l; 6698 NEXTL(l); 6699 c = CUR_CHAR(l); 6700 } 6701 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 6702} 6703 6704/* 6705 * xmlGetNamespace: 6706 * @ctxt: an XML parser context 6707 * @prefix: the prefix to lookup 6708 * 
6709 * Lookup the namespace name for the @prefix (which ca be NULL) 6710 * The prefix must come from the @ctxt->dict dictionnary 6711 * 6712 * Returns the namespace name or NULL if not bound 6713 */ 6714static const xmlChar * 6715xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 6716 int i; 6717 6718 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 6719 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 6720 if (ctxt->nsTab[i] == prefix) { 6721 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 6722 return(NULL); 6723 return(ctxt->nsTab[i + 1]); 6724 } 6725 return(NULL); 6726} 6727 6728/** 6729 * xmlParseNCName: 6730 * @ctxt: an XML parser context 6731 * @len: lenght of the string parsed 6732 * 6733 * parse an XML name. 6734 * 6735 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 6736 * CombiningChar | Extender 6737 * 6738 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 6739 * 6740 * Returns the Name parsed or NULL 6741 */ 6742 6743static const xmlChar * 6744xmlParseNCName(xmlParserCtxtPtr ctxt) { 6745 const xmlChar *in; 6746 const xmlChar *ret; 6747 int count = 0; 6748 6749 /* 6750 * Accelerator for simple ASCII names 6751 */ 6752 in = ctxt->input->cur; 6753 if (((*in >= 0x61) && (*in <= 0x7A)) || 6754 ((*in >= 0x41) && (*in <= 0x5A)) || 6755 (*in == '_')) { 6756 in++; 6757 while (((*in >= 0x61) && (*in <= 0x7A)) || 6758 ((*in >= 0x41) && (*in <= 0x5A)) || 6759 ((*in >= 0x30) && (*in <= 0x39)) || 6760 (*in == '_') || (*in == '-') || 6761 (*in == '.')) 6762 in++; 6763 if ((*in > 0) && (*in < 0x80)) { 6764 count = in - ctxt->input->cur; 6765 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 6766 ctxt->input->cur = in; 6767 ctxt->nbChars += count; 6768 ctxt->input->col += count; 6769 if (ret == NULL) { 6770 xmlErrMemory(ctxt, NULL); 6771 } 6772 return(ret); 6773 } 6774 } 6775 return(xmlParseNCNameComplex(ctxt)); 6776} 6777 6778/** 6779 * xmlParseQName: 6780 * @ctxt: an XML parser context 6781 * @prefix: pointer to store the prefix part 6782 * 
6783 * parse an XML Namespace QName 6784 * 6785 * [6] QName ::= (Prefix ':')? LocalPart 6786 * [7] Prefix ::= NCName 6787 * [8] LocalPart ::= NCName 6788 * 6789 * Returns the Name parsed or NULL 6790 */ 6791 6792static const xmlChar * 6793xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 6794 const xmlChar *l, *p; 6795 6796 GROW; 6797 6798 l = xmlParseNCName(ctxt); 6799 if (l == NULL) { 6800 if (CUR == ':') { 6801 l = xmlParseName(ctxt); 6802 if (l != NULL) { 6803 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6804 "Failed to parse QName '%s'\n", l, NULL, NULL); 6805 *prefix = NULL; 6806 return(l); 6807 } 6808 } 6809 return(NULL); 6810 } 6811 if (CUR == ':') { 6812 NEXT; 6813 p = l; 6814 l = xmlParseNCName(ctxt); 6815 if (l == NULL) { 6816 xmlChar *tmp; 6817 6818 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6819 "Failed to parse QName '%s:'\n", p, NULL, NULL); 6820 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 6821 p = xmlDictLookup(ctxt->dict, tmp, -1); 6822 if (tmp != NULL) xmlFree(tmp); 6823 *prefix = NULL; 6824 return(p); 6825 } 6826 if (CUR == ':') { 6827 xmlChar *tmp; 6828 6829 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6830 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 6831 NEXT; 6832 tmp = (xmlChar *) xmlParseName(ctxt); 6833 if (tmp != NULL) { 6834 tmp = xmlBuildQName(tmp, l, NULL, 0); 6835 l = xmlDictLookup(ctxt->dict, tmp, -1); 6836 if (tmp != NULL) xmlFree(tmp); 6837 *prefix = p; 6838 return(l); 6839 } 6840 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 6841 l = xmlDictLookup(ctxt->dict, tmp, -1); 6842 if (tmp != NULL) xmlFree(tmp); 6843 *prefix = p; 6844 return(l); 6845 } 6846 *prefix = p; 6847 } else 6848 *prefix = NULL; 6849 return(l); 6850} 6851 6852/** 6853 * xmlParseQNameAndCompare: 6854 * @ctxt: an XML parser context 6855 * @name: the localname 6856 * @prefix: the prefix, if any. 
6857 * 6858 * parse an XML name and compares for match 6859 * (specialized for endtag parsing) 6860 * 6861 * Returns NULL for an illegal name, (xmlChar*) 1 for success 6862 * and the name for mismatch 6863 */ 6864 6865static const xmlChar * 6866xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 6867 xmlChar const *prefix) { 6868 const xmlChar *cmp = name; 6869 const xmlChar *in; 6870 const xmlChar *ret; 6871 const xmlChar *prefix2; 6872 6873 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 6874 6875 GROW; 6876 in = ctxt->input->cur; 6877 6878 cmp = prefix; 6879 while (*in != 0 && *in == *cmp) { 6880 ++in; 6881 ++cmp; 6882 } 6883 if ((*cmp == 0) && (*in == ':')) { 6884 in++; 6885 cmp = name; 6886 while (*in != 0 && *in == *cmp) { 6887 ++in; 6888 ++cmp; 6889 } 6890 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 6891 /* success */ 6892 ctxt->input->cur = in; 6893 return((const xmlChar*) 1); 6894 } 6895 } 6896 /* 6897 * all strings coms from the dictionary, equality can be done directly 6898 */ 6899 ret = xmlParseQName (ctxt, &prefix2); 6900 if ((ret == name) && (prefix == prefix2)) 6901 return((const xmlChar*) 1); 6902 return ret; 6903} 6904 6905/** 6906 * xmlParseAttValueInternal: 6907 * @ctxt: an XML parser context 6908 * @len: attribute len result 6909 * @alloc: whether the attribute was reallocated as a new string 6910 * @normalize: if 1 then further non-CDATA normalization must be done 6911 * 6912 * parse a value for an attribute. 6913 * NOTE: if no normalization is needed, the routine will return pointers 6914 * directly from the data buffer. 
6915 * 6916 * 3.3.3 Attribute-Value Normalization: 6917 * Before the value of an attribute is passed to the application or 6918 * checked for validity, the XML processor must normalize it as follows: 6919 * - a character reference is processed by appending the referenced 6920 * character to the attribute value 6921 * - an entity reference is processed by recursively processing the 6922 * replacement text of the entity 6923 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 6924 * appending #x20 to the normalized value, except that only a single 6925 * #x20 is appended for a "#xD#xA" sequence that is part of an external 6926 * parsed entity or the literal entity value of an internal parsed entity 6927 * - other characters are processed by appending them to the normalized value 6928 * If the declared value is not CDATA, then the XML processor must further 6929 * process the normalized attribute value by discarding any leading and 6930 * trailing space (#x20) characters, and by replacing sequences of space 6931 * (#x20) characters by a single space (#x20) character. 6932 * All attributes for which no declaration has been read should be treated 6933 * by a non-validating parser as if declared CDATA. 6934 * 6935 * Returns the AttValue parsed or NULL. The value has to be freed by the 6936 * caller if it was copied, this can be detected by val[*len] == 0. 
6937 */ 6938 6939static xmlChar * 6940xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 6941 int normalize) 6942{ 6943 xmlChar limit = 0; 6944 const xmlChar *in = NULL, *start, *end, *last; 6945 xmlChar *ret = NULL; 6946 6947 GROW; 6948 in = (xmlChar *) CUR_PTR; 6949 if (*in != '"' && *in != '\'') { 6950 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 6951 return (NULL); 6952 } 6953 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 6954 6955 /* 6956 * try to handle in this routine the most common case where no 6957 * allocation of a new string is required and where content is 6958 * pure ASCII. 6959 */ 6960 limit = *in++; 6961 end = ctxt->input->end; 6962 start = in; 6963 if (in >= end) { 6964 const xmlChar *oldbase = ctxt->input->base; 6965 GROW; 6966 if (oldbase != ctxt->input->base) { 6967 long delta = ctxt->input->base - oldbase; 6968 start = start + delta; 6969 in = in + delta; 6970 } 6971 end = ctxt->input->end; 6972 } 6973 if (normalize) { 6974 /* 6975 * Skip any leading spaces 6976 */ 6977 while ((in < end) && (*in != limit) && 6978 ((*in == 0x20) || (*in == 0x9) || 6979 (*in == 0xA) || (*in == 0xD))) { 6980 in++; 6981 start = in; 6982 if (in >= end) { 6983 const xmlChar *oldbase = ctxt->input->base; 6984 GROW; 6985 if (oldbase != ctxt->input->base) { 6986 long delta = ctxt->input->base - oldbase; 6987 start = start + delta; 6988 in = in + delta; 6989 } 6990 end = ctxt->input->end; 6991 } 6992 } 6993 while ((in < end) && (*in != limit) && (*in >= 0x20) && 6994 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 6995 if ((*in++ == 0x20) && (*in == 0x20)) break; 6996 if (in >= end) { 6997 const xmlChar *oldbase = ctxt->input->base; 6998 GROW; 6999 if (oldbase != ctxt->input->base) { 7000 long delta = ctxt->input->base - oldbase; 7001 start = start + delta; 7002 in = in + delta; 7003 } 7004 end = ctxt->input->end; 7005 } 7006 } 7007 last = in; 7008 /* 7009 * skip the trailing blanks 7010 */ 7011 while ((last[-1] == 0x20) && (last > start)) 
last--; 7012 while ((in < end) && (*in != limit) && 7013 ((*in == 0x20) || (*in == 0x9) || 7014 (*in == 0xA) || (*in == 0xD))) { 7015 in++; 7016 if (in >= end) { 7017 const xmlChar *oldbase = ctxt->input->base; 7018 GROW; 7019 if (oldbase != ctxt->input->base) { 7020 long delta = ctxt->input->base - oldbase; 7021 start = start + delta; 7022 in = in + delta; 7023 last = last + delta; 7024 } 7025 end = ctxt->input->end; 7026 } 7027 } 7028 if (*in != limit) goto need_complex; 7029 } else { 7030 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7031 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7032 in++; 7033 if (in >= end) { 7034 const xmlChar *oldbase = ctxt->input->base; 7035 GROW; 7036 if (oldbase != ctxt->input->base) { 7037 long delta = ctxt->input->base - oldbase; 7038 start = start + delta; 7039 in = in + delta; 7040 } 7041 end = ctxt->input->end; 7042 } 7043 } 7044 last = in; 7045 if (*in != limit) goto need_complex; 7046 } 7047 in++; 7048 if (len != NULL) { 7049 *len = last - start; 7050 ret = (xmlChar *) start; 7051 } else { 7052 if (alloc) *alloc = 1; 7053 ret = xmlStrndup(start, last - start); 7054 } 7055 CUR_PTR = in; 7056 if (alloc) *alloc = 0; 7057 return ret; 7058need_complex: 7059 if (alloc) *alloc = 1; 7060 return xmlParseAttValueComplex(ctxt, len, normalize); 7061} 7062 7063/** 7064 * xmlParseAttribute2: 7065 * @ctxt: an XML parser context 7066 * @pref: the element prefix 7067 * @elem: the element name 7068 * @prefix: a xmlChar ** used to store the value of the attribute prefix 7069 * @value: a xmlChar ** used to store the value of the attribute 7070 * @len: an int * to save the length of the attribute 7071 * @alloc: an int * to indicate if the attribute was allocated 7072 * 7073 * parse an attribute in the new SAX2 framework. 7074 * 7075 * Returns the attribute name, and the value in *value, . 
7076 */ 7077 7078static const xmlChar * 7079xmlParseAttribute2(xmlParserCtxtPtr ctxt, 7080 const xmlChar *pref, const xmlChar *elem, 7081 const xmlChar **prefix, xmlChar **value, 7082 int *len, int *alloc) { 7083 const xmlChar *name; 7084 xmlChar *val; 7085 int normalize = 0; 7086 7087 *value = NULL; 7088 GROW; 7089 name = xmlParseQName(ctxt, prefix); 7090 if (name == NULL) { 7091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7092 "error parsing attribute name\n"); 7093 return(NULL); 7094 } 7095 7096 /* 7097 * get the type if needed 7098 */ 7099 if (ctxt->attsSpecial != NULL) { 7100 int type; 7101 7102 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 7103 pref, elem, *prefix, name); 7104 if (type != 0) normalize = 1; 7105 } 7106 7107 /* 7108 * read the value 7109 */ 7110 SKIP_BLANKS; 7111 if (RAW == '=') { 7112 NEXT; 7113 SKIP_BLANKS; 7114 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 7115 ctxt->instate = XML_PARSER_CONTENT; 7116 } else { 7117 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7118 "Specification mandate value for attribute %s\n", name); 7119 return(NULL); 7120 } 7121 7122 /* 7123 * Check that xml:lang conforms to the specification 7124 * No more registered as an error, just generate a warning now 7125 * since this was deprecated in XML second edition 7126 */ 7127 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7128 if (!xmlCheckLanguageID(val)) { 7129 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7130 "Malformed value for xml:lang : %s\n", 7131 val, NULL); 7132 } 7133 } 7134 7135 /* 7136 * Check that xml:space conforms to the specification 7137 */ 7138 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7139 if (xmlStrEqual(val, BAD_CAST "default")) 7140 *(ctxt->space) = 0; 7141 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7142 *(ctxt->space) = 1; 7143 else { 7144 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7145"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7146 
val); 7147 } 7148 } 7149 7150 *value = val; 7151 return(name); 7152} 7153 7154/** 7155 * xmlParseStartTag2: 7156 * @ctxt: an XML parser context 7157 * 7158 * parse a start of tag either for rule element or 7159 * EmptyElement. In both case we don't parse the tag closing chars. 7160 * This routine is called when running SAX2 parsing 7161 * 7162 * [40] STag ::= '<' Name (S Attribute)* S? '>' 7163 * 7164 * [ WFC: Unique Att Spec ] 7165 * No attribute name may appear more than once in the same start-tag or 7166 * empty-element tag. 7167 * 7168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7169 * 7170 * [ WFC: Unique Att Spec ] 7171 * No attribute name may appear more than once in the same start-tag or 7172 * empty-element tag. 7173 * 7174 * With namespace: 7175 * 7176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7177 * 7178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7179 * 7180 * Returns the element name parsed 7181 */ 7182 7183static const xmlChar * 7184xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 7185 const xmlChar **URI, int *tlen) { 7186 const xmlChar *localname; 7187 const xmlChar *prefix; 7188 const xmlChar *attname; 7189 const xmlChar *aprefix; 7190 const xmlChar *nsname; 7191 xmlChar *attvalue; 7192 const xmlChar **atts = ctxt->atts; 7193 int maxatts = ctxt->maxatts; 7194 int nratts, nbatts, nbdef; 7195 int i, j, nbNs, attval; 7196 const xmlChar *base; 7197 unsigned long cur; 7198 7199 if (RAW != '<') return(NULL); 7200 NEXT1; 7201 7202 /* 7203 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 7204 * point since the attribute values may be stored as pointers to 7205 * the buffer and calling SHRINK would destroy them ! 7206 * The Shrinking is only possible once the full set of attribute 7207 * callbacks have been done. 
7208 */ 7209reparse: 7210 SHRINK; 7211 base = ctxt->input->base; 7212 cur = ctxt->input->cur - ctxt->input->base; 7213 nbatts = 0; 7214 nratts = 0; 7215 nbdef = 0; 7216 nbNs = 0; 7217 attval = 0; 7218 7219 localname = xmlParseQName(ctxt, &prefix); 7220 if (localname == NULL) { 7221 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7222 "StartTag: invalid element name\n"); 7223 return(NULL); 7224 } 7225 *tlen = ctxt->input->cur - ctxt->input->base - cur; 7226 7227 /* 7228 * Now parse the attributes, it ends up with the ending 7229 * 7230 * (S Attribute)* S? 7231 */ 7232 SKIP_BLANKS; 7233 GROW; 7234 if (ctxt->input->base != base) goto base_changed; 7235 7236 while ((RAW != '>') && 7237 ((RAW != '/') || (NXT(1) != '>')) && 7238 (IS_BYTE_CHAR(RAW))) { 7239 const xmlChar *q = CUR_PTR; 7240 unsigned int cons = ctxt->input->consumed; 7241 int len = -1, alloc = 0; 7242 7243 attname = xmlParseAttribute2(ctxt, prefix, localname, 7244 &aprefix, &attvalue, &len, &alloc); 7245 if ((attname != NULL) && (attvalue != NULL)) { 7246 if (len < 0) len = xmlStrlen(attvalue); 7247 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7248 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7249 xmlURIPtr uri; 7250 7251 if (*URL != 0) { 7252 uri = xmlParseURI((const char *) URL); 7253 if (uri == NULL) { 7254 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7255 "xmlns: %s not a valid URI\n", 7256 URL, NULL); 7257 } else { 7258 if (uri->scheme == NULL) { 7259 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7260 "xmlns: URI %s is not absolute\n", 7261 URL, NULL); 7262 } 7263 xmlFreeURI(uri); 7264 } 7265 } 7266 /* 7267 * check that it's not a defined namespace 7268 */ 7269 for (j = 1;j <= nbNs;j++) 7270 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7271 break; 7272 if (j <= nbNs) 7273 xmlErrAttributeDup(ctxt, NULL, attname); 7274 else 7275 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 7276 if (alloc != 0) xmlFree(attvalue); 7277 SKIP_BLANKS; 7278 continue; 7279 } 7280 if (aprefix == ctxt->str_xmlns) { 
7281 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7282 xmlURIPtr uri; 7283 7284 if (attname == ctxt->str_xml) { 7285 if (URL != ctxt->str_xml_ns) { 7286 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 7287 "xml namespace prefix mapped to wrong URI\n", 7288 NULL, NULL, NULL); 7289 } 7290 /* 7291 * Do not keep a namespace definition node 7292 */ 7293 if (alloc != 0) xmlFree(attvalue); 7294 SKIP_BLANKS; 7295 continue; 7296 } 7297 uri = xmlParseURI((const char *) URL); 7298 if (uri == NULL) { 7299 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7300 "xmlns:%s: '%s' is not a valid URI\n", 7301 attname, URL); 7302 } else { 7303 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 7304 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7305 "xmlns:%s: URI %s is not absolute\n", 7306 attname, URL); 7307 } 7308 xmlFreeURI(uri); 7309 } 7310 7311 /* 7312 * check that it's not a defined namespace 7313 */ 7314 for (j = 1;j <= nbNs;j++) 7315 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7316 break; 7317 if (j <= nbNs) 7318 xmlErrAttributeDup(ctxt, aprefix, attname); 7319 else 7320 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 7321 if (alloc != 0) xmlFree(attvalue); 7322 SKIP_BLANKS; 7323 continue; 7324 } 7325 7326 /* 7327 * Add the pair to atts 7328 */ 7329 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7330 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7331 if (attvalue[len] == 0) 7332 xmlFree(attvalue); 7333 goto failed; 7334 } 7335 maxatts = ctxt->maxatts; 7336 atts = ctxt->atts; 7337 } 7338 ctxt->attallocs[nratts++] = alloc; 7339 atts[nbatts++] = attname; 7340 atts[nbatts++] = aprefix; 7341 atts[nbatts++] = NULL; /* the URI will be fetched later */ 7342 atts[nbatts++] = attvalue; 7343 attvalue += len; 7344 atts[nbatts++] = attvalue; 7345 /* 7346 * tag if some deallocation is needed 7347 */ 7348 if (alloc != 0) attval = 1; 7349 } else { 7350 if ((attvalue != NULL) && (attvalue[len] == 0)) 7351 xmlFree(attvalue); 7352 } 7353 7354failed: 7355 7356 GROW 7357 if (ctxt->input->base != base) 
goto base_changed; 7358 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7359 break; 7360 if (!IS_BLANK_CH(RAW)) { 7361 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7362 "attributes construct error\n"); 7363 } 7364 SKIP_BLANKS; 7365 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7366 (attname == NULL) && (attvalue == NULL)) { 7367 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7368 "xmlParseStartTag: problem parsing attributes\n"); 7369 break; 7370 } 7371 GROW; 7372 if (ctxt->input->base != base) goto base_changed; 7373 } 7374 7375 /* 7376 * The attributes defaulting 7377 */ 7378 if (ctxt->attsDefault != NULL) { 7379 xmlDefAttrsPtr defaults; 7380 7381 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 7382 if (defaults != NULL) { 7383 for (i = 0;i < defaults->nbAttrs;i++) { 7384 attname = defaults->values[4 * i]; 7385 aprefix = defaults->values[4 * i + 1]; 7386 7387 /* 7388 * special work for namespaces defaulted defs 7389 */ 7390 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7391 /* 7392 * check that it's not a defined namespace 7393 */ 7394 for (j = 1;j <= nbNs;j++) 7395 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7396 break; 7397 if (j <= nbNs) continue; 7398 7399 nsname = xmlGetNamespace(ctxt, NULL); 7400 if (nsname != defaults->values[4 * i + 2]) { 7401 if (nsPush(ctxt, NULL, 7402 defaults->values[4 * i + 2]) > 0) 7403 nbNs++; 7404 } 7405 } else if (aprefix == ctxt->str_xmlns) { 7406 /* 7407 * check that it's not a defined namespace 7408 */ 7409 for (j = 1;j <= nbNs;j++) 7410 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7411 break; 7412 if (j <= nbNs) continue; 7413 7414 nsname = xmlGetNamespace(ctxt, attname); 7415 if (nsname != defaults->values[2]) { 7416 if (nsPush(ctxt, attname, 7417 defaults->values[4 * i + 2]) > 0) 7418 nbNs++; 7419 } 7420 } else { 7421 /* 7422 * check that it's not a defined attribute 7423 */ 7424 for (j = 0;j < nbatts;j+=5) { 7425 if ((attname == atts[j]) && (aprefix == atts[j+1])) 7426 break; 
7427 } 7428 if (j < nbatts) continue; 7429 7430 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7431 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7432 return(NULL); 7433 } 7434 maxatts = ctxt->maxatts; 7435 atts = ctxt->atts; 7436 } 7437 atts[nbatts++] = attname; 7438 atts[nbatts++] = aprefix; 7439 if (aprefix == NULL) 7440 atts[nbatts++] = NULL; 7441 else 7442 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 7443 atts[nbatts++] = defaults->values[4 * i + 2]; 7444 atts[nbatts++] = defaults->values[4 * i + 3]; 7445 nbdef++; 7446 } 7447 } 7448 } 7449 } 7450 7451 /* 7452 * The attributes checkings 7453 */ 7454 for (i = 0; i < nbatts;i += 5) { 7455 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 7456 if ((atts[i + 1] != NULL) && (nsname == NULL)) { 7457 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7458 "Namespace prefix %s for %s on %s is not defined\n", 7459 atts[i + 1], atts[i], localname); 7460 } 7461 atts[i + 2] = nsname; 7462 /* 7463 * [ WFC: Unique Att Spec ] 7464 * No attribute name may appear more than once in the same 7465 * start-tag or empty-element tag. 7466 * As extended by the Namespace in XML REC. 7467 */ 7468 for (j = 0; j < i;j += 5) { 7469 if (atts[i] == atts[j]) { 7470 if (atts[i+1] == atts[j+1]) { 7471 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 7472 break; 7473 } 7474 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 7475 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 7476 "Namespaced Attribute %s in '%s' redefined\n", 7477 atts[i], nsname, NULL); 7478 break; 7479 } 7480 } 7481 } 7482 } 7483 7484 nsname = xmlGetNamespace(ctxt, prefix); 7485 if ((prefix != NULL) && (nsname == NULL)) { 7486 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7487 "Namespace prefix %s on %s is not defined\n", 7488 prefix, localname, NULL); 7489 } 7490 *pref = prefix; 7491 *URI = nsname; 7492 7493 /* 7494 * SAX: Start of Element ! 
7495 */ 7496 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 7497 (!ctxt->disableSAX)) { 7498 if (nbNs > 0) 7499 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7500 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 7501 nbatts / 5, nbdef, atts); 7502 else 7503 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7504 nsname, 0, NULL, nbatts / 5, nbdef, atts); 7505 } 7506 7507 /* 7508 * Free up attribute allocated strings if needed 7509 */ 7510 if (attval != 0) { 7511 for (i = 3,j = 0; j < nratts;i += 5,j++) 7512 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7513 xmlFree((xmlChar *) atts[i]); 7514 } 7515 7516 return(localname); 7517 7518base_changed: 7519 /* 7520 * the attribute strings are valid iif the base didn't changed 7521 */ 7522 if (attval != 0) { 7523 for (i = 3,j = 0; j < nratts;i += 5,j++) 7524 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7525 xmlFree((xmlChar *) atts[i]); 7526 } 7527 ctxt->input->cur = ctxt->input->base + cur; 7528 if (ctxt->wellFormed == 1) { 7529 goto reparse; 7530 } 7531 return(NULL); 7532} 7533 7534/** 7535 * xmlParseEndTag2: 7536 * @ctxt: an XML parser context 7537 * @line: line of the start tag 7538 * @nsNr: number of namespaces on the start tag 7539 * 7540 * parse an end of tag 7541 * 7542 * [42] ETag ::= '</' Name S? '>' 7543 * 7544 * With namespace 7545 * 7546 * [NS 9] ETag ::= '</' QName S? 
'>' 7547 */ 7548 7549static void 7550xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 7551 const xmlChar *URI, int line, int nsNr, int tlen) { 7552 const xmlChar *name; 7553 7554 GROW; 7555 if ((RAW != '<') || (NXT(1) != '/')) { 7556 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 7557 return; 7558 } 7559 SKIP(2); 7560 7561 if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 7562 if (ctxt->input->cur[tlen] == '>') { 7563 ctxt->input->cur += tlen + 1; 7564 goto done; 7565 } 7566 ctxt->input->cur += tlen; 7567 name = (xmlChar*)1; 7568 } else { 7569 if (prefix == NULL) 7570 name = xmlParseNameAndCompare(ctxt, ctxt->name); 7571 else 7572 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 7573 } 7574 7575 /* 7576 * We should definitely be at the ending "S? '>'" part 7577 */ 7578 GROW; 7579 SKIP_BLANKS; 7580 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7581 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7582 } else 7583 NEXT1; 7584 7585 /* 7586 * [ WFC: Element Type Match ] 7587 * The Name in an element's end-tag must match the element type in the 7588 * start-tag. 7589 * 7590 */ 7591 if (name != (xmlChar*)1) { 7592 if (name == NULL) name = BAD_CAST "unparseable"; 7593 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7594 "Opening and ending tag mismatch: %s line %d and %s\n", 7595 ctxt->name, line, name); 7596 } 7597 7598 /* 7599 * SAX: End of Tag 7600 */ 7601done: 7602 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 7603 (!ctxt->disableSAX)) 7604 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 7605 7606 spacePop(ctxt); 7607 if (nsNr != 0) 7608 nsPop(ctxt, nsNr); 7609 return; 7610} 7611 7612/** 7613 * xmlParseCDSect: 7614 * @ctxt: an XML parser context 7615 * 7616 * Parse escaped pure raw content. 
7617 * 7618 * [18] CDSect ::= CDStart CData CDEnd 7619 * 7620 * [19] CDStart ::= '<![CDATA[' 7621 * 7622 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 7623 * 7624 * [21] CDEnd ::= ']]>' 7625 */ 7626void 7627xmlParseCDSect(xmlParserCtxtPtr ctxt) { 7628 xmlChar *buf = NULL; 7629 int len = 0; 7630 int size = XML_PARSER_BUFFER_SIZE; 7631 int r, rl; 7632 int s, sl; 7633 int cur, l; 7634 int count = 0; 7635 7636 /* Check 2.6.0 was NXT(0) not RAW */ 7637 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 7638 SKIP(9); 7639 } else 7640 return; 7641 7642 ctxt->instate = XML_PARSER_CDATA_SECTION; 7643 r = CUR_CHAR(rl); 7644 if (!IS_CHAR(r)) { 7645 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7646 ctxt->instate = XML_PARSER_CONTENT; 7647 return; 7648 } 7649 NEXTL(rl); 7650 s = CUR_CHAR(sl); 7651 if (!IS_CHAR(s)) { 7652 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7653 ctxt->instate = XML_PARSER_CONTENT; 7654 return; 7655 } 7656 NEXTL(sl); 7657 cur = CUR_CHAR(l); 7658 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 7659 if (buf == NULL) { 7660 xmlErrMemory(ctxt, NULL); 7661 return; 7662 } 7663 while (IS_CHAR(cur) && 7664 ((r != ']') || (s != ']') || (cur != '>'))) { 7665 if (len + 5 >= size) { 7666 size *= 2; 7667 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7668 if (buf == NULL) { 7669 xmlErrMemory(ctxt, NULL); 7670 return; 7671 } 7672 } 7673 COPY_BUF(rl,buf,len,r); 7674 r = s; 7675 rl = sl; 7676 s = cur; 7677 sl = l; 7678 count++; 7679 if (count > 50) { 7680 GROW; 7681 count = 0; 7682 } 7683 NEXTL(l); 7684 cur = CUR_CHAR(l); 7685 } 7686 buf[len] = 0; 7687 ctxt->instate = XML_PARSER_CONTENT; 7688 if (cur != '>') { 7689 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 7690 "CData section not finished\n%.50s\n", buf); 7691 xmlFree(buf); 7692 return; 7693 } 7694 NEXTL(l); 7695 7696 /* 7697 * OK the buffer is to be consumed as cdata. 
7698 */ 7699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 7700 if (ctxt->sax->cdataBlock != NULL) 7701 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 7702 else if (ctxt->sax->characters != NULL) 7703 ctxt->sax->characters(ctxt->userData, buf, len); 7704 } 7705 xmlFree(buf); 7706} 7707 7708/** 7709 * xmlParseContent: 7710 * @ctxt: an XML parser context 7711 * 7712 * Parse a content: 7713 * 7714 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 7715 */ 7716 7717void 7718xmlParseContent(xmlParserCtxtPtr ctxt) { 7719 GROW; 7720 while ((RAW != 0) && 7721 ((RAW != '<') || (NXT(1) != '/'))) { 7722 const xmlChar *test = CUR_PTR; 7723 unsigned int cons = ctxt->input->consumed; 7724 const xmlChar *cur = ctxt->input->cur; 7725 7726 /* 7727 * First case : a Processing Instruction. 7728 */ 7729 if ((*cur == '<') && (cur[1] == '?')) { 7730 xmlParsePI(ctxt); 7731 } 7732 7733 /* 7734 * Second case : a CDSection 7735 */ 7736 /* 2.6.0 test was *cur not RAW */ 7737 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 7738 xmlParseCDSect(ctxt); 7739 } 7740 7741 /* 7742 * Third case : a comment 7743 */ 7744 else if ((*cur == '<') && (NXT(1) == '!') && 7745 (NXT(2) == '-') && (NXT(3) == '-')) { 7746 xmlParseComment(ctxt); 7747 ctxt->instate = XML_PARSER_CONTENT; 7748 } 7749 7750 /* 7751 * Fourth case : a sub-element. 7752 */ 7753 else if (*cur == '<') { 7754 xmlParseElement(ctxt); 7755 } 7756 7757 /* 7758 * Fifth case : a reference. If if has not been resolved, 7759 * parsing returns it's Name, create the node 7760 */ 7761 7762 else if (*cur == '&') { 7763 xmlParseReference(ctxt); 7764 } 7765 7766 /* 7767 * Last case, text. Note that References are handled directly. 7768 */ 7769 else { 7770 xmlParseCharData(ctxt, 0); 7771 } 7772 7773 GROW; 7774 /* 7775 * Pop-up of finished entities. 
7776 */ 7777 while ((RAW == 0) && (ctxt->inputNr > 1)) 7778 xmlPopInput(ctxt); 7779 SHRINK; 7780 7781 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 7782 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7783 "detected an error in element content\n"); 7784 ctxt->instate = XML_PARSER_EOF; 7785 break; 7786 } 7787 } 7788} 7789 7790/** 7791 * xmlParseElement: 7792 * @ctxt: an XML parser context 7793 * 7794 * parse an XML element, this is highly recursive 7795 * 7796 * [39] element ::= EmptyElemTag | STag content ETag 7797 * 7798 * [ WFC: Element Type Match ] 7799 * The Name in an element's end-tag must match the element type in the 7800 * start-tag. 7801 * 7802 */ 7803 7804void 7805xmlParseElement(xmlParserCtxtPtr ctxt) { 7806 const xmlChar *name; 7807 const xmlChar *prefix; 7808 const xmlChar *URI; 7809 xmlParserNodeInfo node_info; 7810 int line, tlen; 7811 xmlNodePtr ret; 7812 int nsNr = ctxt->nsNr; 7813 7814 /* Capture start position */ 7815 if (ctxt->record_info) { 7816 node_info.begin_pos = ctxt->input->consumed + 7817 (CUR_PTR - ctxt->input->base); 7818 node_info.begin_line = ctxt->input->line; 7819 } 7820 7821 if (ctxt->spaceNr == 0) 7822 spacePush(ctxt, -1); 7823 else 7824 spacePush(ctxt, *ctxt->space); 7825 7826 line = ctxt->input->line; 7827#ifdef LIBXML_SAX1_ENABLED 7828 if (ctxt->sax2) 7829#endif /* LIBXML_SAX1_ENABLED */ 7830 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 7831#ifdef LIBXML_SAX1_ENABLED 7832 else 7833 name = xmlParseStartTag(ctxt); 7834#endif /* LIBXML_SAX1_ENABLED */ 7835 if (name == NULL) { 7836 spacePop(ctxt); 7837 return; 7838 } 7839 namePush(ctxt, name); 7840 ret = ctxt->node; 7841 7842#ifdef LIBXML_VALID_ENABLED 7843 /* 7844 * [ VC: Root Element Type ] 7845 * The Name in the document type declaration must match the element 7846 * type of the root element. 
7847 */ 7848 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7849 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7850 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7851#endif /* LIBXML_VALID_ENABLED */ 7852 7853 /* 7854 * Check for an Empty Element. 7855 */ 7856 if ((RAW == '/') && (NXT(1) == '>')) { 7857 SKIP(2); 7858 if (ctxt->sax2) { 7859 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 7860 (!ctxt->disableSAX)) 7861 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 7862#ifdef LIBXML_SAX1_ENABLED 7863 } else { 7864 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7865 (!ctxt->disableSAX)) 7866 ctxt->sax->endElement(ctxt->userData, name); 7867#endif /* LIBXML_SAX1_ENABLED */ 7868 } 7869 namePop(ctxt); 7870 spacePop(ctxt); 7871 if (nsNr != ctxt->nsNr) 7872 nsPop(ctxt, ctxt->nsNr - nsNr); 7873 if ( ret != NULL && ctxt->record_info ) { 7874 node_info.end_pos = ctxt->input->consumed + 7875 (CUR_PTR - ctxt->input->base); 7876 node_info.end_line = ctxt->input->line; 7877 node_info.node = ret; 7878 xmlParserAddNodeInfo(ctxt, &node_info); 7879 } 7880 return; 7881 } 7882 if (RAW == '>') { 7883 NEXT1; 7884 } else { 7885 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 7886 "Couldn't find end of Start Tag %s line %d\n", 7887 name, line, NULL); 7888 7889 /* 7890 * end of parsing of this node. 
7891 */ 7892 nodePop(ctxt); 7893 namePop(ctxt); 7894 spacePop(ctxt); 7895 if (nsNr != ctxt->nsNr) 7896 nsPop(ctxt, ctxt->nsNr - nsNr); 7897 7898 /* 7899 * Capture end position and add node 7900 */ 7901 if ( ret != NULL && ctxt->record_info ) { 7902 node_info.end_pos = ctxt->input->consumed + 7903 (CUR_PTR - ctxt->input->base); 7904 node_info.end_line = ctxt->input->line; 7905 node_info.node = ret; 7906 xmlParserAddNodeInfo(ctxt, &node_info); 7907 } 7908 return; 7909 } 7910 7911 /* 7912 * Parse the content of the element: 7913 */ 7914 xmlParseContent(ctxt); 7915 if (!IS_BYTE_CHAR(RAW)) { 7916 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 7917 "Premature end of data in tag %s line %d\n", 7918 name, line, NULL); 7919 7920 /* 7921 * end of parsing of this node. 7922 */ 7923 nodePop(ctxt); 7924 namePop(ctxt); 7925 spacePop(ctxt); 7926 if (nsNr != ctxt->nsNr) 7927 nsPop(ctxt, ctxt->nsNr - nsNr); 7928 return; 7929 } 7930 7931 /* 7932 * parse the end of tag: '</' should be here. 7933 */ 7934 if (ctxt->sax2) { 7935 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 7936 namePop(ctxt); 7937 } 7938#ifdef LIBXML_SAX1_ENABLED 7939 else 7940 xmlParseEndTag1(ctxt, line); 7941#endif /* LIBXML_SAX1_ENABLED */ 7942 7943 /* 7944 * Capture end position and add node 7945 */ 7946 if ( ret != NULL && ctxt->record_info ) { 7947 node_info.end_pos = ctxt->input->consumed + 7948 (CUR_PTR - ctxt->input->base); 7949 node_info.end_line = ctxt->input->line; 7950 node_info.node = ret; 7951 xmlParserAddNodeInfo(ctxt, &node_info); 7952 } 7953} 7954 7955/** 7956 * xmlParseVersionNum: 7957 * @ctxt: an XML parser context 7958 * 7959 * parse the XML version value. 
7960 * 7961 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 7962 * 7963 * Returns the string giving the XML version number, or NULL 7964 */ 7965xmlChar * 7966xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 7967 xmlChar *buf = NULL; 7968 int len = 0; 7969 int size = 10; 7970 xmlChar cur; 7971 7972 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 7973 if (buf == NULL) { 7974 xmlErrMemory(ctxt, NULL); 7975 return(NULL); 7976 } 7977 cur = CUR; 7978 while (((cur >= 'a') && (cur <= 'z')) || 7979 ((cur >= 'A') && (cur <= 'Z')) || 7980 ((cur >= '0') && (cur <= '9')) || 7981 (cur == '_') || (cur == '.') || 7982 (cur == ':') || (cur == '-')) { 7983 if (len + 1 >= size) { 7984 size *= 2; 7985 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7986 if (buf == NULL) { 7987 xmlErrMemory(ctxt, NULL); 7988 return(NULL); 7989 } 7990 } 7991 buf[len++] = cur; 7992 NEXT; 7993 cur=CUR; 7994 } 7995 buf[len] = 0; 7996 return(buf); 7997} 7998 7999/** 8000 * xmlParseVersionInfo: 8001 * @ctxt: an XML parser context 8002 * 8003 * parse the XML version. 8004 * 8005 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 8006 * 8007 * [25] Eq ::= S? '=' S? 8008 * 8009 * Returns the version string, e.g. 
"1.0" 8010 */ 8011 8012xmlChar * 8013xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 8014 xmlChar *version = NULL; 8015 8016 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 8017 SKIP(7); 8018 SKIP_BLANKS; 8019 if (RAW != '=') { 8020 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8021 return(NULL); 8022 } 8023 NEXT; 8024 SKIP_BLANKS; 8025 if (RAW == '"') { 8026 NEXT; 8027 version = xmlParseVersionNum(ctxt); 8028 if (RAW != '"') { 8029 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8030 } else 8031 NEXT; 8032 } else if (RAW == '\''){ 8033 NEXT; 8034 version = xmlParseVersionNum(ctxt); 8035 if (RAW != '\'') { 8036 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8037 } else 8038 NEXT; 8039 } else { 8040 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8041 } 8042 } 8043 return(version); 8044} 8045 8046/** 8047 * xmlParseEncName: 8048 * @ctxt: an XML parser context 8049 * 8050 * parse the XML encoding name 8051 * 8052 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 8053 * 8054 * Returns the encoding name value or NULL 8055 */ 8056xmlChar * 8057xmlParseEncName(xmlParserCtxtPtr ctxt) { 8058 xmlChar *buf = NULL; 8059 int len = 0; 8060 int size = 10; 8061 xmlChar cur; 8062 8063 cur = CUR; 8064 if (((cur >= 'a') && (cur <= 'z')) || 8065 ((cur >= 'A') && (cur <= 'Z'))) { 8066 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8067 if (buf == NULL) { 8068 xmlErrMemory(ctxt, NULL); 8069 return(NULL); 8070 } 8071 8072 buf[len++] = cur; 8073 NEXT; 8074 cur = CUR; 8075 while (((cur >= 'a') && (cur <= 'z')) || 8076 ((cur >= 'A') && (cur <= 'Z')) || 8077 ((cur >= '0') && (cur <= '9')) || 8078 (cur == '.') || (cur == '_') || 8079 (cur == '-')) { 8080 if (len + 1 >= size) { 8081 size *= 2; 8082 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8083 if (buf == NULL) { 8084 xmlErrMemory(ctxt, NULL); 8085 return(NULL); 8086 } 8087 } 8088 buf[len++] = cur; 8089 NEXT; 8090 cur = CUR; 8091 if (cur == 0) { 8092 SHRINK; 8093 GROW; 8094 cur = CUR; 8095 } 
8096 } 8097 buf[len] = 0; 8098 } else { 8099 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 8100 } 8101 return(buf); 8102} 8103 8104/** 8105 * xmlParseEncodingDecl: 8106 * @ctxt: an XML parser context 8107 * 8108 * parse the XML encoding declaration 8109 * 8110 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 8111 * 8112 * this setups the conversion filters. 8113 * 8114 * Returns the encoding value or NULL 8115 */ 8116 8117const xmlChar * 8118xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 8119 xmlChar *encoding = NULL; 8120 8121 SKIP_BLANKS; 8122 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 8123 SKIP(8); 8124 SKIP_BLANKS; 8125 if (RAW != '=') { 8126 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8127 return(NULL); 8128 } 8129 NEXT; 8130 SKIP_BLANKS; 8131 if (RAW == '"') { 8132 NEXT; 8133 encoding = xmlParseEncName(ctxt); 8134 if (RAW != '"') { 8135 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8136 } else 8137 NEXT; 8138 } else if (RAW == '\''){ 8139 NEXT; 8140 encoding = xmlParseEncName(ctxt); 8141 if (RAW != '\'') { 8142 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8143 } else 8144 NEXT; 8145 } else { 8146 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8147 } 8148 /* 8149 * UTF-16 encoding stwich has already taken place at this stage, 8150 * more over the little-endian/big-endian selection is already done 8151 */ 8152 if ((encoding != NULL) && 8153 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 8154 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 8155 if (ctxt->encoding != NULL) 8156 xmlFree((xmlChar *) ctxt->encoding); 8157 ctxt->encoding = encoding; 8158 } 8159 /* 8160 * UTF-8 encoding is handled natively 8161 */ 8162 else if ((encoding != NULL) && 8163 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 8164 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 8165 if (ctxt->encoding != NULL) 8166 xmlFree((xmlChar *) ctxt->encoding); 8167 ctxt->encoding = encoding; 8168 } 8169 else if (encoding != 
NULL) { 8170 xmlCharEncodingHandlerPtr handler; 8171 8172 if (ctxt->input->encoding != NULL) 8173 xmlFree((xmlChar *) ctxt->input->encoding); 8174 ctxt->input->encoding = encoding; 8175 8176 handler = xmlFindCharEncodingHandler((const char *) encoding); 8177 if (handler != NULL) { 8178 xmlSwitchToEncoding(ctxt, handler); 8179 } else { 8180 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 8181 "Unsupported encoding %s\n", encoding); 8182 return(NULL); 8183 } 8184 } 8185 } 8186 return(encoding); 8187} 8188 8189/** 8190 * xmlParseSDDecl: 8191 * @ctxt: an XML parser context 8192 * 8193 * parse the XML standalone declaration 8194 * 8195 * [32] SDDecl ::= S 'standalone' Eq 8196 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 8197 * 8198 * [ VC: Standalone Document Declaration ] 8199 * TODO The standalone document declaration must have the value "no" 8200 * if any external markup declarations contain declarations of: 8201 * - attributes with default values, if elements to which these 8202 * attributes apply appear in the document without specifications 8203 * of values for these attributes, or 8204 * - entities (other than amp, lt, gt, apos, quot), if references 8205 * to those entities appear in the document, or 8206 * - attributes with values subject to normalization, where the 8207 * attribute appears in the document with a value which will change 8208 * as a result of normalization, or 8209 * - element types with element content, if white space occurs directly 8210 * within any instance of those types. 
8211 * 8212 * Returns 1 if standalone, 0 otherwise 8213 */ 8214 8215int 8216xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 8217 int standalone = -1; 8218 8219 SKIP_BLANKS; 8220 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 8221 SKIP(10); 8222 SKIP_BLANKS; 8223 if (RAW != '=') { 8224 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8225 return(standalone); 8226 } 8227 NEXT; 8228 SKIP_BLANKS; 8229 if (RAW == '\''){ 8230 NEXT; 8231 if ((RAW == 'n') && (NXT(1) == 'o')) { 8232 standalone = 0; 8233 SKIP(2); 8234 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8235 (NXT(2) == 's')) { 8236 standalone = 1; 8237 SKIP(3); 8238 } else { 8239 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8240 } 8241 if (RAW != '\'') { 8242 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8243 } else 8244 NEXT; 8245 } else if (RAW == '"'){ 8246 NEXT; 8247 if ((RAW == 'n') && (NXT(1) == 'o')) { 8248 standalone = 0; 8249 SKIP(2); 8250 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8251 (NXT(2) == 's')) { 8252 standalone = 1; 8253 SKIP(3); 8254 } else { 8255 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8256 } 8257 if (RAW != '"') { 8258 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8259 } else 8260 NEXT; 8261 } else { 8262 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8263 } 8264 } 8265 return(standalone); 8266} 8267 8268/** 8269 * xmlParseXMLDecl: 8270 * @ctxt: an XML parser context 8271 * 8272 * parse an XML declaration header 8273 * 8274 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 8275 */ 8276 8277void 8278xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 8279 xmlChar *version; 8280 8281 /* 8282 * We know that '<?xml' is here. 8283 */ 8284 SKIP(5); 8285 8286 if (!IS_BLANK_CH(RAW)) { 8287 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8288 "Blank needed after '<?xml'\n"); 8289 } 8290 SKIP_BLANKS; 8291 8292 /* 8293 * We must have the VersionInfo here. 
8294 */ 8295 version = xmlParseVersionInfo(ctxt); 8296 if (version == NULL) { 8297 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 8298 } else { 8299 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 8300 /* 8301 * TODO: Blueberry should be detected here 8302 */ 8303 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 8304 "Unsupported version '%s'\n", 8305 version, NULL); 8306 } 8307 if (ctxt->version != NULL) 8308 xmlFree((void *) ctxt->version); 8309 ctxt->version = version; 8310 } 8311 8312 /* 8313 * We may have the encoding declaration 8314 */ 8315 if (!IS_BLANK_CH(RAW)) { 8316 if ((RAW == '?') && (NXT(1) == '>')) { 8317 SKIP(2); 8318 return; 8319 } 8320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8321 } 8322 xmlParseEncodingDecl(ctxt); 8323 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8324 /* 8325 * The XML REC instructs us to stop parsing right here 8326 */ 8327 return; 8328 } 8329 8330 /* 8331 * We may have the standalone status. 8332 */ 8333 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 8334 if ((RAW == '?') && (NXT(1) == '>')) { 8335 SKIP(2); 8336 return; 8337 } 8338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8339 } 8340 SKIP_BLANKS; 8341 ctxt->input->standalone = xmlParseSDDecl(ctxt); 8342 8343 SKIP_BLANKS; 8344 if ((RAW == '?') && (NXT(1) == '>')) { 8345 SKIP(2); 8346 } else if (RAW == '>') { 8347 /* Deprecated old WD ... */ 8348 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8349 NEXT; 8350 } else { 8351 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8352 MOVETO_ENDTAG(CUR_PTR); 8353 NEXT; 8354 } 8355} 8356 8357/** 8358 * xmlParseMisc: 8359 * @ctxt: an XML parser context 8360 * 8361 * parse an XML Misc* optional field. 
8362 * 8363 * [27] Misc ::= Comment | PI | S 8364 */ 8365 8366void 8367xmlParseMisc(xmlParserCtxtPtr ctxt) { 8368 while (((RAW == '<') && (NXT(1) == '?')) || 8369 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 8370 IS_BLANK_CH(CUR)) { 8371 if ((RAW == '<') && (NXT(1) == '?')) { 8372 xmlParsePI(ctxt); 8373 } else if (IS_BLANK_CH(CUR)) { 8374 NEXT; 8375 } else 8376 xmlParseComment(ctxt); 8377 } 8378} 8379 8380/** 8381 * xmlParseDocument: 8382 * @ctxt: an XML parser context 8383 * 8384 * parse an XML document (and build a tree if using the standard SAX 8385 * interface). 8386 * 8387 * [1] document ::= prolog element Misc* 8388 * 8389 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 8390 * 8391 * Returns 0, -1 in case of error. the parser context is augmented 8392 * as a result of the parsing. 8393 */ 8394 8395int 8396xmlParseDocument(xmlParserCtxtPtr ctxt) { 8397 xmlChar start[4]; 8398 xmlCharEncoding enc; 8399 8400 xmlInitParser(); 8401 8402 GROW; 8403 8404 /* 8405 * SAX: detecting the level. 8406 */ 8407 xmlDetectSAX2(ctxt); 8408 8409 /* 8410 * SAX: beginning of the document processing. 8411 */ 8412 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8413 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8414 8415 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 8416 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 8417 /* 8418 * Get the 4 first bytes and decode the charset 8419 * if enc != XML_CHAR_ENCODING_NONE 8420 * plug some encoding conversion routines. 8421 */ 8422 start[0] = RAW; 8423 start[1] = NXT(1); 8424 start[2] = NXT(2); 8425 start[3] = NXT(3); 8426 enc = xmlDetectCharEncoding(&start[0], 4); 8427 if (enc != XML_CHAR_ENCODING_NONE) { 8428 xmlSwitchEncoding(ctxt, enc); 8429 } 8430 } 8431 8432 8433 if (CUR == 0) { 8434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8435 } 8436 8437 /* 8438 * Check for the XMLDecl in the Prolog. 
8439 */ 8440 GROW; 8441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8442 8443 /* 8444 * Note that we will switch encoding on the fly. 8445 */ 8446 xmlParseXMLDecl(ctxt); 8447 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8448 /* 8449 * The XML REC instructs us to stop parsing right here 8450 */ 8451 return(-1); 8452 } 8453 ctxt->standalone = ctxt->input->standalone; 8454 SKIP_BLANKS; 8455 } else { 8456 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8457 } 8458 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8459 ctxt->sax->startDocument(ctxt->userData); 8460 8461 /* 8462 * The Misc part of the Prolog 8463 */ 8464 GROW; 8465 xmlParseMisc(ctxt); 8466 8467 /* 8468 * Then possibly doc type declaration(s) and more Misc 8469 * (doctypedecl Misc*)? 8470 */ 8471 GROW; 8472 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 8473 8474 ctxt->inSubset = 1; 8475 xmlParseDocTypeDecl(ctxt); 8476 if (RAW == '[') { 8477 ctxt->instate = XML_PARSER_DTD; 8478 xmlParseInternalSubset(ctxt); 8479 } 8480 8481 /* 8482 * Create and update the external subset. 
8483 */ 8484 ctxt->inSubset = 2; 8485 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 8486 (!ctxt->disableSAX)) 8487 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8488 ctxt->extSubSystem, ctxt->extSubURI); 8489 ctxt->inSubset = 0; 8490 8491 8492 ctxt->instate = XML_PARSER_PROLOG; 8493 xmlParseMisc(ctxt); 8494 } 8495 8496 /* 8497 * Time to start parsing the tree itself 8498 */ 8499 GROW; 8500 if (RAW != '<') { 8501 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 8502 "Start tag expected, '<' not found\n"); 8503 } else { 8504 ctxt->instate = XML_PARSER_CONTENT; 8505 xmlParseElement(ctxt); 8506 ctxt->instate = XML_PARSER_EPILOG; 8507 8508 8509 /* 8510 * The Misc part at the end 8511 */ 8512 xmlParseMisc(ctxt); 8513 8514 if (RAW != 0) { 8515 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 8516 } 8517 ctxt->instate = XML_PARSER_EOF; 8518 } 8519 8520 /* 8521 * SAX: end of the document processing. 8522 */ 8523 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8524 ctxt->sax->endDocument(ctxt->userData); 8525 8526 /* 8527 * Remove locally kept entity definitions if the tree was not built 8528 */ 8529 if ((ctxt->myDoc != NULL) && 8530 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 8531 xmlFreeDoc(ctxt->myDoc); 8532 ctxt->myDoc = NULL; 8533 } 8534 8535 if (! ctxt->wellFormed) { 8536 ctxt->valid = 0; 8537 return(-1); 8538 } 8539 return(0); 8540} 8541 8542/** 8543 * xmlParseExtParsedEnt: 8544 * @ctxt: an XML parser context 8545 * 8546 * parse a general parsed entity 8547 * An external general parsed entity is well-formed if it matches the 8548 * production labeled extParsedEnt. 8549 * 8550 * [78] extParsedEnt ::= TextDecl? content 8551 * 8552 * Returns 0, -1 in case of error. the parser context is augmented 8553 * as a result of the parsing. 
8554 */ 8555 8556int 8557xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 8558 xmlChar start[4]; 8559 xmlCharEncoding enc; 8560 8561 xmlDefaultSAXHandlerInit(); 8562 8563 xmlDetectSAX2(ctxt); 8564 8565 GROW; 8566 8567 /* 8568 * SAX: beginning of the document processing. 8569 */ 8570 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8571 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8572 8573 /* 8574 * Get the 4 first bytes and decode the charset 8575 * if enc != XML_CHAR_ENCODING_NONE 8576 * plug some encoding conversion routines. 8577 */ 8578 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 8579 start[0] = RAW; 8580 start[1] = NXT(1); 8581 start[2] = NXT(2); 8582 start[3] = NXT(3); 8583 enc = xmlDetectCharEncoding(start, 4); 8584 if (enc != XML_CHAR_ENCODING_NONE) { 8585 xmlSwitchEncoding(ctxt, enc); 8586 } 8587 } 8588 8589 8590 if (CUR == 0) { 8591 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8592 } 8593 8594 /* 8595 * Check for the XMLDecl in the Prolog. 8596 */ 8597 GROW; 8598 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8599 8600 /* 8601 * Note that we will switch encoding on the fly. 
8602 */ 8603 xmlParseXMLDecl(ctxt); 8604 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8605 /* 8606 * The XML REC instructs us to stop parsing right here 8607 */ 8608 return(-1); 8609 } 8610 SKIP_BLANKS; 8611 } else { 8612 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8613 } 8614 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8615 ctxt->sax->startDocument(ctxt->userData); 8616 8617 /* 8618 * Doing validity checking on chunk doesn't make sense 8619 */ 8620 ctxt->instate = XML_PARSER_CONTENT; 8621 ctxt->validate = 0; 8622 ctxt->loadsubset = 0; 8623 ctxt->depth = 0; 8624 8625 xmlParseContent(ctxt); 8626 8627 if ((RAW == '<') && (NXT(1) == '/')) { 8628 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 8629 } else if (RAW != 0) { 8630 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 8631 } 8632 8633 /* 8634 * SAX: end of the document processing. 8635 */ 8636 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8637 ctxt->sax->endDocument(ctxt->userData); 8638 8639 if (! ctxt->wellFormed) return(-1); 8640 return(0); 8641} 8642 8643#ifdef LIBXML_PUSH_ENABLED 8644/************************************************************************ 8645 * * 8646 * Progressive parsing interfaces * 8647 * * 8648 ************************************************************************/ 8649 8650/** 8651 * xmlParseLookupSequence: 8652 * @ctxt: an XML parser context 8653 * @first: the first char to lookup 8654 * @next: the next char to lookup or zero 8655 * @third: the next char to lookup or zero 8656 * 8657 * Try to find if a sequence (first, next, third) or just (first next) or 8658 * (first) is available in the input stream. 8659 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 8660 * to avoid rescanning sequences of bytes, it DOES change the state of the 8661 * parser, do not use liberally. 8662 * 8663 * Returns the index to the current parsing point if the full sequence 8664 * is available, -1 otherwise. 
8665 */ 8666static int 8667xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 8668 xmlChar next, xmlChar third) { 8669 int base, len; 8670 xmlParserInputPtr in; 8671 const xmlChar *buf; 8672 8673 in = ctxt->input; 8674 if (in == NULL) return(-1); 8675 base = in->cur - in->base; 8676 if (base < 0) return(-1); 8677 if (ctxt->checkIndex > base) 8678 base = ctxt->checkIndex; 8679 if (in->buf == NULL) { 8680 buf = in->base; 8681 len = in->length; 8682 } else { 8683 buf = in->buf->buffer->content; 8684 len = in->buf->buffer->use; 8685 } 8686 /* take into account the sequence length */ 8687 if (third) len -= 2; 8688 else if (next) len --; 8689 for (;base < len;base++) { 8690 if (buf[base] == first) { 8691 if (third != 0) { 8692 if ((buf[base + 1] != next) || 8693 (buf[base + 2] != third)) continue; 8694 } else if (next != 0) { 8695 if (buf[base + 1] != next) continue; 8696 } 8697 ctxt->checkIndex = 0; 8698#ifdef DEBUG_PUSH 8699 if (next == 0) 8700 xmlGenericError(xmlGenericErrorContext, 8701 "PP: lookup '%c' found at %d\n", 8702 first, base); 8703 else if (third == 0) 8704 xmlGenericError(xmlGenericErrorContext, 8705 "PP: lookup '%c%c' found at %d\n", 8706 first, next, base); 8707 else 8708 xmlGenericError(xmlGenericErrorContext, 8709 "PP: lookup '%c%c%c' found at %d\n", 8710 first, next, third, base); 8711#endif 8712 return(base - (in->cur - in->base)); 8713 } 8714 } 8715 ctxt->checkIndex = base; 8716#ifdef DEBUG_PUSH 8717 if (next == 0) 8718 xmlGenericError(xmlGenericErrorContext, 8719 "PP: lookup '%c' failed\n", first); 8720 else if (third == 0) 8721 xmlGenericError(xmlGenericErrorContext, 8722 "PP: lookup '%c%c' failed\n", first, next); 8723 else 8724 xmlGenericError(xmlGenericErrorContext, 8725 "PP: lookup '%c%c%c' failed\n", first, next, third); 8726#endif 8727 return(-1); 8728} 8729 8730/** 8731 * xmlParseGetLasts: 8732 * @ctxt: an XML parser context 8733 * @lastlt: pointer to store the last '<' from the input 8734 * @lastgt: pointer to store the last '>' 
from the input 8735 * 8736 * Lookup the last < and > in the current chunk 8737 */ 8738static void 8739xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 8740 const xmlChar **lastgt) { 8741 const xmlChar *tmp; 8742 8743 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 8744 xmlGenericError(xmlGenericErrorContext, 8745 "Internal error: xmlParseGetLasts\n"); 8746 return; 8747 } 8748 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) { 8749 tmp = ctxt->input->end; 8750 tmp--; 8751 while ((tmp >= ctxt->input->base) && (*tmp != '<') && 8752 (*tmp != '>')) tmp--; 8753 if (tmp < ctxt->input->base) { 8754 *lastlt = NULL; 8755 *lastgt = NULL; 8756 } else if (*tmp == '<') { 8757 *lastlt = tmp; 8758 tmp--; 8759 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 8760 if (tmp < ctxt->input->base) 8761 *lastgt = NULL; 8762 else 8763 *lastgt = tmp; 8764 } else { 8765 *lastgt = tmp; 8766 tmp--; 8767 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 8768 if (tmp < ctxt->input->base) 8769 *lastlt = NULL; 8770 else 8771 *lastlt = tmp; 8772 } 8773 8774 } else { 8775 *lastlt = NULL; 8776 *lastgt = NULL; 8777 } 8778} 8779/** 8780 * xmlParseTryOrFinish: 8781 * @ctxt: an XML parser context 8782 * @terminate: last chunk indicator 8783 * 8784 * Try to progress on parsing 8785 * 8786 * Returns zero if no parsing was possible 8787 */ 8788static int 8789xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 8790 int ret = 0; 8791 int avail, tlen; 8792 xmlChar cur, next; 8793 const xmlChar *lastlt, *lastgt; 8794 8795#ifdef DEBUG_PUSH 8796 switch (ctxt->instate) { 8797 case XML_PARSER_EOF: 8798 xmlGenericError(xmlGenericErrorContext, 8799 "PP: try EOF\n"); break; 8800 case XML_PARSER_START: 8801 xmlGenericError(xmlGenericErrorContext, 8802 "PP: try START\n"); break; 8803 case XML_PARSER_MISC: 8804 xmlGenericError(xmlGenericErrorContext, 8805 "PP: try MISC\n");break; 8806 case XML_PARSER_COMMENT: 8807 xmlGenericError(xmlGenericErrorContext, 8808 
"PP: try COMMENT\n");break; 8809 case XML_PARSER_PROLOG: 8810 xmlGenericError(xmlGenericErrorContext, 8811 "PP: try PROLOG\n");break; 8812 case XML_PARSER_START_TAG: 8813 xmlGenericError(xmlGenericErrorContext, 8814 "PP: try START_TAG\n");break; 8815 case XML_PARSER_CONTENT: 8816 xmlGenericError(xmlGenericErrorContext, 8817 "PP: try CONTENT\n");break; 8818 case XML_PARSER_CDATA_SECTION: 8819 xmlGenericError(xmlGenericErrorContext, 8820 "PP: try CDATA_SECTION\n");break; 8821 case XML_PARSER_END_TAG: 8822 xmlGenericError(xmlGenericErrorContext, 8823 "PP: try END_TAG\n");break; 8824 case XML_PARSER_ENTITY_DECL: 8825 xmlGenericError(xmlGenericErrorContext, 8826 "PP: try ENTITY_DECL\n");break; 8827 case XML_PARSER_ENTITY_VALUE: 8828 xmlGenericError(xmlGenericErrorContext, 8829 "PP: try ENTITY_VALUE\n");break; 8830 case XML_PARSER_ATTRIBUTE_VALUE: 8831 xmlGenericError(xmlGenericErrorContext, 8832 "PP: try ATTRIBUTE_VALUE\n");break; 8833 case XML_PARSER_DTD: 8834 xmlGenericError(xmlGenericErrorContext, 8835 "PP: try DTD\n");break; 8836 case XML_PARSER_EPILOG: 8837 xmlGenericError(xmlGenericErrorContext, 8838 "PP: try EPILOG\n");break; 8839 case XML_PARSER_PI: 8840 xmlGenericError(xmlGenericErrorContext, 8841 "PP: try PI\n");break; 8842 case XML_PARSER_IGNORE: 8843 xmlGenericError(xmlGenericErrorContext, 8844 "PP: try IGNORE\n");break; 8845 } 8846#endif 8847 8848 if ((ctxt->input != NULL) && 8849 (ctxt->input->cur - ctxt->input->base > 4096)) { 8850 xmlSHRINK(ctxt); 8851 ctxt->checkIndex = 0; 8852 } 8853 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 8854 8855 while (1) { 8856 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 8857 return(0); 8858 8859 8860 /* 8861 * Pop-up of finished entities. 
8862 */ 8863 while ((RAW == 0) && (ctxt->inputNr > 1)) 8864 xmlPopInput(ctxt); 8865 8866 if (ctxt->input == NULL) break; 8867 if (ctxt->input->buf == NULL) 8868 avail = ctxt->input->length - 8869 (ctxt->input->cur - ctxt->input->base); 8870 else { 8871 /* 8872 * If we are operating on converted input, try to flush 8873 * remainng chars to avoid them stalling in the non-converted 8874 * buffer. 8875 */ 8876 if ((ctxt->input->buf->raw != NULL) && 8877 (ctxt->input->buf->raw->use > 0)) { 8878 int base = ctxt->input->base - 8879 ctxt->input->buf->buffer->content; 8880 int current = ctxt->input->cur - ctxt->input->base; 8881 8882 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 8883 ctxt->input->base = ctxt->input->buf->buffer->content + base; 8884 ctxt->input->cur = ctxt->input->base + current; 8885 ctxt->input->end = 8886 &ctxt->input->buf->buffer->content[ 8887 ctxt->input->buf->buffer->use]; 8888 } 8889 avail = ctxt->input->buf->buffer->use - 8890 (ctxt->input->cur - ctxt->input->base); 8891 } 8892 if (avail < 1) 8893 goto done; 8894 switch (ctxt->instate) { 8895 case XML_PARSER_EOF: 8896 /* 8897 * Document parsing is done ! 8898 */ 8899 goto done; 8900 case XML_PARSER_START: 8901 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 8902 xmlChar start[4]; 8903 xmlCharEncoding enc; 8904 8905 /* 8906 * Very first chars read from the document flow. 8907 */ 8908 if (avail < 4) 8909 goto done; 8910 8911 /* 8912 * Get the 4 first bytes and decode the charset 8913 * if enc != XML_CHAR_ENCODING_NONE 8914 * plug some encoding conversion routines. 
8915 */ 8916 start[0] = RAW; 8917 start[1] = NXT(1); 8918 start[2] = NXT(2); 8919 start[3] = NXT(3); 8920 enc = xmlDetectCharEncoding(start, 4); 8921 if (enc != XML_CHAR_ENCODING_NONE) { 8922 xmlSwitchEncoding(ctxt, enc); 8923 } 8924 break; 8925 } 8926 8927 if (avail < 2) 8928 goto done; 8929 cur = ctxt->input->cur[0]; 8930 next = ctxt->input->cur[1]; 8931 if (cur == 0) { 8932 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8933 ctxt->sax->setDocumentLocator(ctxt->userData, 8934 &xmlDefaultSAXLocator); 8935 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8936 ctxt->instate = XML_PARSER_EOF; 8937#ifdef DEBUG_PUSH 8938 xmlGenericError(xmlGenericErrorContext, 8939 "PP: entering EOF\n"); 8940#endif 8941 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8942 ctxt->sax->endDocument(ctxt->userData); 8943 goto done; 8944 } 8945 if ((cur == '<') && (next == '?')) { 8946 /* PI or XML decl */ 8947 if (avail < 5) return(ret); 8948 if ((!terminate) && 8949 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 8950 return(ret); 8951 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8952 ctxt->sax->setDocumentLocator(ctxt->userData, 8953 &xmlDefaultSAXLocator); 8954 if ((ctxt->input->cur[2] == 'x') && 8955 (ctxt->input->cur[3] == 'm') && 8956 (ctxt->input->cur[4] == 'l') && 8957 (IS_BLANK_CH(ctxt->input->cur[5]))) { 8958 ret += 5; 8959#ifdef DEBUG_PUSH 8960 xmlGenericError(xmlGenericErrorContext, 8961 "PP: Parsing XML Decl\n"); 8962#endif 8963 xmlParseXMLDecl(ctxt); 8964 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8965 /* 8966 * The XML REC instructs us to stop parsing right 8967 * here 8968 */ 8969 ctxt->instate = XML_PARSER_EOF; 8970 return(0); 8971 } 8972 ctxt->standalone = ctxt->input->standalone; 8973 if ((ctxt->encoding == NULL) && 8974 (ctxt->input->encoding != NULL)) 8975 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 8976 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8977 (!ctxt->disableSAX)) 8978 ctxt->sax->startDocument(ctxt->userData); 8979 
ctxt->instate = XML_PARSER_MISC; 8980#ifdef DEBUG_PUSH 8981 xmlGenericError(xmlGenericErrorContext, 8982 "PP: entering MISC\n"); 8983#endif 8984 } else { 8985 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8986 if ((ctxt->sax) && (ctxt->sax->startDocument) && 8987 (!ctxt->disableSAX)) 8988 ctxt->sax->startDocument(ctxt->userData); 8989 ctxt->instate = XML_PARSER_MISC; 8990#ifdef DEBUG_PUSH 8991 xmlGenericError(xmlGenericErrorContext, 8992 "PP: entering MISC\n"); 8993#endif 8994 } 8995 } else { 8996 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8997 ctxt->sax->setDocumentLocator(ctxt->userData, 8998 &xmlDefaultSAXLocator); 8999 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9000 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9001 (!ctxt->disableSAX)) 9002 ctxt->sax->startDocument(ctxt->userData); 9003 ctxt->instate = XML_PARSER_MISC; 9004#ifdef DEBUG_PUSH 9005 xmlGenericError(xmlGenericErrorContext, 9006 "PP: entering MISC\n"); 9007#endif 9008 } 9009 break; 9010 case XML_PARSER_START_TAG: { 9011 const xmlChar *name; 9012 const xmlChar *prefix; 9013 const xmlChar *URI; 9014 int nsNr = ctxt->nsNr; 9015 9016 if ((avail < 2) && (ctxt->inputNr == 1)) 9017 goto done; 9018 cur = ctxt->input->cur[0]; 9019 if (cur != '<') { 9020 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9021 ctxt->instate = XML_PARSER_EOF; 9022 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9023 ctxt->sax->endDocument(ctxt->userData); 9024 goto done; 9025 } 9026 if (!terminate) { 9027 if (ctxt->progressive) { 9028 /* > can be found unescaped in attribute values */ 9029 if ((lastlt == NULL) || (ctxt->input->cur >= lastlt)) 9030 goto done; 9031 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9032 goto done; 9033 } 9034 } 9035 if (ctxt->spaceNr == 0) 9036 spacePush(ctxt, -1); 9037 else 9038 spacePush(ctxt, *ctxt->space); 9039#ifdef LIBXML_SAX1_ENABLED 9040 if (ctxt->sax2) 9041#endif /* LIBXML_SAX1_ENABLED */ 9042 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 
9043#ifdef LIBXML_SAX1_ENABLED 9044 else 9045 name = xmlParseStartTag(ctxt); 9046#endif /* LIBXML_SAX1_ENABLED */ 9047 if (name == NULL) { 9048 spacePop(ctxt); 9049 ctxt->instate = XML_PARSER_EOF; 9050 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9051 ctxt->sax->endDocument(ctxt->userData); 9052 goto done; 9053 } 9054#ifdef LIBXML_VALID_ENABLED 9055 /* 9056 * [ VC: Root Element Type ] 9057 * The Name in the document type declaration must match 9058 * the element type of the root element. 9059 */ 9060 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9061 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9062 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9063#endif /* LIBXML_VALID_ENABLED */ 9064 9065 /* 9066 * Check for an Empty Element. 9067 */ 9068 if ((RAW == '/') && (NXT(1) == '>')) { 9069 SKIP(2); 9070 9071 if (ctxt->sax2) { 9072 if ((ctxt->sax != NULL) && 9073 (ctxt->sax->endElementNs != NULL) && 9074 (!ctxt->disableSAX)) 9075 ctxt->sax->endElementNs(ctxt->userData, name, 9076 prefix, URI); 9077#ifdef LIBXML_SAX1_ENABLED 9078 } else { 9079 if ((ctxt->sax != NULL) && 9080 (ctxt->sax->endElement != NULL) && 9081 (!ctxt->disableSAX)) 9082 ctxt->sax->endElement(ctxt->userData, name); 9083#endif /* LIBXML_SAX1_ENABLED */ 9084 } 9085 spacePop(ctxt); 9086 if (ctxt->nameNr == 0) { 9087 ctxt->instate = XML_PARSER_EPILOG; 9088 } else { 9089 ctxt->instate = XML_PARSER_CONTENT; 9090 } 9091 break; 9092 } 9093 if (RAW == '>') { 9094 NEXT; 9095 } else { 9096 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 9097 "Couldn't find end of Start Tag %s\n", 9098 name); 9099 nodePop(ctxt); 9100 spacePop(ctxt); 9101 } 9102 if (ctxt->sax2) 9103 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 9104#ifdef LIBXML_SAX1_ENABLED 9105 else 9106 namePush(ctxt, name); 9107#endif /* LIBXML_SAX1_ENABLED */ 9108 9109 ctxt->instate = XML_PARSER_CONTENT; 9110 break; 9111 } 9112 case XML_PARSER_CONTENT: { 9113 const xmlChar *test; 9114 unsigned int cons; 9115 if 
((avail < 2) && (ctxt->inputNr == 1)) 9116 goto done; 9117 cur = ctxt->input->cur[0]; 9118 next = ctxt->input->cur[1]; 9119 9120 test = CUR_PTR; 9121 cons = ctxt->input->consumed; 9122 if ((cur == '<') && (next == '/')) { 9123 ctxt->instate = XML_PARSER_END_TAG; 9124 break; 9125 } else if ((cur == '<') && (next == '?')) { 9126 if ((!terminate) && 9127 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9128 goto done; 9129 xmlParsePI(ctxt); 9130 } else if ((cur == '<') && (next != '!')) { 9131 ctxt->instate = XML_PARSER_START_TAG; 9132 break; 9133 } else if ((cur == '<') && (next == '!') && 9134 (ctxt->input->cur[2] == '-') && 9135 (ctxt->input->cur[3] == '-')) { 9136 if ((!terminate) && 9137 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9138 goto done; 9139 xmlParseComment(ctxt); 9140 ctxt->instate = XML_PARSER_CONTENT; 9141 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 9142 (ctxt->input->cur[2] == '[') && 9143 (ctxt->input->cur[3] == 'C') && 9144 (ctxt->input->cur[4] == 'D') && 9145 (ctxt->input->cur[5] == 'A') && 9146 (ctxt->input->cur[6] == 'T') && 9147 (ctxt->input->cur[7] == 'A') && 9148 (ctxt->input->cur[8] == '[')) { 9149 SKIP(9); 9150 ctxt->instate = XML_PARSER_CDATA_SECTION; 9151 break; 9152 } else if ((cur == '<') && (next == '!') && 9153 (avail < 9)) { 9154 goto done; 9155 } else if (cur == '&') { 9156 if ((!terminate) && 9157 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 9158 goto done; 9159 xmlParseReference(ctxt); 9160 } else { 9161 /* TODO Avoid the extra copy, handle directly !!! */ 9162 /* 9163 * Goal of the following test is: 9164 * - minimize calls to the SAX 'character' callback 9165 * when they are mergeable 9166 * - handle an problem for isBlank when we only parse 9167 * a sequence of blank chars and the next one is 9168 * not available to check against '<' presence. 9169 * - tries to homogenize the differences in SAX 9170 * callbacks between the push and pull versions 9171 * of the parser. 
9172 */ 9173 if ((ctxt->inputNr == 1) && 9174 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 9175 if (!terminate) { 9176 if (ctxt->progressive) { 9177 if ((lastlt == NULL) || 9178 (ctxt->input->cur > lastlt)) 9179 goto done; 9180 } else if (xmlParseLookupSequence(ctxt, 9181 '<', 0, 0) < 0) { 9182 goto done; 9183 } 9184 } 9185 } 9186 ctxt->checkIndex = 0; 9187 xmlParseCharData(ctxt, 0); 9188 } 9189 /* 9190 * Pop-up of finished entities. 9191 */ 9192 while ((RAW == 0) && (ctxt->inputNr > 1)) 9193 xmlPopInput(ctxt); 9194 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9195 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9196 "detected an error in element content\n"); 9197 ctxt->instate = XML_PARSER_EOF; 9198 break; 9199 } 9200 break; 9201 } 9202 case XML_PARSER_END_TAG: 9203 if (avail < 2) 9204 goto done; 9205 if (!terminate) { 9206 if (ctxt->progressive) { 9207 if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) 9208 goto done; 9209 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9210 goto done; 9211 } 9212 } 9213 if (ctxt->sax2) { 9214 xmlParseEndTag2(ctxt, 9215 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 9216 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 9217 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 9218 nameNsPop(ctxt); 9219 } 9220#ifdef LIBXML_SAX1_ENABLED 9221 else 9222 xmlParseEndTag1(ctxt, 0); 9223#endif /* LIBXML_SAX1_ENABLED */ 9224 if (ctxt->nameNr == 0) { 9225 ctxt->instate = XML_PARSER_EPILOG; 9226 } else { 9227 ctxt->instate = XML_PARSER_CONTENT; 9228 } 9229 break; 9230 case XML_PARSER_CDATA_SECTION: { 9231 /* 9232 * The Push mode need to have the SAX callback for 9233 * cdataBlock merge back contiguous callbacks. 
9234 */ 9235 int base; 9236 9237 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 9238 if (base < 0) { 9239 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 9240 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9241 if (ctxt->sax->cdataBlock != NULL) 9242 ctxt->sax->cdataBlock(ctxt->userData, 9243 ctxt->input->cur, 9244 XML_PARSER_BIG_BUFFER_SIZE); 9245 else if (ctxt->sax->characters != NULL) 9246 ctxt->sax->characters(ctxt->userData, 9247 ctxt->input->cur, 9248 XML_PARSER_BIG_BUFFER_SIZE); 9249 } 9250 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 9251 ctxt->checkIndex = 0; 9252 } 9253 goto done; 9254 } else { 9255 if ((ctxt->sax != NULL) && (base > 0) && 9256 (!ctxt->disableSAX)) { 9257 if (ctxt->sax->cdataBlock != NULL) 9258 ctxt->sax->cdataBlock(ctxt->userData, 9259 ctxt->input->cur, base); 9260 else if (ctxt->sax->characters != NULL) 9261 ctxt->sax->characters(ctxt->userData, 9262 ctxt->input->cur, base); 9263 } 9264 SKIP(base + 3); 9265 ctxt->checkIndex = 0; 9266 ctxt->instate = XML_PARSER_CONTENT; 9267#ifdef DEBUG_PUSH 9268 xmlGenericError(xmlGenericErrorContext, 9269 "PP: entering CONTENT\n"); 9270#endif 9271 } 9272 break; 9273 } 9274 case XML_PARSER_MISC: 9275 SKIP_BLANKS; 9276 if (ctxt->input->buf == NULL) 9277 avail = ctxt->input->length - 9278 (ctxt->input->cur - ctxt->input->base); 9279 else 9280 avail = ctxt->input->buf->buffer->use - 9281 (ctxt->input->cur - ctxt->input->base); 9282 if (avail < 2) 9283 goto done; 9284 cur = ctxt->input->cur[0]; 9285 next = ctxt->input->cur[1]; 9286 if ((cur == '<') && (next == '?')) { 9287 if ((!terminate) && 9288 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9289 goto done; 9290#ifdef DEBUG_PUSH 9291 xmlGenericError(xmlGenericErrorContext, 9292 "PP: Parsing PI\n"); 9293#endif 9294 xmlParsePI(ctxt); 9295 } else if ((cur == '<') && (next == '!') && 9296 (ctxt->input->cur[2] == '-') && 9297 (ctxt->input->cur[3] == '-')) { 9298 if ((!terminate) && 9299 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9300 goto done; 
9301#ifdef DEBUG_PUSH 9302 xmlGenericError(xmlGenericErrorContext, 9303 "PP: Parsing Comment\n"); 9304#endif 9305 xmlParseComment(ctxt); 9306 ctxt->instate = XML_PARSER_MISC; 9307 } else if ((cur == '<') && (next == '!') && 9308 (ctxt->input->cur[2] == 'D') && 9309 (ctxt->input->cur[3] == 'O') && 9310 (ctxt->input->cur[4] == 'C') && 9311 (ctxt->input->cur[5] == 'T') && 9312 (ctxt->input->cur[6] == 'Y') && 9313 (ctxt->input->cur[7] == 'P') && 9314 (ctxt->input->cur[8] == 'E')) { 9315 if ((!terminate) && 9316 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 9317 goto done; 9318#ifdef DEBUG_PUSH 9319 xmlGenericError(xmlGenericErrorContext, 9320 "PP: Parsing internal subset\n"); 9321#endif 9322 ctxt->inSubset = 1; 9323 xmlParseDocTypeDecl(ctxt); 9324 if (RAW == '[') { 9325 ctxt->instate = XML_PARSER_DTD; 9326#ifdef DEBUG_PUSH 9327 xmlGenericError(xmlGenericErrorContext, 9328 "PP: entering DTD\n"); 9329#endif 9330 } else { 9331 /* 9332 * Create and update the external subset. 9333 */ 9334 ctxt->inSubset = 2; 9335 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9336 (ctxt->sax->externalSubset != NULL)) 9337 ctxt->sax->externalSubset(ctxt->userData, 9338 ctxt->intSubName, ctxt->extSubSystem, 9339 ctxt->extSubURI); 9340 ctxt->inSubset = 0; 9341 ctxt->instate = XML_PARSER_PROLOG; 9342#ifdef DEBUG_PUSH 9343 xmlGenericError(xmlGenericErrorContext, 9344 "PP: entering PROLOG\n"); 9345#endif 9346 } 9347 } else if ((cur == '<') && (next == '!') && 9348 (avail < 9)) { 9349 goto done; 9350 } else { 9351 ctxt->instate = XML_PARSER_START_TAG; 9352 ctxt->progressive = 1; 9353 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9354#ifdef DEBUG_PUSH 9355 xmlGenericError(xmlGenericErrorContext, 9356 "PP: entering START_TAG\n"); 9357#endif 9358 } 9359 break; 9360 case XML_PARSER_PROLOG: 9361 SKIP_BLANKS; 9362 if (ctxt->input->buf == NULL) 9363 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9364 else 9365 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - 
ctxt->input->base); 9366 if (avail < 2) 9367 goto done; 9368 cur = ctxt->input->cur[0]; 9369 next = ctxt->input->cur[1]; 9370 if ((cur == '<') && (next == '?')) { 9371 if ((!terminate) && 9372 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9373 goto done; 9374#ifdef DEBUG_PUSH 9375 xmlGenericError(xmlGenericErrorContext, 9376 "PP: Parsing PI\n"); 9377#endif 9378 xmlParsePI(ctxt); 9379 } else if ((cur == '<') && (next == '!') && 9380 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9381 if ((!terminate) && 9382 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9383 goto done; 9384#ifdef DEBUG_PUSH 9385 xmlGenericError(xmlGenericErrorContext, 9386 "PP: Parsing Comment\n"); 9387#endif 9388 xmlParseComment(ctxt); 9389 ctxt->instate = XML_PARSER_PROLOG; 9390 } else if ((cur == '<') && (next == '!') && 9391 (avail < 4)) { 9392 goto done; 9393 } else { 9394 ctxt->instate = XML_PARSER_START_TAG; 9395 ctxt->progressive = 1; 9396 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9397#ifdef DEBUG_PUSH 9398 xmlGenericError(xmlGenericErrorContext, 9399 "PP: entering START_TAG\n"); 9400#endif 9401 } 9402 break; 9403 case XML_PARSER_EPILOG: 9404 SKIP_BLANKS; 9405 if (ctxt->input->buf == NULL) 9406 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9407 else 9408 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 9409 if (avail < 2) 9410 goto done; 9411 cur = ctxt->input->cur[0]; 9412 next = ctxt->input->cur[1]; 9413 if ((cur == '<') && (next == '?')) { 9414 if ((!terminate) && 9415 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9416 goto done; 9417#ifdef DEBUG_PUSH 9418 xmlGenericError(xmlGenericErrorContext, 9419 "PP: Parsing PI\n"); 9420#endif 9421 xmlParsePI(ctxt); 9422 ctxt->instate = XML_PARSER_EPILOG; 9423 } else if ((cur == '<') && (next == '!') && 9424 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9425 if ((!terminate) && 9426 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9427 goto done; 
9428#ifdef DEBUG_PUSH 9429 xmlGenericError(xmlGenericErrorContext, 9430 "PP: Parsing Comment\n"); 9431#endif 9432 xmlParseComment(ctxt); 9433 ctxt->instate = XML_PARSER_EPILOG; 9434 } else if ((cur == '<') && (next == '!') && 9435 (avail < 4)) { 9436 goto done; 9437 } else { 9438 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9439 ctxt->instate = XML_PARSER_EOF; 9440#ifdef DEBUG_PUSH 9441 xmlGenericError(xmlGenericErrorContext, 9442 "PP: entering EOF\n"); 9443#endif 9444 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9445 ctxt->sax->endDocument(ctxt->userData); 9446 goto done; 9447 } 9448 break; 9449 case XML_PARSER_DTD: { 9450 /* 9451 * Sorry but progressive parsing of the internal subset 9452 * is not expected to be supported. We first check that 9453 * the full content of the internal subset is available and 9454 * the parsing is launched only at that point. 9455 * Internal subset ends up with "']' S? '>'" in an unescaped 9456 * section and not in a ']]>' sequence which are conditional 9457 * sections (whoever argued to keep that crap in XML deserve 9458 * a place in hell !). 
9459 */ 9460 int base, i; 9461 xmlChar *buf; 9462 xmlChar quote = 0; 9463 9464 base = ctxt->input->cur - ctxt->input->base; 9465 if (base < 0) return(0); 9466 if (ctxt->checkIndex > base) 9467 base = ctxt->checkIndex; 9468 buf = ctxt->input->buf->buffer->content; 9469 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 9470 base++) { 9471 if (quote != 0) { 9472 if (buf[base] == quote) 9473 quote = 0; 9474 continue; 9475 } 9476 if ((quote == 0) && (buf[base] == '<')) { 9477 int found = 0; 9478 /* special handling of comments */ 9479 if (((unsigned int) base + 4 < 9480 ctxt->input->buf->buffer->use) && 9481 (buf[base + 1] == '!') && 9482 (buf[base + 2] == '-') && 9483 (buf[base + 3] == '-')) { 9484 for (;(unsigned int) base + 3 < 9485 ctxt->input->buf->buffer->use; base++) { 9486 if ((buf[base] == '-') && 9487 (buf[base + 1] == '-') && 9488 (buf[base + 2] == '>')) { 9489 found = 1; 9490 base += 2; 9491 break; 9492 } 9493 } 9494 if (!found) 9495 break; 9496 continue; 9497 } 9498 } 9499 if (buf[base] == '"') { 9500 quote = '"'; 9501 continue; 9502 } 9503 if (buf[base] == '\'') { 9504 quote = '\''; 9505 continue; 9506 } 9507 if (buf[base] == ']') { 9508 if ((unsigned int) base +1 >= 9509 ctxt->input->buf->buffer->use) 9510 break; 9511 if (buf[base + 1] == ']') { 9512 /* conditional crap, skip both ']' ! 
*/ 9513 base++; 9514 continue; 9515 } 9516 for (i = 0; 9517 (unsigned int) base + i < ctxt->input->buf->buffer->use; 9518 i++) { 9519 if (buf[base + i] == '>') 9520 goto found_end_int_subset; 9521 } 9522 break; 9523 } 9524 } 9525 /* 9526 * We didn't found the end of the Internal subset 9527 */ 9528 if (quote == 0) 9529 ctxt->checkIndex = base; 9530#ifdef DEBUG_PUSH 9531 if (next == 0) 9532 xmlGenericError(xmlGenericErrorContext, 9533 "PP: lookup of int subset end filed\n"); 9534#endif 9535 goto done; 9536 9537found_end_int_subset: 9538 xmlParseInternalSubset(ctxt); 9539 ctxt->inSubset = 2; 9540 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9541 (ctxt->sax->externalSubset != NULL)) 9542 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9543 ctxt->extSubSystem, ctxt->extSubURI); 9544 ctxt->inSubset = 0; 9545 ctxt->instate = XML_PARSER_PROLOG; 9546 ctxt->checkIndex = 0; 9547#ifdef DEBUG_PUSH 9548 xmlGenericError(xmlGenericErrorContext, 9549 "PP: entering PROLOG\n"); 9550#endif 9551 break; 9552 } 9553 case XML_PARSER_COMMENT: 9554 xmlGenericError(xmlGenericErrorContext, 9555 "PP: internal error, state == COMMENT\n"); 9556 ctxt->instate = XML_PARSER_CONTENT; 9557#ifdef DEBUG_PUSH 9558 xmlGenericError(xmlGenericErrorContext, 9559 "PP: entering CONTENT\n"); 9560#endif 9561 break; 9562 case XML_PARSER_IGNORE: 9563 xmlGenericError(xmlGenericErrorContext, 9564 "PP: internal error, state == IGNORE"); 9565 ctxt->instate = XML_PARSER_DTD; 9566#ifdef DEBUG_PUSH 9567 xmlGenericError(xmlGenericErrorContext, 9568 "PP: entering DTD\n"); 9569#endif 9570 break; 9571 case XML_PARSER_PI: 9572 xmlGenericError(xmlGenericErrorContext, 9573 "PP: internal error, state == PI\n"); 9574 ctxt->instate = XML_PARSER_CONTENT; 9575#ifdef DEBUG_PUSH 9576 xmlGenericError(xmlGenericErrorContext, 9577 "PP: entering CONTENT\n"); 9578#endif 9579 break; 9580 case XML_PARSER_ENTITY_DECL: 9581 xmlGenericError(xmlGenericErrorContext, 9582 "PP: internal error, state == ENTITY_DECL\n"); 9583 
ctxt->instate = XML_PARSER_DTD; 9584#ifdef DEBUG_PUSH 9585 xmlGenericError(xmlGenericErrorContext, 9586 "PP: entering DTD\n"); 9587#endif 9588 break; 9589 case XML_PARSER_ENTITY_VALUE: 9590 xmlGenericError(xmlGenericErrorContext, 9591 "PP: internal error, state == ENTITY_VALUE\n"); 9592 ctxt->instate = XML_PARSER_CONTENT; 9593#ifdef DEBUG_PUSH 9594 xmlGenericError(xmlGenericErrorContext, 9595 "PP: entering DTD\n"); 9596#endif 9597 break; 9598 case XML_PARSER_ATTRIBUTE_VALUE: 9599 xmlGenericError(xmlGenericErrorContext, 9600 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 9601 ctxt->instate = XML_PARSER_START_TAG; 9602#ifdef DEBUG_PUSH 9603 xmlGenericError(xmlGenericErrorContext, 9604 "PP: entering START_TAG\n"); 9605#endif 9606 break; 9607 case XML_PARSER_SYSTEM_LITERAL: 9608 xmlGenericError(xmlGenericErrorContext, 9609 "PP: internal error, state == SYSTEM_LITERAL\n"); 9610 ctxt->instate = XML_PARSER_START_TAG; 9611#ifdef DEBUG_PUSH 9612 xmlGenericError(xmlGenericErrorContext, 9613 "PP: entering START_TAG\n"); 9614#endif 9615 break; 9616 case XML_PARSER_PUBLIC_LITERAL: 9617 xmlGenericError(xmlGenericErrorContext, 9618 "PP: internal error, state == PUBLIC_LITERAL\n"); 9619 ctxt->instate = XML_PARSER_START_TAG; 9620#ifdef DEBUG_PUSH 9621 xmlGenericError(xmlGenericErrorContext, 9622 "PP: entering START_TAG\n"); 9623#endif 9624 break; 9625 } 9626 } 9627done: 9628#ifdef DEBUG_PUSH 9629 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 9630#endif 9631 return(ret); 9632} 9633 9634/** 9635 * xmlParseChunk: 9636 * @ctxt: an XML parser context 9637 * @chunk: an char array 9638 * @size: the size in byte of the chunk 9639 * @terminate: last chunk indicator 9640 * 9641 * Parse a Chunk of memory 9642 * 9643 * Returns zero if no error, the xmlParserErrors otherwise. 
9644 */ 9645int 9646xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 9647 int terminate) { 9648 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9649 return(ctxt->errNo); 9650 if (ctxt->instate == XML_PARSER_START) 9651 xmlDetectSAX2(ctxt); 9652 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9653 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 9654 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 9655 int cur = ctxt->input->cur - ctxt->input->base; 9656 9657 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9658 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9659 ctxt->input->cur = ctxt->input->base + cur; 9660 ctxt->input->end = 9661 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9662#ifdef DEBUG_PUSH 9663 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9664#endif 9665 9666 } else if (ctxt->instate != XML_PARSER_EOF) { 9667 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 9668 xmlParserInputBufferPtr in = ctxt->input->buf; 9669 if ((in->encoder != NULL) && (in->buffer != NULL) && 9670 (in->raw != NULL)) { 9671 int nbchars; 9672 9673 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 9674 if (nbchars < 0) { 9675 /* TODO 2.6.0 */ 9676 xmlGenericError(xmlGenericErrorContext, 9677 "xmlParseChunk: encoder error\n"); 9678 return(XML_ERR_INVALID_ENCODING); 9679 } 9680 } 9681 } 9682 } 9683 xmlParseTryOrFinish(ctxt, terminate); 9684 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9685 return(ctxt->errNo); 9686 if (terminate) { 9687 /* 9688 * Check for termination 9689 */ 9690 int avail = 0; 9691 if (ctxt->input->buf == NULL) 9692 avail = ctxt->input->length - 9693 (ctxt->input->cur - ctxt->input->base); 9694 else 9695 avail = ctxt->input->buf->buffer->use - 9696 (ctxt->input->cur - ctxt->input->base); 9697 9698 if ((ctxt->instate != XML_PARSER_EOF) && 9699 (ctxt->instate != XML_PARSER_EPILOG)) { 9700 
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9701 } 9702 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 9703 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9704 } 9705 if (ctxt->instate != XML_PARSER_EOF) { 9706 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9707 ctxt->sax->endDocument(ctxt->userData); 9708 } 9709 ctxt->instate = XML_PARSER_EOF; 9710 } 9711 return((xmlParserErrors) ctxt->errNo); 9712} 9713 9714/************************************************************************ 9715 * * 9716 * I/O front end functions to the parser * 9717 * * 9718 ************************************************************************/ 9719 9720/** 9721 * xmlStopParser: 9722 * @ctxt: an XML parser context 9723 * 9724 * Blocks further parser processing 9725 */ 9726void 9727xmlStopParser(xmlParserCtxtPtr ctxt) { 9728 if (ctxt == NULL) 9729 return; 9730 ctxt->instate = XML_PARSER_EOF; 9731 ctxt->disableSAX = 1; 9732 if (ctxt->input != NULL) 9733 ctxt->input->cur = BAD_CAST""; 9734} 9735 9736/** 9737 * xmlCreatePushParserCtxt: 9738 * @sax: a SAX handler 9739 * @user_data: The user data returned on SAX callbacks 9740 * @chunk: a pointer to an array of chars 9741 * @size: number of chars in the array 9742 * @filename: an optional file name or URI 9743 * 9744 * Create a parser context for using the XML parser in push mode. 9745 * If @buffer and @size are non-NULL, the data is used to detect 9746 * the encoding. The remaining characters will be parsed so they 9747 * don't need to be fed in again through xmlParseChunk. 9748 * To allow content encoding detection, @size should be >= 4 9749 * The value of @filename is used for fetching external entities 9750 * and error/warning reports. 
9751 * 9752 * Returns the new parser context or NULL 9753 */ 9754 9755xmlParserCtxtPtr 9756xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9757 const char *chunk, int size, const char *filename) { 9758 xmlParserCtxtPtr ctxt; 9759 xmlParserInputPtr inputStream; 9760 xmlParserInputBufferPtr buf; 9761 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 9762 9763 /* 9764 * plug some encoding conversion routines 9765 */ 9766 if ((chunk != NULL) && (size >= 4)) 9767 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 9768 9769 buf = xmlAllocParserInputBuffer(enc); 9770 if (buf == NULL) return(NULL); 9771 9772 ctxt = xmlNewParserCtxt(); 9773 if (ctxt == NULL) { 9774 xmlErrMemory(NULL, "creating parser: out of memory\n"); 9775 xmlFreeParserInputBuffer(buf); 9776 return(NULL); 9777 } 9778 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 9779 if (ctxt->pushTab == NULL) { 9780 xmlErrMemory(ctxt, NULL); 9781 xmlFreeParserInputBuffer(buf); 9782 xmlFreeParserCtxt(ctxt); 9783 return(NULL); 9784 } 9785 if (sax != NULL) { 9786#ifdef LIBXML_SAX1_ENABLED 9787 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 9788#endif /* LIBXML_SAX1_ENABLED */ 9789 xmlFree(ctxt->sax); 9790 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9791 if (ctxt->sax == NULL) { 9792 xmlErrMemory(ctxt, NULL); 9793 xmlFreeParserInputBuffer(buf); 9794 xmlFreeParserCtxt(ctxt); 9795 return(NULL); 9796 } 9797 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9798 if (user_data != NULL) 9799 ctxt->userData = user_data; 9800 } 9801 if (filename == NULL) { 9802 ctxt->directory = NULL; 9803 } else { 9804 ctxt->directory = xmlParserGetDirectory(filename); 9805 } 9806 9807 inputStream = xmlNewInputStream(ctxt); 9808 if (inputStream == NULL) { 9809 xmlFreeParserCtxt(ctxt); 9810 xmlFreeParserInputBuffer(buf); 9811 return(NULL); 9812 } 9813 9814 if (filename == NULL) 9815 inputStream->filename = NULL; 9816 else 9817 inputStream->filename = (char *) 9818 
xmlCanonicPath((const xmlChar *) filename); 9819 inputStream->buf = buf; 9820 inputStream->base = inputStream->buf->buffer->content; 9821 inputStream->cur = inputStream->buf->buffer->content; 9822 inputStream->end = 9823 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 9824 9825 inputPush(ctxt, inputStream); 9826 9827 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9828 (ctxt->input->buf != NULL)) { 9829 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 9830 int cur = ctxt->input->cur - ctxt->input->base; 9831 9832 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9833 9834 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9835 ctxt->input->cur = ctxt->input->base + cur; 9836 ctxt->input->end = 9837 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9838#ifdef DEBUG_PUSH 9839 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9840#endif 9841 } 9842 9843 if (enc != XML_CHAR_ENCODING_NONE) { 9844 xmlSwitchEncoding(ctxt, enc); 9845 } 9846 9847 return(ctxt); 9848} 9849#endif /* LIBXML_PUSH_ENABLED */ 9850 9851/** 9852 * xmlCreateIOParserCtxt: 9853 * @sax: a SAX handler 9854 * @user_data: The user data returned on SAX callbacks 9855 * @ioread: an I/O read function 9856 * @ioclose: an I/O close function 9857 * @ioctx: an I/O handler 9858 * @enc: the charset encoding if known 9859 * 9860 * Create a parser context for using the XML parser with an existing 9861 * I/O stream 9862 * 9863 * Returns the new parser context or NULL 9864 */ 9865xmlParserCtxtPtr 9866xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9867 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 9868 void *ioctx, xmlCharEncoding enc) { 9869 xmlParserCtxtPtr ctxt; 9870 xmlParserInputPtr inputStream; 9871 xmlParserInputBufferPtr buf; 9872 9873 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 9874 if (buf == NULL) return(NULL); 9875 9876 ctxt = xmlNewParserCtxt(); 9877 if (ctxt 
== NULL) { 9878 xmlFree(buf); 9879 return(NULL); 9880 } 9881 if (sax != NULL) { 9882#ifdef LIBXML_SAX1_ENABLED 9883 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 9884#endif /* LIBXML_SAX1_ENABLED */ 9885 xmlFree(ctxt->sax); 9886 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9887 if (ctxt->sax == NULL) { 9888 xmlErrMemory(ctxt, NULL); 9889 xmlFree(ctxt); 9890 return(NULL); 9891 } 9892 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9893 if (user_data != NULL) 9894 ctxt->userData = user_data; 9895 } 9896 9897 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 9898 if (inputStream == NULL) { 9899 xmlFreeParserCtxt(ctxt); 9900 return(NULL); 9901 } 9902 inputPush(ctxt, inputStream); 9903 9904 return(ctxt); 9905} 9906 9907#ifdef LIBXML_VALID_ENABLED 9908/************************************************************************ 9909 * * 9910 * Front ends when parsing a DTD * 9911 * * 9912 ************************************************************************/ 9913 9914/** 9915 * xmlIOParseDTD: 9916 * @sax: the SAX handler block or NULL 9917 * @input: an Input Buffer 9918 * @enc: the charset encoding if known 9919 * 9920 * Load and parse a DTD 9921 * 9922 * Returns the resulting xmlDtdPtr or NULL in case of error. 9923 * @input will be freed at parsing end. 
9924 */ 9925 9926xmlDtdPtr 9927xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 9928 xmlCharEncoding enc) { 9929 xmlDtdPtr ret = NULL; 9930 xmlParserCtxtPtr ctxt; 9931 xmlParserInputPtr pinput = NULL; 9932 xmlChar start[4]; 9933 9934 if (input == NULL) 9935 return(NULL); 9936 9937 ctxt = xmlNewParserCtxt(); 9938 if (ctxt == NULL) { 9939 return(NULL); 9940 } 9941 9942 /* 9943 * Set-up the SAX context 9944 */ 9945 if (sax != NULL) { 9946 if (ctxt->sax != NULL) 9947 xmlFree(ctxt->sax); 9948 ctxt->sax = sax; 9949 ctxt->userData = NULL; 9950 } 9951 xmlDetectSAX2(ctxt); 9952 9953 /* 9954 * generate a parser input from the I/O handler 9955 */ 9956 9957 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 9958 if (pinput == NULL) { 9959 if (sax != NULL) ctxt->sax = NULL; 9960 xmlFreeParserCtxt(ctxt); 9961 return(NULL); 9962 } 9963 9964 /* 9965 * plug some encoding conversion routines here. 9966 */ 9967 xmlPushInput(ctxt, pinput); 9968 if (enc != XML_CHAR_ENCODING_NONE) { 9969 xmlSwitchEncoding(ctxt, enc); 9970 } 9971 9972 pinput->filename = NULL; 9973 pinput->line = 1; 9974 pinput->col = 1; 9975 pinput->base = ctxt->input->cur; 9976 pinput->cur = ctxt->input->cur; 9977 pinput->free = NULL; 9978 9979 /* 9980 * let's parse that entity knowing it's an external subset. 9981 */ 9982 ctxt->inSubset = 2; 9983 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 9984 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 9985 BAD_CAST "none", BAD_CAST "none"); 9986 9987 if ((enc == XML_CHAR_ENCODING_NONE) && 9988 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 9989 /* 9990 * Get the 4 first bytes and decode the charset 9991 * if enc != XML_CHAR_ENCODING_NONE 9992 * plug some encoding conversion routines. 
9993 */ 9994 start[0] = RAW; 9995 start[1] = NXT(1); 9996 start[2] = NXT(2); 9997 start[3] = NXT(3); 9998 enc = xmlDetectCharEncoding(start, 4); 9999 if (enc != XML_CHAR_ENCODING_NONE) { 10000 xmlSwitchEncoding(ctxt, enc); 10001 } 10002 } 10003 10004 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 10005 10006 if (ctxt->myDoc != NULL) { 10007 if (ctxt->wellFormed) { 10008 ret = ctxt->myDoc->extSubset; 10009 ctxt->myDoc->extSubset = NULL; 10010 if (ret != NULL) { 10011 xmlNodePtr tmp; 10012 10013 ret->doc = NULL; 10014 tmp = ret->children; 10015 while (tmp != NULL) { 10016 tmp->doc = NULL; 10017 tmp = tmp->next; 10018 } 10019 } 10020 } else { 10021 ret = NULL; 10022 } 10023 xmlFreeDoc(ctxt->myDoc); 10024 ctxt->myDoc = NULL; 10025 } 10026 if (sax != NULL) ctxt->sax = NULL; 10027 xmlFreeParserCtxt(ctxt); 10028 10029 return(ret); 10030} 10031 10032/** 10033 * xmlSAXParseDTD: 10034 * @sax: the SAX handler block 10035 * @ExternalID: a NAME* containing the External ID of the DTD 10036 * @SystemID: a NAME* containing the URL to the DTD 10037 * 10038 * Load and parse an external subset. 10039 * 10040 * Returns the resulting xmlDtdPtr or NULL in case of error. 
10041 */ 10042 10043xmlDtdPtr 10044xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 10045 const xmlChar *SystemID) { 10046 xmlDtdPtr ret = NULL; 10047 xmlParserCtxtPtr ctxt; 10048 xmlParserInputPtr input = NULL; 10049 xmlCharEncoding enc; 10050 10051 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 10052 10053 ctxt = xmlNewParserCtxt(); 10054 if (ctxt == NULL) { 10055 return(NULL); 10056 } 10057 10058 /* 10059 * Set-up the SAX context 10060 */ 10061 if (sax != NULL) { 10062 if (ctxt->sax != NULL) 10063 xmlFree(ctxt->sax); 10064 ctxt->sax = sax; 10065 ctxt->userData = ctxt; 10066 } 10067 10068 /* 10069 * Ask the Entity resolver to load the damn thing 10070 */ 10071 10072 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 10073 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID); 10074 if (input == NULL) { 10075 if (sax != NULL) ctxt->sax = NULL; 10076 xmlFreeParserCtxt(ctxt); 10077 return(NULL); 10078 } 10079 10080 /* 10081 * plug some encoding conversion routines here. 10082 */ 10083 xmlPushInput(ctxt, input); 10084 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10085 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 10086 xmlSwitchEncoding(ctxt, enc); 10087 } 10088 10089 if (input->filename == NULL) 10090 input->filename = (char *) xmlCanonicPath(SystemID); 10091 input->line = 1; 10092 input->col = 1; 10093 input->base = ctxt->input->cur; 10094 input->cur = ctxt->input->cur; 10095 input->free = NULL; 10096 10097 /* 10098 * let's parse that entity knowing it's an external subset. 
10099 */ 10100 ctxt->inSubset = 2; 10101 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10102 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10103 ExternalID, SystemID); 10104 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 10105 10106 if (ctxt->myDoc != NULL) { 10107 if (ctxt->wellFormed) { 10108 ret = ctxt->myDoc->extSubset; 10109 ctxt->myDoc->extSubset = NULL; 10110 if (ret != NULL) { 10111 xmlNodePtr tmp; 10112 10113 ret->doc = NULL; 10114 tmp = ret->children; 10115 while (tmp != NULL) { 10116 tmp->doc = NULL; 10117 tmp = tmp->next; 10118 } 10119 } 10120 } else { 10121 ret = NULL; 10122 } 10123 xmlFreeDoc(ctxt->myDoc); 10124 ctxt->myDoc = NULL; 10125 } 10126 if (sax != NULL) ctxt->sax = NULL; 10127 xmlFreeParserCtxt(ctxt); 10128 10129 return(ret); 10130} 10131 10132 10133/** 10134 * xmlParseDTD: 10135 * @ExternalID: a NAME* containing the External ID of the DTD 10136 * @SystemID: a NAME* containing the URL to the DTD 10137 * 10138 * Load and parse an external subset. 10139 * 10140 * Returns the resulting xmlDtdPtr or NULL in case of error. 10141 */ 10142 10143xmlDtdPtr 10144xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 10145 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 10146} 10147#endif /* LIBXML_VALID_ENABLED */ 10148 10149/************************************************************************ 10150 * * 10151 * Front ends when parsing an Entity * 10152 * * 10153 ************************************************************************/ 10154 10155/** 10156 * xmlParseCtxtExternalEntity: 10157 * @ctx: the existing parsing context 10158 * @URL: the URL for the entity to load 10159 * @ID: the System ID for the entity to load 10160 * @lst: the return value for the set of parsed nodes 10161 * 10162 * Parse an external general entity within an existing parsing context 10163 * An external general parsed entity is well-formed if it matches the 10164 * production labeled extParsedEnt. 
10165 * 10166 * [78] extParsedEnt ::= TextDecl? content 10167 * 10168 * Returns 0 if the entity is well formed, -1 in case of args problem and 10169 * the parser error code otherwise 10170 */ 10171 10172int 10173xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 10174 const xmlChar *ID, xmlNodePtr *lst) { 10175 xmlParserCtxtPtr ctxt; 10176 xmlDocPtr newDoc; 10177 xmlSAXHandlerPtr oldsax = NULL; 10178 int ret = 0; 10179 xmlChar start[4]; 10180 xmlCharEncoding enc; 10181 10182 if (ctx->depth > 40) { 10183 return(XML_ERR_ENTITY_LOOP); 10184 } 10185 10186 if (lst != NULL) 10187 *lst = NULL; 10188 if ((URL == NULL) && (ID == NULL)) 10189 return(-1); 10190 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 10191 return(-1); 10192 10193 10194 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10195 if (ctxt == NULL) return(-1); 10196 ctxt->userData = ctxt; 10197 ctxt->_private = ctx->_private; 10198 oldsax = ctxt->sax; 10199 ctxt->sax = ctx->sax; 10200 xmlDetectSAX2(ctxt); 10201 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10202 if (newDoc == NULL) { 10203 xmlFreeParserCtxt(ctxt); 10204 return(-1); 10205 } 10206 if (ctx->myDoc != NULL) { 10207 newDoc->intSubset = ctx->myDoc->intSubset; 10208 newDoc->extSubset = ctx->myDoc->extSubset; 10209 } 10210 if (ctx->myDoc->URL != NULL) { 10211 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 10212 } 10213 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10214 if (newDoc->children == NULL) { 10215 ctxt->sax = oldsax; 10216 xmlFreeParserCtxt(ctxt); 10217 newDoc->intSubset = NULL; 10218 newDoc->extSubset = NULL; 10219 xmlFreeDoc(newDoc); 10220 return(-1); 10221 } 10222 nodePush(ctxt, newDoc->children); 10223 if (ctx->myDoc == NULL) { 10224 ctxt->myDoc = newDoc; 10225 } else { 10226 ctxt->myDoc = ctx->myDoc; 10227 newDoc->children->doc = ctx->myDoc; 10228 } 10229 10230 /* 10231 * Get the 4 first bytes and decode the charset 10232 * if enc != XML_CHAR_ENCODING_NONE 10233 * plug some encoding 
conversion routines. 10234 */ 10235 GROW 10236 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10237 start[0] = RAW; 10238 start[1] = NXT(1); 10239 start[2] = NXT(2); 10240 start[3] = NXT(3); 10241 enc = xmlDetectCharEncoding(start, 4); 10242 if (enc != XML_CHAR_ENCODING_NONE) { 10243 xmlSwitchEncoding(ctxt, enc); 10244 } 10245 } 10246 10247 /* 10248 * Parse a possible text declaration first 10249 */ 10250 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10251 xmlParseTextDecl(ctxt); 10252 } 10253 10254 /* 10255 * Doing validity checking on chunk doesn't make sense 10256 */ 10257 ctxt->instate = XML_PARSER_CONTENT; 10258 ctxt->validate = ctx->validate; 10259 ctxt->valid = ctx->valid; 10260 ctxt->loadsubset = ctx->loadsubset; 10261 ctxt->depth = ctx->depth + 1; 10262 ctxt->replaceEntities = ctx->replaceEntities; 10263 if (ctxt->validate) { 10264 ctxt->vctxt.error = ctx->vctxt.error; 10265 ctxt->vctxt.warning = ctx->vctxt.warning; 10266 } else { 10267 ctxt->vctxt.error = NULL; 10268 ctxt->vctxt.warning = NULL; 10269 } 10270 ctxt->vctxt.nodeTab = NULL; 10271 ctxt->vctxt.nodeNr = 0; 10272 ctxt->vctxt.nodeMax = 0; 10273 ctxt->vctxt.node = NULL; 10274 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10275 ctxt->dict = ctx->dict; 10276 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 10277 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 10278 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 10279 ctxt->dictNames = ctx->dictNames; 10280 ctxt->attsDefault = ctx->attsDefault; 10281 ctxt->attsSpecial = ctx->attsSpecial; 10282 10283 xmlParseContent(ctxt); 10284 10285 ctx->validate = ctxt->validate; 10286 ctx->valid = ctxt->valid; 10287 if ((RAW == '<') && (NXT(1) == '/')) { 10288 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10289 } else if (RAW != 0) { 10290 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10291 } 10292 if (ctxt->node != newDoc->children) { 10293 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 10294 } 10295 10296 if (!ctxt->wellFormed) { 10297 if (ctxt->errNo == 0) 10298 ret = 1; 10299 else 10300 ret = ctxt->errNo; 10301 } else { 10302 if (lst != NULL) { 10303 xmlNodePtr cur; 10304 10305 /* 10306 * Return the newly created nodeset after unlinking it from 10307 * they pseudo parent. 10308 */ 10309 cur = newDoc->children->children; 10310 *lst = cur; 10311 while (cur != NULL) { 10312 cur->parent = NULL; 10313 cur = cur->next; 10314 } 10315 newDoc->children->children = NULL; 10316 } 10317 ret = 0; 10318 } 10319 ctxt->sax = oldsax; 10320 ctxt->dict = NULL; 10321 ctxt->attsDefault = NULL; 10322 ctxt->attsSpecial = NULL; 10323 xmlFreeParserCtxt(ctxt); 10324 newDoc->intSubset = NULL; 10325 newDoc->extSubset = NULL; 10326 xmlFreeDoc(newDoc); 10327 10328 return(ret); 10329} 10330 10331/** 10332 * xmlParseExternalEntityPrivate: 10333 * @doc: the document the chunk pertains to 10334 * @oldctxt: the previous parser context if available 10335 * @sax: the SAX handler bloc (possibly NULL) 10336 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10337 * @depth: Used for loop detection, use 0 10338 * @URL: the URL for the entity to load 10339 * @ID: the System ID for the entity to load 10340 * @list: the return value for the set of parsed nodes 10341 * 10342 * Private version of xmlParseExternalEntity() 10343 * 10344 * Returns 0 if the entity is well formed, -1 in case of args problem and 10345 * the parser error code otherwise 10346 */ 10347 10348static xmlParserErrors 10349xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 10350 xmlSAXHandlerPtr sax, 10351 void *user_data, int depth, const xmlChar *URL, 10352 const xmlChar *ID, xmlNodePtr *list) { 10353 xmlParserCtxtPtr ctxt; 10354 xmlDocPtr newDoc; 10355 xmlSAXHandlerPtr oldsax = NULL; 10356 xmlParserErrors ret = XML_ERR_OK; 10357 xmlChar start[4]; 10358 xmlCharEncoding enc; 10359 10360 if (depth > 40) { 10361 return(XML_ERR_ENTITY_LOOP); 10362 } 
10363 10364 10365 10366 if (list != NULL) 10367 *list = NULL; 10368 if ((URL == NULL) && (ID == NULL)) 10369 return(XML_ERR_INTERNAL_ERROR); 10370 if (doc == NULL) /* @@ relax but check for dereferences */ 10371 return(XML_ERR_INTERNAL_ERROR); 10372 10373 10374 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10375 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10376 ctxt->userData = ctxt; 10377 if (oldctxt != NULL) { 10378 ctxt->_private = oldctxt->_private; 10379 ctxt->loadsubset = oldctxt->loadsubset; 10380 ctxt->validate = oldctxt->validate; 10381 ctxt->external = oldctxt->external; 10382 ctxt->record_info = oldctxt->record_info; 10383 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 10384 ctxt->node_seq.length = oldctxt->node_seq.length; 10385 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 10386 } else { 10387 /* 10388 * Doing validity checking on chunk without context 10389 * doesn't make sense 10390 */ 10391 ctxt->_private = NULL; 10392 ctxt->validate = 0; 10393 ctxt->external = 2; 10394 ctxt->loadsubset = 0; 10395 } 10396 if (sax != NULL) { 10397 oldsax = ctxt->sax; 10398 ctxt->sax = sax; 10399 if (user_data != NULL) 10400 ctxt->userData = user_data; 10401 } 10402 xmlDetectSAX2(ctxt); 10403 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10404 if (newDoc == NULL) { 10405 ctxt->node_seq.maximum = 0; 10406 ctxt->node_seq.length = 0; 10407 ctxt->node_seq.buffer = NULL; 10408 xmlFreeParserCtxt(ctxt); 10409 return(XML_ERR_INTERNAL_ERROR); 10410 } 10411 if (doc != NULL) { 10412 newDoc->intSubset = doc->intSubset; 10413 newDoc->extSubset = doc->extSubset; 10414 } 10415 if (doc->URL != NULL) { 10416 newDoc->URL = xmlStrdup(doc->URL); 10417 } 10418 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10419 if (newDoc->children == NULL) { 10420 if (sax != NULL) 10421 ctxt->sax = oldsax; 10422 ctxt->node_seq.maximum = 0; 10423 ctxt->node_seq.length = 0; 10424 ctxt->node_seq.buffer = NULL; 10425 xmlFreeParserCtxt(ctxt); 10426 newDoc->intSubset = 
NULL; 10427 newDoc->extSubset = NULL; 10428 xmlFreeDoc(newDoc); 10429 return(XML_ERR_INTERNAL_ERROR); 10430 } 10431 nodePush(ctxt, newDoc->children); 10432 if (doc == NULL) { 10433 ctxt->myDoc = newDoc; 10434 } else { 10435 ctxt->myDoc = doc; 10436 newDoc->children->doc = doc; 10437 } 10438 10439 /* 10440 * Get the 4 first bytes and decode the charset 10441 * if enc != XML_CHAR_ENCODING_NONE 10442 * plug some encoding conversion routines. 10443 */ 10444 GROW; 10445 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10446 start[0] = RAW; 10447 start[1] = NXT(1); 10448 start[2] = NXT(2); 10449 start[3] = NXT(3); 10450 enc = xmlDetectCharEncoding(start, 4); 10451 if (enc != XML_CHAR_ENCODING_NONE) { 10452 xmlSwitchEncoding(ctxt, enc); 10453 } 10454 } 10455 10456 /* 10457 * Parse a possible text declaration first 10458 */ 10459 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10460 xmlParseTextDecl(ctxt); 10461 } 10462 10463 ctxt->instate = XML_PARSER_CONTENT; 10464 ctxt->depth = depth; 10465 10466 xmlParseContent(ctxt); 10467 10468 if ((RAW == '<') && (NXT(1) == '/')) { 10469 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10470 } else if (RAW != 0) { 10471 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10472 } 10473 if (ctxt->node != newDoc->children) { 10474 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10475 } 10476 10477 if (!ctxt->wellFormed) { 10478 if (ctxt->errNo == 0) 10479 ret = XML_ERR_INTERNAL_ERROR; 10480 else 10481 ret = (xmlParserErrors)ctxt->errNo; 10482 } else { 10483 if (list != NULL) { 10484 xmlNodePtr cur; 10485 10486 /* 10487 * Return the newly created nodeset after unlinking it from 10488 * they pseudo parent. 
10489 */ 10490 cur = newDoc->children->children; 10491 *list = cur; 10492 while (cur != NULL) { 10493 cur->parent = NULL; 10494 cur = cur->next; 10495 } 10496 newDoc->children->children = NULL; 10497 } 10498 ret = XML_ERR_OK; 10499 } 10500 if (sax != NULL) 10501 ctxt->sax = oldsax; 10502 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 10503 oldctxt->node_seq.length = ctxt->node_seq.length; 10504 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 10505 ctxt->node_seq.maximum = 0; 10506 ctxt->node_seq.length = 0; 10507 ctxt->node_seq.buffer = NULL; 10508 xmlFreeParserCtxt(ctxt); 10509 newDoc->intSubset = NULL; 10510 newDoc->extSubset = NULL; 10511 xmlFreeDoc(newDoc); 10512 10513 return(ret); 10514} 10515 10516#ifdef LIBXML_SAX1_ENABLED 10517/** 10518 * xmlParseExternalEntity: 10519 * @doc: the document the chunk pertains to 10520 * @sax: the SAX handler bloc (possibly NULL) 10521 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10522 * @depth: Used for loop detection, use 0 10523 * @URL: the URL for the entity to load 10524 * @ID: the System ID for the entity to load 10525 * @lst: the return value for the set of parsed nodes 10526 * 10527 * Parse an external general entity 10528 * An external general parsed entity is well-formed if it matches the 10529 * production labeled extParsedEnt. 10530 * 10531 * [78] extParsedEnt ::= TextDecl? 
content 10532 * 10533 * Returns 0 if the entity is well formed, -1 in case of args problem and 10534 * the parser error code otherwise 10535 */ 10536 10537int 10538xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 10539 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 10540 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 10541 ID, lst)); 10542} 10543 10544/** 10545 * xmlParseBalancedChunkMemory: 10546 * @doc: the document the chunk pertains to 10547 * @sax: the SAX handler bloc (possibly NULL) 10548 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10549 * @depth: Used for loop detection, use 0 10550 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10551 * @lst: the return value for the set of parsed nodes 10552 * 10553 * Parse a well-balanced chunk of an XML document 10554 * called by the parser 10555 * The allowed sequence for the Well Balanced Chunk is the one defined by 10556 * the content production in the XML grammar: 10557 * 10558 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10559 * 10560 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10561 * the parser error code otherwise 10562 */ 10563 10564int 10565xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10566 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 10567 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 10568 depth, string, lst, 0 ); 10569} 10570#endif /* LIBXML_SAX1_ENABLED */ 10571 10572/** 10573 * xmlParseBalancedChunkMemoryInternal: 10574 * @oldctxt: the existing parsing context 10575 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10576 * @user_data: the user data field for the parser context 10577 * @lst: the return value for the set of parsed nodes 10578 * 10579 * 10580 * Parse a well-balanced chunk of an XML document 10581 * called by the parser 10582 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 10583 * the content production in the XML grammar: 10584 * 10585 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10586 * 10587 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 10588 * error code otherwise 10589 * 10590 * In case recover is set to 1, the nodelist will not be empty even if 10591 * the parsed chunk is not well balanced. 10592 */ 10593static xmlParserErrors 10594xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 10595 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 10596 xmlParserCtxtPtr ctxt; 10597 xmlDocPtr newDoc = NULL; 10598 xmlSAXHandlerPtr oldsax = NULL; 10599 xmlNodePtr content = NULL; 10600 int size; 10601 xmlParserErrors ret = XML_ERR_OK; 10602 10603 if (oldctxt->depth > 40) { 10604 return(XML_ERR_ENTITY_LOOP); 10605 } 10606 10607 10608 if (lst != NULL) 10609 *lst = NULL; 10610 if (string == NULL) 10611 return(XML_ERR_INTERNAL_ERROR); 10612 10613 size = xmlStrlen(string); 10614 10615 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10616 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10617 if (user_data != NULL) 10618 ctxt->userData = user_data; 10619 else 10620 ctxt->userData = ctxt; 10621 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10622 ctxt->dict = oldctxt->dict; 10623 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 10624 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 10625 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 10626 10627 oldsax = ctxt->sax; 10628 ctxt->sax = oldctxt->sax; 10629 xmlDetectSAX2(ctxt); 10630 ctxt->replaceEntities = oldctxt->replaceEntities; 10631 ctxt->options = oldctxt->options; 10632 10633 ctxt->_private = oldctxt->_private; 10634 if (oldctxt->myDoc == NULL) { 10635 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10636 if (newDoc == NULL) { 10637 ctxt->sax = oldsax; 10638 ctxt->dict = NULL; 10639 
xmlFreeParserCtxt(ctxt); 10640 return(XML_ERR_INTERNAL_ERROR); 10641 } 10642 ctxt->myDoc = newDoc; 10643 } else { 10644 ctxt->myDoc = oldctxt->myDoc; 10645 content = ctxt->myDoc->children; 10646 } 10647 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL, 10648 BAD_CAST "pseudoroot", NULL); 10649 if (ctxt->myDoc->children == NULL) { 10650 ctxt->sax = oldsax; 10651 ctxt->dict = NULL; 10652 xmlFreeParserCtxt(ctxt); 10653 if (newDoc != NULL) 10654 xmlFreeDoc(newDoc); 10655 return(XML_ERR_INTERNAL_ERROR); 10656 } 10657 nodePush(ctxt, ctxt->myDoc->children); 10658 ctxt->instate = XML_PARSER_CONTENT; 10659 ctxt->depth = oldctxt->depth + 1; 10660 10661 ctxt->validate = 0; 10662 ctxt->loadsubset = oldctxt->loadsubset; 10663 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 10664 /* 10665 * ID/IDREF registration will be done in xmlValidateElement below 10666 */ 10667 ctxt->loadsubset |= XML_SKIP_IDS; 10668 } 10669 ctxt->dictNames = oldctxt->dictNames; 10670 ctxt->attsDefault = oldctxt->attsDefault; 10671 ctxt->attsSpecial = oldctxt->attsSpecial; 10672 10673 xmlParseContent(ctxt); 10674 if ((RAW == '<') && (NXT(1) == '/')) { 10675 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10676 } else if (RAW != 0) { 10677 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10678 } 10679 if (ctxt->node != ctxt->myDoc->children) { 10680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10681 } 10682 10683 if (!ctxt->wellFormed) { 10684 if (ctxt->errNo == 0) 10685 ret = XML_ERR_INTERNAL_ERROR; 10686 else 10687 ret = (xmlParserErrors)ctxt->errNo; 10688 } else { 10689 ret = XML_ERR_OK; 10690 } 10691 10692 if ((lst != NULL) && (ret == XML_ERR_OK)) { 10693 xmlNodePtr cur; 10694 10695 /* 10696 * Return the newly created nodeset after unlinking it from 10697 * they pseudo parent. 
10698 */ 10699 cur = ctxt->myDoc->children->children; 10700 *lst = cur; 10701 while (cur != NULL) { 10702#ifdef LIBXML_VALID_ENABLED 10703 if (oldctxt->validate && oldctxt->wellFormed && 10704 oldctxt->myDoc && oldctxt->myDoc->intSubset) { 10705 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 10706 oldctxt->myDoc, cur); 10707 } 10708#endif /* LIBXML_VALID_ENABLED */ 10709 cur->parent = NULL; 10710 cur = cur->next; 10711 } 10712 ctxt->myDoc->children->children = NULL; 10713 } 10714 if (ctxt->myDoc != NULL) { 10715 xmlFreeNode(ctxt->myDoc->children); 10716 ctxt->myDoc->children = content; 10717 } 10718 10719 ctxt->sax = oldsax; 10720 ctxt->dict = NULL; 10721 ctxt->attsDefault = NULL; 10722 ctxt->attsSpecial = NULL; 10723 xmlFreeParserCtxt(ctxt); 10724 if (newDoc != NULL) 10725 xmlFreeDoc(newDoc); 10726 10727 return(ret); 10728} 10729 10730#ifdef LIBXML_SAX1_ENABLED 10731/** 10732 * xmlParseBalancedChunkMemoryRecover: 10733 * @doc: the document the chunk pertains to 10734 * @sax: the SAX handler bloc (possibly NULL) 10735 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10736 * @depth: Used for loop detection, use 0 10737 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10738 * @lst: the return value for the set of parsed nodes 10739 * @recover: return nodes even if the data is broken (use 0) 10740 * 10741 * 10742 * Parse a well-balanced chunk of an XML document 10743 * called by the parser 10744 * The allowed sequence for the Well Balanced Chunk is the one defined by 10745 * the content production in the XML grammar: 10746 * 10747 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10748 * 10749 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10750 * the parser error code otherwise 10751 * 10752 * In case recover is set to 1, the nodelist will not be empty even if 10753 * the parsed chunk is not well balanced. 
10754 */ 10755int 10756xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10757 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 10758 int recover) { 10759 xmlParserCtxtPtr ctxt; 10760 xmlDocPtr newDoc; 10761 xmlSAXHandlerPtr oldsax = NULL; 10762 xmlNodePtr content; 10763 int size; 10764 int ret = 0; 10765 10766 if (depth > 40) { 10767 return(XML_ERR_ENTITY_LOOP); 10768 } 10769 10770 10771 if (lst != NULL) 10772 *lst = NULL; 10773 if (string == NULL) 10774 return(-1); 10775 10776 size = xmlStrlen(string); 10777 10778 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10779 if (ctxt == NULL) return(-1); 10780 ctxt->userData = ctxt; 10781 if (sax != NULL) { 10782 oldsax = ctxt->sax; 10783 ctxt->sax = sax; 10784 if (user_data != NULL) 10785 ctxt->userData = user_data; 10786 } 10787 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10788 if (newDoc == NULL) { 10789 xmlFreeParserCtxt(ctxt); 10790 return(-1); 10791 } 10792 if (doc != NULL) { 10793 newDoc->intSubset = doc->intSubset; 10794 newDoc->extSubset = doc->extSubset; 10795 } 10796 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10797 if (newDoc->children == NULL) { 10798 if (sax != NULL) 10799 ctxt->sax = oldsax; 10800 xmlFreeParserCtxt(ctxt); 10801 newDoc->intSubset = NULL; 10802 newDoc->extSubset = NULL; 10803 xmlFreeDoc(newDoc); 10804 return(-1); 10805 } 10806 nodePush(ctxt, newDoc->children); 10807 if (doc == NULL) { 10808 ctxt->myDoc = newDoc; 10809 } else { 10810 ctxt->myDoc = newDoc; 10811 newDoc->children->doc = doc; 10812 } 10813 ctxt->instate = XML_PARSER_CONTENT; 10814 ctxt->depth = depth; 10815 10816 /* 10817 * Doing validity checking on chunk doesn't make sense 10818 */ 10819 ctxt->validate = 0; 10820 ctxt->loadsubset = 0; 10821 xmlDetectSAX2(ctxt); 10822 10823 if ( doc != NULL ){ 10824 content = doc->children; 10825 doc->children = NULL; 10826 xmlParseContent(ctxt); 10827 doc->children = content; 10828 } 10829 else { 10830 
xmlParseContent(ctxt); 10831 } 10832 if ((RAW == '<') && (NXT(1) == '/')) { 10833 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10834 } else if (RAW != 0) { 10835 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10836 } 10837 if (ctxt->node != newDoc->children) { 10838 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10839 } 10840 10841 if (!ctxt->wellFormed) { 10842 if (ctxt->errNo == 0) 10843 ret = 1; 10844 else 10845 ret = ctxt->errNo; 10846 } else { 10847 ret = 0; 10848 } 10849 10850 if (lst != NULL && (ret == 0 || recover == 1)) { 10851 xmlNodePtr cur; 10852 10853 /* 10854 * Return the newly created nodeset after unlinking it from 10855 * they pseudo parent. 10856 */ 10857 cur = newDoc->children->children; 10858 *lst = cur; 10859 while (cur != NULL) { 10860 cur->parent = NULL; 10861 cur = cur->next; 10862 } 10863 newDoc->children->children = NULL; 10864 } 10865 10866 if (sax != NULL) 10867 ctxt->sax = oldsax; 10868 xmlFreeParserCtxt(ctxt); 10869 newDoc->intSubset = NULL; 10870 newDoc->extSubset = NULL; 10871 xmlFreeDoc(newDoc); 10872 10873 return(ret); 10874} 10875 10876/** 10877 * xmlSAXParseEntity: 10878 * @sax: the SAX handler block 10879 * @filename: the filename 10880 * 10881 * parse an XML external entity out of context and build a tree. 10882 * It use the given SAX function block to handle the parsing callback. 10883 * If sax is NULL, fallback to the default DOM tree building routines. 10884 * 10885 * [78] extParsedEnt ::= TextDecl? 
content 10886 * 10887 * This correspond to a "Well Balanced" chunk 10888 * 10889 * Returns the resulting document tree 10890 */ 10891 10892xmlDocPtr 10893xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 10894 xmlDocPtr ret; 10895 xmlParserCtxtPtr ctxt; 10896 10897 ctxt = xmlCreateFileParserCtxt(filename); 10898 if (ctxt == NULL) { 10899 return(NULL); 10900 } 10901 if (sax != NULL) { 10902 if (ctxt->sax != NULL) 10903 xmlFree(ctxt->sax); 10904 ctxt->sax = sax; 10905 ctxt->userData = NULL; 10906 } 10907 10908 xmlParseExtParsedEnt(ctxt); 10909 10910 if (ctxt->wellFormed) 10911 ret = ctxt->myDoc; 10912 else { 10913 ret = NULL; 10914 xmlFreeDoc(ctxt->myDoc); 10915 ctxt->myDoc = NULL; 10916 } 10917 if (sax != NULL) 10918 ctxt->sax = NULL; 10919 xmlFreeParserCtxt(ctxt); 10920 10921 return(ret); 10922} 10923 10924/** 10925 * xmlParseEntity: 10926 * @filename: the filename 10927 * 10928 * parse an XML external entity out of context and build a tree. 10929 * 10930 * [78] extParsedEnt ::= TextDecl? content 10931 * 10932 * This correspond to a "Well Balanced" chunk 10933 * 10934 * Returns the resulting document tree 10935 */ 10936 10937xmlDocPtr 10938xmlParseEntity(const char *filename) { 10939 return(xmlSAXParseEntity(NULL, filename)); 10940} 10941#endif /* LIBXML_SAX1_ENABLED */ 10942 10943/** 10944 * xmlCreateEntityParserCtxt: 10945 * @URL: the entity URL 10946 * @ID: the entity PUBLIC ID 10947 * @base: a possible base for the target URI 10948 * 10949 * Create a parser context for an external entity 10950 * Automatic support for ZLIB/Compress compressed document is provided 10951 * by default if found at compile-time. 
10952 * 10953 * Returns the new parser context or NULL 10954 */ 10955xmlParserCtxtPtr 10956xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 10957 const xmlChar *base) { 10958 xmlParserCtxtPtr ctxt; 10959 xmlParserInputPtr inputStream; 10960 char *directory = NULL; 10961 xmlChar *uri; 10962 10963 ctxt = xmlNewParserCtxt(); 10964 if (ctxt == NULL) { 10965 return(NULL); 10966 } 10967 10968 uri = xmlBuildURI(URL, base); 10969 10970 if (uri == NULL) { 10971 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 10972 if (inputStream == NULL) { 10973 xmlFreeParserCtxt(ctxt); 10974 return(NULL); 10975 } 10976 10977 inputPush(ctxt, inputStream); 10978 10979 if ((ctxt->directory == NULL) && (directory == NULL)) 10980 directory = xmlParserGetDirectory((char *)URL); 10981 if ((ctxt->directory == NULL) && (directory != NULL)) 10982 ctxt->directory = directory; 10983 } else { 10984 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 10985 if (inputStream == NULL) { 10986 xmlFree(uri); 10987 xmlFreeParserCtxt(ctxt); 10988 return(NULL); 10989 } 10990 10991 inputPush(ctxt, inputStream); 10992 10993 if ((ctxt->directory == NULL) && (directory == NULL)) 10994 directory = xmlParserGetDirectory((char *)uri); 10995 if ((ctxt->directory == NULL) && (directory != NULL)) 10996 ctxt->directory = directory; 10997 xmlFree(uri); 10998 } 10999 return(ctxt); 11000} 11001 11002/************************************************************************ 11003 * * 11004 * Front ends when parsing from a file * 11005 * * 11006 ************************************************************************/ 11007 11008/** 11009 * xmlCreateURLParserCtxt: 11010 * @filename: the filename or URL 11011 * @options: a combination of xmlParserOption 11012 * 11013 * Create a parser context for a file or URL content. 
11014 * Automatic support for ZLIB/Compress compressed document is provided 11015 * by default if found at compile-time and for file accesses 11016 * 11017 * Returns the new parser context or NULL 11018 */ 11019xmlParserCtxtPtr 11020xmlCreateURLParserCtxt(const char *filename, int options) 11021{ 11022 xmlParserCtxtPtr ctxt; 11023 xmlParserInputPtr inputStream; 11024 char *directory = NULL; 11025 11026 ctxt = xmlNewParserCtxt(); 11027 if (ctxt == NULL) { 11028 xmlErrMemory(NULL, "cannot allocate parser context"); 11029 return(NULL); 11030 } 11031 11032 if (options != 0) 11033 xmlCtxtUseOptions(ctxt, options); 11034 11035 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 11036 if (inputStream == NULL) { 11037 xmlFreeParserCtxt(ctxt); 11038 return(NULL); 11039 } 11040 11041 inputPush(ctxt, inputStream); 11042 if ((ctxt->directory == NULL) && (directory == NULL)) 11043 directory = xmlParserGetDirectory(filename); 11044 if ((ctxt->directory == NULL) && (directory != NULL)) 11045 ctxt->directory = directory; 11046 11047 return(ctxt); 11048} 11049 11050/** 11051 * xmlCreateFileParserCtxt: 11052 * @filename: the filename 11053 * 11054 * Create a parser context for a file content. 11055 * Automatic support for ZLIB/Compress compressed document is provided 11056 * by default if found at compile-time. 11057 * 11058 * Returns the new parser context or NULL 11059 */ 11060xmlParserCtxtPtr 11061xmlCreateFileParserCtxt(const char *filename) 11062{ 11063 return(xmlCreateURLParserCtxt(filename, 0)); 11064} 11065 11066#ifdef LIBXML_SAX1_ENABLED 11067/** 11068 * xmlSAXParseFileWithData: 11069 * @sax: the SAX handler block 11070 * @filename: the filename 11071 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11072 * documents 11073 * @data: the userdata 11074 * 11075 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11076 * compressed document is provided by default if found at compile-time. 
11077 * It use the given SAX function block to handle the parsing callback. 11078 * If sax is NULL, fallback to the default DOM tree building routines. 11079 * 11080 * User data (void *) is stored within the parser context in the 11081 * context's _private member, so it is available nearly everywhere in libxml 11082 * 11083 * Returns the resulting document tree 11084 */ 11085 11086xmlDocPtr 11087xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 11088 int recovery, void *data) { 11089 xmlDocPtr ret; 11090 xmlParserCtxtPtr ctxt; 11091 char *directory = NULL; 11092 11093 xmlInitParser(); 11094 11095 ctxt = xmlCreateFileParserCtxt(filename); 11096 if (ctxt == NULL) { 11097 return(NULL); 11098 } 11099 if (sax != NULL) { 11100 if (ctxt->sax != NULL) 11101 xmlFree(ctxt->sax); 11102 ctxt->sax = sax; 11103 } 11104 xmlDetectSAX2(ctxt); 11105 if (data!=NULL) { 11106 ctxt->_private = data; 11107 } 11108 11109 if ((ctxt->directory == NULL) && (directory == NULL)) 11110 directory = xmlParserGetDirectory(filename); 11111 if ((ctxt->directory == NULL) && (directory != NULL)) 11112 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 11113 11114 ctxt->recovery = recovery; 11115 11116 xmlParseDocument(ctxt); 11117 11118 if ((ctxt->wellFormed) || recovery) { 11119 ret = ctxt->myDoc; 11120 if (ret != NULL) { 11121 if (ctxt->input->buf->compressed > 0) 11122 ret->compression = 9; 11123 else 11124 ret->compression = ctxt->input->buf->compressed; 11125 } 11126 } 11127 else { 11128 ret = NULL; 11129 xmlFreeDoc(ctxt->myDoc); 11130 ctxt->myDoc = NULL; 11131 } 11132 if (sax != NULL) 11133 ctxt->sax = NULL; 11134 xmlFreeParserCtxt(ctxt); 11135 11136 return(ret); 11137} 11138 11139/** 11140 * xmlSAXParseFile: 11141 * @sax: the SAX handler block 11142 * @filename: the filename 11143 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11144 * documents 11145 * 11146 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 11147 * compressed document is provided by default if found at compile-time. 11148 * It use the given SAX function block to handle the parsing callback. 11149 * If sax is NULL, fallback to the default DOM tree building routines. 11150 * 11151 * Returns the resulting document tree 11152 */ 11153 11154xmlDocPtr 11155xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 11156 int recovery) { 11157 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 11158} 11159 11160/** 11161 * xmlRecoverDoc: 11162 * @cur: a pointer to an array of xmlChar 11163 * 11164 * parse an XML in-memory document and build a tree. 11165 * In the case the document is not Well Formed, a tree is built anyway 11166 * 11167 * Returns the resulting document tree 11168 */ 11169 11170xmlDocPtr 11171xmlRecoverDoc(xmlChar *cur) { 11172 return(xmlSAXParseDoc(NULL, cur, 1)); 11173} 11174 11175/** 11176 * xmlParseFile: 11177 * @filename: the filename 11178 * 11179 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11180 * compressed document is provided by default if found at compile-time. 11181 * 11182 * Returns the resulting document tree if the file was wellformed, 11183 * NULL otherwise. 11184 */ 11185 11186xmlDocPtr 11187xmlParseFile(const char *filename) { 11188 return(xmlSAXParseFile(NULL, filename, 0)); 11189} 11190 11191/** 11192 * xmlRecoverFile: 11193 * @filename: the filename 11194 * 11195 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11196 * compressed document is provided by default if found at compile-time. 
11197 * In the case the document is not Well Formed, a tree is built anyway 11198 * 11199 * Returns the resulting document tree 11200 */ 11201 11202xmlDocPtr 11203xmlRecoverFile(const char *filename) { 11204 return(xmlSAXParseFile(NULL, filename, 1)); 11205} 11206 11207 11208/** 11209 * xmlSetupParserForBuffer: 11210 * @ctxt: an XML parser context 11211 * @buffer: a xmlChar * buffer 11212 * @filename: a file name 11213 * 11214 * Setup the parser context to parse a new buffer; Clears any prior 11215 * contents from the parser context. The buffer parameter must not be 11216 * NULL, but the filename parameter can be 11217 */ 11218void 11219xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 11220 const char* filename) 11221{ 11222 xmlParserInputPtr input; 11223 11224 input = xmlNewInputStream(ctxt); 11225 if (input == NULL) { 11226 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 11227 xmlFree(ctxt); 11228 return; 11229 } 11230 11231 xmlClearParserCtxt(ctxt); 11232 if (filename != NULL) 11233 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 11234 input->base = buffer; 11235 input->cur = buffer; 11236 input->end = &buffer[xmlStrlen(buffer)]; 11237 inputPush(ctxt, input); 11238} 11239 11240/** 11241 * xmlSAXUserParseFile: 11242 * @sax: a SAX handler 11243 * @user_data: The user data returned on SAX callbacks 11244 * @filename: a file name 11245 * 11246 * parse an XML file and call the given SAX handler routines. 
11247 * Automatic support for ZLIB/Compress compressed document is provided 11248 * 11249 * Returns 0 in case of success or a error number otherwise 11250 */ 11251int 11252xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 11253 const char *filename) { 11254 int ret = 0; 11255 xmlParserCtxtPtr ctxt; 11256 11257 ctxt = xmlCreateFileParserCtxt(filename); 11258 if (ctxt == NULL) return -1; 11259#ifdef LIBXML_SAX1_ENABLED 11260 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11261#endif /* LIBXML_SAX1_ENABLED */ 11262 xmlFree(ctxt->sax); 11263 ctxt->sax = sax; 11264 xmlDetectSAX2(ctxt); 11265 11266 if (user_data != NULL) 11267 ctxt->userData = user_data; 11268 11269 xmlParseDocument(ctxt); 11270 11271 if (ctxt->wellFormed) 11272 ret = 0; 11273 else { 11274 if (ctxt->errNo != 0) 11275 ret = ctxt->errNo; 11276 else 11277 ret = -1; 11278 } 11279 if (sax != NULL) 11280 ctxt->sax = NULL; 11281 xmlFreeParserCtxt(ctxt); 11282 11283 return ret; 11284} 11285#endif /* LIBXML_SAX1_ENABLED */ 11286 11287/************************************************************************ 11288 * * 11289 * Front ends when parsing from memory * 11290 * * 11291 ************************************************************************/ 11292 11293/** 11294 * xmlCreateMemoryParserCtxt: 11295 * @buffer: a pointer to a char array 11296 * @size: the size of the array 11297 * 11298 * Create a parser context for an XML in-memory document. 
11299 * 11300 * Returns the new parser context or NULL 11301 */ 11302xmlParserCtxtPtr 11303xmlCreateMemoryParserCtxt(const char *buffer, int size) { 11304 xmlParserCtxtPtr ctxt; 11305 xmlParserInputPtr input; 11306 xmlParserInputBufferPtr buf; 11307 11308 if (buffer == NULL) 11309 return(NULL); 11310 if (size <= 0) 11311 return(NULL); 11312 11313 ctxt = xmlNewParserCtxt(); 11314 if (ctxt == NULL) 11315 return(NULL); 11316 11317 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 11318 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 11319 if (buf == NULL) { 11320 xmlFreeParserCtxt(ctxt); 11321 return(NULL); 11322 } 11323 11324 input = xmlNewInputStream(ctxt); 11325 if (input == NULL) { 11326 xmlFreeParserInputBuffer(buf); 11327 xmlFreeParserCtxt(ctxt); 11328 return(NULL); 11329 } 11330 11331 input->filename = NULL; 11332 input->buf = buf; 11333 input->base = input->buf->buffer->content; 11334 input->cur = input->buf->buffer->content; 11335 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 11336 11337 inputPush(ctxt, input); 11338 return(ctxt); 11339} 11340 11341#ifdef LIBXML_SAX1_ENABLED 11342/** 11343 * xmlSAXParseMemoryWithData: 11344 * @sax: the SAX handler block 11345 * @buffer: an pointer to a char array 11346 * @size: the size of the array 11347 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11348 * documents 11349 * @data: the userdata 11350 * 11351 * parse an XML in-memory block and use the given SAX function block 11352 * to handle the parsing callback. If sax is NULL, fallback to the default 11353 * DOM tree building routines. 
11354 * 11355 * User data (void *) is stored within the parser context in the 11356 * context's _private member, so it is available nearly everywhere in libxml 11357 * 11358 * Returns the resulting document tree 11359 */ 11360 11361xmlDocPtr 11362xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 11363 int size, int recovery, void *data) { 11364 xmlDocPtr ret; 11365 xmlParserCtxtPtr ctxt; 11366 11367 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11368 if (ctxt == NULL) return(NULL); 11369 if (sax != NULL) { 11370 if (ctxt->sax != NULL) 11371 xmlFree(ctxt->sax); 11372 ctxt->sax = sax; 11373 } 11374 xmlDetectSAX2(ctxt); 11375 if (data!=NULL) { 11376 ctxt->_private=data; 11377 } 11378 11379 ctxt->recovery = recovery; 11380 11381 xmlParseDocument(ctxt); 11382 11383 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11384 else { 11385 ret = NULL; 11386 xmlFreeDoc(ctxt->myDoc); 11387 ctxt->myDoc = NULL; 11388 } 11389 if (sax != NULL) 11390 ctxt->sax = NULL; 11391 xmlFreeParserCtxt(ctxt); 11392 11393 return(ret); 11394} 11395 11396/** 11397 * xmlSAXParseMemory: 11398 * @sax: the SAX handler block 11399 * @buffer: an pointer to a char array 11400 * @size: the size of the array 11401 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 11402 * documents 11403 * 11404 * parse an XML in-memory block and use the given SAX function block 11405 * to handle the parsing callback. If sax is NULL, fallback to the default 11406 * DOM tree building routines. 11407 * 11408 * Returns the resulting document tree 11409 */ 11410xmlDocPtr 11411xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 11412 int size, int recovery) { 11413 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 11414} 11415 11416/** 11417 * xmlParseMemory: 11418 * @buffer: an pointer to a char array 11419 * @size: the size of the array 11420 * 11421 * parse an XML in-memory block and build a tree. 
11422 * 11423 * Returns the resulting document tree 11424 */ 11425 11426xmlDocPtr xmlParseMemory(const char *buffer, int size) { 11427 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 11428} 11429 11430/** 11431 * xmlRecoverMemory: 11432 * @buffer: an pointer to a char array 11433 * @size: the size of the array 11434 * 11435 * parse an XML in-memory block and build a tree. 11436 * In the case the document is not Well Formed, a tree is built anyway 11437 * 11438 * Returns the resulting document tree 11439 */ 11440 11441xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 11442 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 11443} 11444 11445/** 11446 * xmlSAXUserParseMemory: 11447 * @sax: a SAX handler 11448 * @user_data: The user data returned on SAX callbacks 11449 * @buffer: an in-memory XML document input 11450 * @size: the length of the XML document in bytes 11451 * 11452 * A better SAX parsing routine. 11453 * parse an XML in-memory buffer and call the given SAX handler routines. 
11454 * 11455 * Returns 0 in case of success or a error number otherwise 11456 */ 11457int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 11458 const char *buffer, int size) { 11459 int ret = 0; 11460 xmlParserCtxtPtr ctxt; 11461 xmlSAXHandlerPtr oldsax = NULL; 11462 11463 if (sax == NULL) return -1; 11464 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11465 if (ctxt == NULL) return -1; 11466 oldsax = ctxt->sax; 11467 ctxt->sax = sax; 11468 xmlDetectSAX2(ctxt); 11469 if (user_data != NULL) 11470 ctxt->userData = user_data; 11471 11472 xmlParseDocument(ctxt); 11473 11474 if (ctxt->wellFormed) 11475 ret = 0; 11476 else { 11477 if (ctxt->errNo != 0) 11478 ret = ctxt->errNo; 11479 else 11480 ret = -1; 11481 } 11482 ctxt->sax = oldsax; 11483 xmlFreeParserCtxt(ctxt); 11484 11485 return ret; 11486} 11487#endif /* LIBXML_SAX1_ENABLED */ 11488 11489/** 11490 * xmlCreateDocParserCtxt: 11491 * @cur: a pointer to an array of xmlChar 11492 * 11493 * Creates a parser context for an XML in-memory document. 11494 * 11495 * Returns the new parser context or NULL 11496 */ 11497xmlParserCtxtPtr 11498xmlCreateDocParserCtxt(const xmlChar *cur) { 11499 int len; 11500 11501 if (cur == NULL) 11502 return(NULL); 11503 len = xmlStrlen(cur); 11504 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 11505} 11506 11507#ifdef LIBXML_SAX1_ENABLED 11508/** 11509 * xmlSAXParseDoc: 11510 * @sax: the SAX handler block 11511 * @cur: a pointer to an array of xmlChar 11512 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11513 * documents 11514 * 11515 * parse an XML in-memory document and build a tree. 11516 * It use the given SAX function block to handle the parsing callback. 11517 * If sax is NULL, fallback to the default DOM tree building routines. 
11518 * 11519 * Returns the resulting document tree 11520 */ 11521 11522xmlDocPtr 11523xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 11524 xmlDocPtr ret; 11525 xmlParserCtxtPtr ctxt; 11526 11527 if (cur == NULL) return(NULL); 11528 11529 11530 ctxt = xmlCreateDocParserCtxt(cur); 11531 if (ctxt == NULL) return(NULL); 11532 if (sax != NULL) { 11533 ctxt->sax = sax; 11534 ctxt->userData = NULL; 11535 } 11536 xmlDetectSAX2(ctxt); 11537 11538 xmlParseDocument(ctxt); 11539 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11540 else { 11541 ret = NULL; 11542 xmlFreeDoc(ctxt->myDoc); 11543 ctxt->myDoc = NULL; 11544 } 11545 if (sax != NULL) 11546 ctxt->sax = NULL; 11547 xmlFreeParserCtxt(ctxt); 11548 11549 return(ret); 11550} 11551 11552/** 11553 * xmlParseDoc: 11554 * @cur: a pointer to an array of xmlChar 11555 * 11556 * parse an XML in-memory document and build a tree. 11557 * 11558 * Returns the resulting document tree 11559 */ 11560 11561xmlDocPtr 11562xmlParseDoc(xmlChar *cur) { 11563 return(xmlSAXParseDoc(NULL, cur, 0)); 11564} 11565#endif /* LIBXML_SAX1_ENABLED */ 11566 11567#ifdef LIBXML_LEGACY_ENABLED 11568/************************************************************************ 11569 * * 11570 * Specific function to keep track of entities references * 11571 * and used by the XSLT debugger * 11572 * * 11573 ************************************************************************/ 11574 11575static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 11576 11577/** 11578 * xmlAddEntityReference: 11579 * @ent : A valid entity 11580 * @firstNode : A valid first node for children of entity 11581 * @lastNode : A valid last node of children entity 11582 * 11583 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 11584 */ 11585static void 11586xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 11587 xmlNodePtr lastNode) 11588{ 11589 if (xmlEntityRefFunc != NULL) { 11590 (*xmlEntityRefFunc) (ent, firstNode, 
lastNode); 11591 } 11592} 11593 11594 11595/** 11596 * xmlSetEntityReferenceFunc: 11597 * @func: A valid function 11598 * 11599 * Set the function to call call back when a xml reference has been made 11600 */ 11601void 11602xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 11603{ 11604 xmlEntityRefFunc = func; 11605} 11606#endif /* LIBXML_LEGACY_ENABLED */ 11607 11608/************************************************************************ 11609 * * 11610 * Miscellaneous * 11611 * * 11612 ************************************************************************/ 11613 11614#ifdef LIBXML_XPATH_ENABLED 11615#include <libxml/xpath.h> 11616#endif 11617 11618extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 11619static int xmlParserInitialized = 0; 11620 11621/** 11622 * xmlInitParser: 11623 * 11624 * Initialization function for the XML parser. 11625 * This is not reentrant. Call once before processing in case of 11626 * use in multithreaded programs. 11627 */ 11628 11629void 11630xmlInitParser(void) { 11631 if (xmlParserInitialized != 0) 11632 return; 11633 11634 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 11635 (xmlGenericError == NULL)) 11636 initGenericErrorDefaultFunc(NULL); 11637 xmlInitGlobals(); 11638 xmlInitThreads(); 11639 xmlInitMemory(); 11640 xmlInitCharEncodingHandlers(); 11641 xmlDefaultSAXHandlerInit(); 11642 xmlRegisterDefaultInputCallbacks(); 11643#ifdef LIBXML_OUTPUT_ENABLED 11644 xmlRegisterDefaultOutputCallbacks(); 11645#endif /* LIBXML_OUTPUT_ENABLED */ 11646#ifdef LIBXML_HTML_ENABLED 11647 htmlInitAutoClose(); 11648 htmlDefaultSAXHandlerInit(); 11649#endif 11650#ifdef LIBXML_XPATH_ENABLED 11651 xmlXPathInit(); 11652#endif 11653 xmlParserInitialized = 1; 11654} 11655 11656/** 11657 * xmlCleanupParser: 11658 * 11659 * Cleanup function for the XML library. It tries to reclaim all 11660 * parsing related global memory allocated for the library processing. 11661 * It doesn't deallocate any document related memory. 
Calling this 11662 * function should not prevent reusing the library but one should 11663 * call xmlCleanupParser() only when the process has 11664 * finished using the library or XML document built with it. 11665 */ 11666 11667void 11668xmlCleanupParser(void) { 11669 if (!xmlParserInitialized) 11670 return; 11671 11672 xmlCleanupCharEncodingHandlers(); 11673#ifdef LIBXML_CATALOG_ENABLED 11674 xmlCatalogCleanup(); 11675#endif 11676 xmlCleanupInputCallbacks(); 11677#ifdef LIBXML_OUTPUT_ENABLED 11678 xmlCleanupOutputCallbacks(); 11679#endif 11680 xmlCleanupGlobals(); 11681 xmlResetLastError(); 11682 xmlCleanupThreads(); /* must be last if called not from the main thread */ 11683 xmlCleanupMemory(); 11684 xmlParserInitialized = 0; 11685} 11686 11687/************************************************************************ 11688 * * 11689 * New set (2.6.0) of simpler and more flexible APIs * 11690 * * 11691 ************************************************************************/ 11692 11693/** 11694 * DICT_FREE: 11695 * @str: a string 11696 * 11697 * Free a string if it is not owned by the "dict" dictionnary in the 11698 * current scope 11699 */ 11700#define DICT_FREE(str) \ 11701 if ((str) && ((!dict) || \ 11702 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 11703 xmlFree((char *)(str)); 11704 11705/** 11706 * xmlCtxtReset: 11707 * @ctxt: an XML parser context 11708 * 11709 * Reset a parser context 11710 */ 11711void 11712xmlCtxtReset(xmlParserCtxtPtr ctxt) 11713{ 11714 xmlParserInputPtr input; 11715 xmlDictPtr dict = ctxt->dict; 11716 11717 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 11718 xmlFreeInputStream(input); 11719 } 11720 ctxt->inputNr = 0; 11721 ctxt->input = NULL; 11722 11723 ctxt->spaceNr = 0; 11724 ctxt->spaceTab[0] = -1; 11725 ctxt->space = &ctxt->spaceTab[0]; 11726 11727 11728 ctxt->nodeNr = 0; 11729 ctxt->node = NULL; 11730 11731 ctxt->nameNr = 0; 11732 ctxt->name = NULL; 11733 11734 DICT_FREE(ctxt->version); 11735 
ctxt->version = NULL; 11736 DICT_FREE(ctxt->encoding); 11737 ctxt->encoding = NULL; 11738 DICT_FREE(ctxt->directory); 11739 ctxt->directory = NULL; 11740 DICT_FREE(ctxt->extSubURI); 11741 ctxt->extSubURI = NULL; 11742 DICT_FREE(ctxt->extSubSystem); 11743 ctxt->extSubSystem = NULL; 11744 if (ctxt->myDoc != NULL) 11745 xmlFreeDoc(ctxt->myDoc); 11746 ctxt->myDoc = NULL; 11747 11748 ctxt->standalone = -1; 11749 ctxt->hasExternalSubset = 0; 11750 ctxt->hasPErefs = 0; 11751 ctxt->html = 0; 11752 ctxt->external = 0; 11753 ctxt->instate = XML_PARSER_START; 11754 ctxt->token = 0; 11755 11756 ctxt->wellFormed = 1; 11757 ctxt->nsWellFormed = 1; 11758 ctxt->disableSAX = 0; 11759 ctxt->valid = 1; 11760 ctxt->vctxt.userData = ctxt; 11761 ctxt->vctxt.error = xmlParserValidityError; 11762 ctxt->vctxt.warning = xmlParserValidityWarning; 11763 ctxt->record_info = 0; 11764 ctxt->nbChars = 0; 11765 ctxt->checkIndex = 0; 11766 ctxt->inSubset = 0; 11767 ctxt->errNo = XML_ERR_OK; 11768 ctxt->depth = 0; 11769 ctxt->charset = XML_CHAR_ENCODING_UTF8; 11770 ctxt->catalogs = NULL; 11771 xmlInitNodeInfoSeq(&ctxt->node_seq); 11772 11773 if (ctxt->attsDefault != NULL) { 11774 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 11775 ctxt->attsDefault = NULL; 11776 } 11777 if (ctxt->attsSpecial != NULL) { 11778 xmlHashFree(ctxt->attsSpecial, NULL); 11779 ctxt->attsSpecial = NULL; 11780 } 11781 11782#ifdef LIBXML_CATALOG_ENABLED 11783 if (ctxt->catalogs != NULL) 11784 xmlCatalogFreeLocal(ctxt->catalogs); 11785#endif 11786 if (ctxt->lastError.code != XML_ERR_OK) 11787 xmlResetError(&ctxt->lastError); 11788} 11789 11790/** 11791 * xmlCtxtResetPush: 11792 * @ctxt: an XML parser context 11793 * @chunk: a pointer to an array of chars 11794 * @size: number of chars in the array 11795 * @filename: an optional file name or URI 11796 * @encoding: the document encoding, or NULL 11797 * 11798 * Reset a push parser context 11799 * 11800 * Returns 0 in case of success and 1 in case of error 11801 */ 
11802int 11803xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 11804 int size, const char *filename, const char *encoding) 11805{ 11806 xmlParserInputPtr inputStream; 11807 xmlParserInputBufferPtr buf; 11808 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11809 11810 if (ctxt == NULL) 11811 return(1); 11812 11813 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 11814 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11815 11816 buf = xmlAllocParserInputBuffer(enc); 11817 if (buf == NULL) 11818 return(1); 11819 11820 if (ctxt == NULL) { 11821 xmlFreeParserInputBuffer(buf); 11822 return(1); 11823 } 11824 11825 xmlCtxtReset(ctxt); 11826 11827 if (ctxt->pushTab == NULL) { 11828 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 11829 sizeof(xmlChar *)); 11830 if (ctxt->pushTab == NULL) { 11831 xmlErrMemory(ctxt, NULL); 11832 xmlFreeParserInputBuffer(buf); 11833 return(1); 11834 } 11835 } 11836 11837 if (filename == NULL) { 11838 ctxt->directory = NULL; 11839 } else { 11840 ctxt->directory = xmlParserGetDirectory(filename); 11841 } 11842 11843 inputStream = xmlNewInputStream(ctxt); 11844 if (inputStream == NULL) { 11845 xmlFreeParserInputBuffer(buf); 11846 return(1); 11847 } 11848 11849 if (filename == NULL) 11850 inputStream->filename = NULL; 11851 else 11852 inputStream->filename = (char *) 11853 xmlCanonicPath((const xmlChar *) filename); 11854 inputStream->buf = buf; 11855 inputStream->base = inputStream->buf->buffer->content; 11856 inputStream->cur = inputStream->buf->buffer->content; 11857 inputStream->end = 11858 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11859 11860 inputPush(ctxt, inputStream); 11861 11862 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11863 (ctxt->input->buf != NULL)) { 11864 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11865 int cur = ctxt->input->cur - ctxt->input->base; 11866 11867 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11868 11869 
ctxt->input->base = ctxt->input->buf->buffer->content + base; 11870 ctxt->input->cur = ctxt->input->base + cur; 11871 ctxt->input->end = 11872 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 11873 use]; 11874#ifdef DEBUG_PUSH 11875 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11876#endif 11877 } 11878 11879 if (encoding != NULL) { 11880 xmlCharEncodingHandlerPtr hdlr; 11881 11882 hdlr = xmlFindCharEncodingHandler(encoding); 11883 if (hdlr != NULL) { 11884 xmlSwitchToEncoding(ctxt, hdlr); 11885 } else { 11886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 11887 "Unsupported encoding %s\n", BAD_CAST encoding); 11888 } 11889 } else if (enc != XML_CHAR_ENCODING_NONE) { 11890 xmlSwitchEncoding(ctxt, enc); 11891 } 11892 11893 return(0); 11894} 11895 11896/** 11897 * xmlCtxtUseOptions: 11898 * @ctxt: an XML parser context 11899 * @options: a combination of xmlParserOption 11900 * 11901 * Applies the options to the parser context 11902 * 11903 * Returns 0 in case of success, the set of unknown or unimplemented options 11904 * in case of error. 
11905 */ 11906int 11907xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 11908{ 11909 if (options & XML_PARSE_RECOVER) { 11910 ctxt->recovery = 1; 11911 options -= XML_PARSE_RECOVER; 11912 } else 11913 ctxt->recovery = 0; 11914 if (options & XML_PARSE_DTDLOAD) { 11915 ctxt->loadsubset = XML_DETECT_IDS; 11916 options -= XML_PARSE_DTDLOAD; 11917 } else 11918 ctxt->loadsubset = 0; 11919 if (options & XML_PARSE_DTDATTR) { 11920 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 11921 options -= XML_PARSE_DTDATTR; 11922 } 11923 if (options & XML_PARSE_NOENT) { 11924 ctxt->replaceEntities = 1; 11925 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 11926 options -= XML_PARSE_NOENT; 11927 } else 11928 ctxt->replaceEntities = 0; 11929 if (options & XML_PARSE_NOWARNING) { 11930 ctxt->sax->warning = NULL; 11931 options -= XML_PARSE_NOWARNING; 11932 } 11933 if (options & XML_PARSE_NOERROR) { 11934 ctxt->sax->error = NULL; 11935 ctxt->sax->fatalError = NULL; 11936 options -= XML_PARSE_NOERROR; 11937 } 11938 if (options & XML_PARSE_PEDANTIC) { 11939 ctxt->pedantic = 1; 11940 options -= XML_PARSE_PEDANTIC; 11941 } else 11942 ctxt->pedantic = 0; 11943 if (options & XML_PARSE_NOBLANKS) { 11944 ctxt->keepBlanks = 0; 11945 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 11946 options -= XML_PARSE_NOBLANKS; 11947 } else 11948 ctxt->keepBlanks = 1; 11949 if (options & XML_PARSE_DTDVALID) { 11950 ctxt->validate = 1; 11951 if (options & XML_PARSE_NOWARNING) 11952 ctxt->vctxt.warning = NULL; 11953 if (options & XML_PARSE_NOERROR) 11954 ctxt->vctxt.error = NULL; 11955 options -= XML_PARSE_DTDVALID; 11956 } else 11957 ctxt->validate = 0; 11958#ifdef LIBXML_SAX1_ENABLED 11959 if (options & XML_PARSE_SAX1) { 11960 ctxt->sax->startElement = xmlSAX2StartElement; 11961 ctxt->sax->endElement = xmlSAX2EndElement; 11962 ctxt->sax->startElementNs = NULL; 11963 ctxt->sax->endElementNs = NULL; 11964 ctxt->sax->initialized = 1; 11965 options -= XML_PARSE_SAX1; 11966 } 11967#endif /* LIBXML_SAX1_ENABLED 
*/ 11968 if (options & XML_PARSE_NODICT) { 11969 ctxt->dictNames = 0; 11970 options -= XML_PARSE_NODICT; 11971 } else { 11972 ctxt->dictNames = 1; 11973 } 11974 if (options & XML_PARSE_NOCDATA) { 11975 ctxt->sax->cdataBlock = NULL; 11976 options -= XML_PARSE_NOCDATA; 11977 } 11978 if (options & XML_PARSE_NSCLEAN) { 11979 ctxt->options |= XML_PARSE_NSCLEAN; 11980 options -= XML_PARSE_NSCLEAN; 11981 } 11982 if (options & XML_PARSE_NONET) { 11983 ctxt->options |= XML_PARSE_NONET; 11984 options -= XML_PARSE_NONET; 11985 } 11986 ctxt->linenumbers = 1; 11987 return (options); 11988} 11989 11990/** 11991 * xmlDoRead: 11992 * @ctxt: an XML parser context 11993 * @URL: the base URL to use for the document 11994 * @encoding: the document encoding, or NULL 11995 * @options: a combination of xmlParserOption 11996 * @reuse: keep the context for reuse 11997 * 11998 * Common front-end for the xmlRead functions 11999 * 12000 * Returns the resulting document tree or NULL 12001 */ 12002static xmlDocPtr 12003xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 12004 int options, int reuse) 12005{ 12006 xmlDocPtr ret; 12007 12008 xmlCtxtUseOptions(ctxt, options); 12009 if (encoding != NULL) { 12010 xmlCharEncodingHandlerPtr hdlr; 12011 12012 hdlr = xmlFindCharEncodingHandler(encoding); 12013 if (hdlr != NULL) 12014 xmlSwitchToEncoding(ctxt, hdlr); 12015 } 12016 if ((URL != NULL) && (ctxt->input != NULL) && 12017 (ctxt->input->filename == NULL)) 12018 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 12019 xmlParseDocument(ctxt); 12020 if ((ctxt->wellFormed) || ctxt->recovery) 12021 ret = ctxt->myDoc; 12022 else { 12023 ret = NULL; 12024 if (ctxt->myDoc != NULL) { 12025 if ((ctxt->dictNames) && 12026 (ctxt->myDoc->dict == ctxt->dict)) 12027 xmlDictReference(ctxt->dict); 12028 xmlFreeDoc(ctxt->myDoc); 12029 } 12030 } 12031 ctxt->myDoc = NULL; 12032 if (!reuse) { 12033 if ((ctxt->dictNames) && 12034 (ret != NULL) && 12035 (ret->dict == ctxt->dict)) 
12036 ctxt->dict = NULL; 12037 xmlFreeParserCtxt(ctxt); 12038 } else { 12039 /* Must duplicate the reference to the dictionary */ 12040 if ((ctxt->dictNames) && 12041 (ret != NULL) && 12042 (ret->dict == ctxt->dict)) 12043 xmlDictReference(ctxt->dict); 12044 } 12045 12046 return (ret); 12047} 12048 12049/** 12050 * xmlReadDoc: 12051 * @cur: a pointer to a zero terminated string 12052 * @URL: the base URL to use for the document 12053 * @encoding: the document encoding, or NULL 12054 * @options: a combination of xmlParserOption 12055 * 12056 * parse an XML in-memory document and build a tree. 12057 * 12058 * Returns the resulting document tree 12059 */ 12060xmlDocPtr 12061xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 12062{ 12063 xmlParserCtxtPtr ctxt; 12064 12065 if (cur == NULL) 12066 return (NULL); 12067 12068 ctxt = xmlCreateDocParserCtxt(cur); 12069 if (ctxt == NULL) 12070 return (NULL); 12071 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12072} 12073 12074/** 12075 * xmlReadFile: 12076 * @filename: a file or URL 12077 * @encoding: the document encoding, or NULL 12078 * @options: a combination of xmlParserOption 12079 * 12080 * parse an XML file from the filesystem or the network. 12081 * 12082 * Returns the resulting document tree 12083 */ 12084xmlDocPtr 12085xmlReadFile(const char *filename, const char *encoding, int options) 12086{ 12087 xmlParserCtxtPtr ctxt; 12088 12089 ctxt = xmlCreateURLParserCtxt(filename, options); 12090 if (ctxt == NULL) 12091 return (NULL); 12092 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 12093} 12094 12095/** 12096 * xmlReadMemory: 12097 * @buffer: a pointer to a char array 12098 * @size: the size of the array 12099 * @URL: the base URL to use for the document 12100 * @encoding: the document encoding, or NULL 12101 * @options: a combination of xmlParserOption 12102 * 12103 * parse an XML in-memory document and build a tree. 
12104 * 12105 * Returns the resulting document tree 12106 */ 12107xmlDocPtr 12108xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 12109{ 12110 xmlParserCtxtPtr ctxt; 12111 12112 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12113 if (ctxt == NULL) 12114 return (NULL); 12115 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12116} 12117 12118/** 12119 * xmlReadFd: 12120 * @fd: an open file descriptor 12121 * @URL: the base URL to use for the document 12122 * @encoding: the document encoding, or NULL 12123 * @options: a combination of xmlParserOption 12124 * 12125 * parse an XML from a file descriptor and build a tree. 12126 * NOTE that the file descriptor will not be closed when the 12127 * reader is closed or reset. 12128 * 12129 * Returns the resulting document tree 12130 */ 12131xmlDocPtr 12132xmlReadFd(int fd, const char *URL, const char *encoding, int options) 12133{ 12134 xmlParserCtxtPtr ctxt; 12135 xmlParserInputBufferPtr input; 12136 xmlParserInputPtr stream; 12137 12138 if (fd < 0) 12139 return (NULL); 12140 12141 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12142 if (input == NULL) 12143 return (NULL); 12144 input->closecallback = NULL; 12145 ctxt = xmlNewParserCtxt(); 12146 if (ctxt == NULL) { 12147 xmlFreeParserInputBuffer(input); 12148 return (NULL); 12149 } 12150 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12151 if (stream == NULL) { 12152 xmlFreeParserInputBuffer(input); 12153 xmlFreeParserCtxt(ctxt); 12154 return (NULL); 12155 } 12156 inputPush(ctxt, stream); 12157 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12158} 12159 12160/** 12161 * xmlReadIO: 12162 * @ioread: an I/O read function 12163 * @ioclose: an I/O close function 12164 * @ioctx: an I/O handler 12165 * @URL: the base URL to use for the document 12166 * @encoding: the document encoding, or NULL 12167 * @options: a combination of xmlParserOption 12168 * 12169 * parse an XML document from 
* I/O functions and source and build a tree.
 *
 * Returns the resulting document tree
 */
xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
          void *ioctx, const char *URL, const char *encoding, int options)
{
    xmlParserCtxtPtr parser;
    xmlParserInputBufferPtr iobuf;
    xmlParserInputPtr in;

    /* A read callback is mandatory. */
    if (ioread == NULL)
        return (NULL);

    iobuf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
                                         XML_CHAR_ENCODING_NONE);
    if (iobuf == NULL)
        return (NULL);

    parser = xmlNewParserCtxt();
    if (parser == NULL) {
        xmlFreeParserInputBuffer(iobuf);
        return (NULL);
    }

    in = xmlNewIOInputStream(parser, iobuf, XML_CHAR_ENCODING_NONE);
    if (in == NULL) {
        xmlFreeParserInputBuffer(iobuf);
        xmlFreeParserCtxt(parser);
        return (NULL);
    }

    inputPush(parser, in);
    /* reuse == 0: xmlDoRead frees the temporary context. */
    return (xmlDoRead(parser, URL, encoding, options, 0));
}

/**
 * xmlCtxtReadDoc:
 * @ctxt:  an XML parser context
 * @cur:  a pointer to a zero terminated string
 * @URL:  the base URL to use for the document
 * @encoding:  the document encoding, or NULL
 * @options:  a combination of xmlParserOption
 *
 * parse an XML in-memory document and build a tree.
12212 * This reuses the existing @ctxt parser context 12213 * 12214 * Returns the resulting document tree 12215 */ 12216xmlDocPtr 12217xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 12218 const char *URL, const char *encoding, int options) 12219{ 12220 xmlParserInputPtr stream; 12221 12222 if (cur == NULL) 12223 return (NULL); 12224 if (ctxt == NULL) 12225 return (NULL); 12226 12227 xmlCtxtReset(ctxt); 12228 12229 stream = xmlNewStringInputStream(ctxt, cur); 12230 if (stream == NULL) { 12231 return (NULL); 12232 } 12233 inputPush(ctxt, stream); 12234 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12235} 12236 12237/** 12238 * xmlCtxtReadFile: 12239 * @ctxt: an XML parser context 12240 * @filename: a file or URL 12241 * @encoding: the document encoding, or NULL 12242 * @options: a combination of xmlParserOption 12243 * 12244 * parse an XML file from the filesystem or the network. 12245 * This reuses the existing @ctxt parser context 12246 * 12247 * Returns the resulting document tree 12248 */ 12249xmlDocPtr 12250xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 12251 const char *encoding, int options) 12252{ 12253 xmlParserInputPtr stream; 12254 12255 if (filename == NULL) 12256 return (NULL); 12257 if (ctxt == NULL) 12258 return (NULL); 12259 12260 xmlCtxtReset(ctxt); 12261 12262 stream = xmlNewInputFromFile(ctxt, filename); 12263 if (stream == NULL) { 12264 return (NULL); 12265 } 12266 inputPush(ctxt, stream); 12267 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 12268} 12269 12270/** 12271 * xmlCtxtReadMemory: 12272 * @ctxt: an XML parser context 12273 * @buffer: a pointer to a char array 12274 * @size: the size of the array 12275 * @URL: the base URL to use for the document 12276 * @encoding: the document encoding, or NULL 12277 * @options: a combination of xmlParserOption 12278 * 12279 * parse an XML in-memory document and build a tree. 
12280 * This reuses the existing @ctxt parser context 12281 * 12282 * Returns the resulting document tree 12283 */ 12284xmlDocPtr 12285xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 12286 const char *URL, const char *encoding, int options) 12287{ 12288 xmlParserInputBufferPtr input; 12289 xmlParserInputPtr stream; 12290 12291 if (ctxt == NULL) 12292 return (NULL); 12293 if (buffer == NULL) 12294 return (NULL); 12295 12296 xmlCtxtReset(ctxt); 12297 12298 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12299 if (input == NULL) { 12300 return(NULL); 12301 } 12302 12303 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12304 if (stream == NULL) { 12305 xmlFreeParserInputBuffer(input); 12306 return(NULL); 12307 } 12308 12309 inputPush(ctxt, stream); 12310 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12311} 12312 12313/** 12314 * xmlCtxtReadFd: 12315 * @ctxt: an XML parser context 12316 * @fd: an open file descriptor 12317 * @URL: the base URL to use for the document 12318 * @encoding: the document encoding, or NULL 12319 * @options: a combination of xmlParserOption 12320 * 12321 * parse an XML from a file descriptor and build a tree. 12322 * This reuses the existing @ctxt parser context 12323 * NOTE that the file descriptor will not be closed when the 12324 * reader is closed or reset. 
12325 * 12326 * Returns the resulting document tree 12327 */ 12328xmlDocPtr 12329xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 12330 const char *URL, const char *encoding, int options) 12331{ 12332 xmlParserInputBufferPtr input; 12333 xmlParserInputPtr stream; 12334 12335 if (fd < 0) 12336 return (NULL); 12337 if (ctxt == NULL) 12338 return (NULL); 12339 12340 xmlCtxtReset(ctxt); 12341 12342 12343 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12344 if (input == NULL) 12345 return (NULL); 12346 input->closecallback = NULL; 12347 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12348 if (stream == NULL) { 12349 xmlFreeParserInputBuffer(input); 12350 return (NULL); 12351 } 12352 inputPush(ctxt, stream); 12353 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12354} 12355 12356/** 12357 * xmlCtxtReadIO: 12358 * @ctxt: an XML parser context 12359 * @ioread: an I/O read function 12360 * @ioclose: an I/O close function 12361 * @ioctx: an I/O handler 12362 * @URL: the base URL to use for the document 12363 * @encoding: the document encoding, or NULL 12364 * @options: a combination of xmlParserOption 12365 * 12366 * parse an XML document from I/O functions and source and build a tree. 
12367 * This reuses the existing @ctxt parser context 12368 * 12369 * Returns the resulting document tree 12370 */ 12371xmlDocPtr 12372xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 12373 xmlInputCloseCallback ioclose, void *ioctx, 12374 const char *URL, 12375 const char *encoding, int options) 12376{ 12377 xmlParserInputBufferPtr input; 12378 xmlParserInputPtr stream; 12379 12380 if (ioread == NULL) 12381 return (NULL); 12382 if (ctxt == NULL) 12383 return (NULL); 12384 12385 xmlCtxtReset(ctxt); 12386 12387 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 12388 XML_CHAR_ENCODING_NONE); 12389 if (input == NULL) 12390 return (NULL); 12391 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12392 if (stream == NULL) { 12393 xmlFreeParserInputBuffer(input); 12394 return (NULL); 12395 } 12396 inputPush(ctxt, stream); 12397 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12398} 12399