parser.c revision a07050ddac5c9160974bc65b1369309d36fa13f4
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60 61#ifdef HAVE_CTYPE_H 62#include <ctype.h> 63#endif 64#ifdef HAVE_STDLIB_H 65#include <stdlib.h> 66#endif 67#ifdef HAVE_SYS_STAT_H 68#include <sys/stat.h> 69#endif 70#ifdef HAVE_FCNTL_H 71#include <fcntl.h> 72#endif 73#ifdef HAVE_UNISTD_H 74#include <unistd.h> 75#endif 76#ifdef HAVE_ZLIB_H 77#include <zlib.h> 78#endif 79 80/** 81 * xmlParserMaxDepth: 82 * 83 * arbitrary depth limit for the XML documents that we allow to 84 * process. This is not a limitation of the parser but a safety 85 * boundary feature. 
86 */ 87unsigned int xmlParserMaxDepth = 1024; 88 89#define SAX2 1 90 91#define XML_PARSER_BIG_BUFFER_SIZE 300 92#define XML_PARSER_BUFFER_SIZE 100 93 94#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 95 96/* 97 * List of XML prefixed PI allowed by W3C specs 98 */ 99 100static const char *xmlW3CPIs[] = { 101 "xml-stylesheet", 102 NULL 103}; 104 105 106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 108 const xmlChar **str); 109 110static xmlParserErrors 111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 112 xmlSAXHandlerPtr sax, 113 void *user_data, int depth, const xmlChar *URL, 114 const xmlChar *ID, xmlNodePtr *list); 115 116#ifdef LIBXML_LEGACY_ENABLED 117static void 118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 119 xmlNodePtr lastNode); 120#endif /* LIBXML_LEGACY_ENABLED */ 121 122static xmlParserErrors 123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 124 const xmlChar *string, void *user_data, xmlNodePtr *lst); 125 126/************************************************************************ 127 * * 128 * Some factorized error routines * 129 * * 130 ************************************************************************/ 131 132/** 133 * xmlErrAttributeDup: 134 * @ctxt: an XML parser context 135 * @prefix: the attribute prefix 136 * @localname: the attribute localname 137 * 138 * Handle a redefinition of attribute error 139 */ 140static void 141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 142 const xmlChar * localname) 143{ 144 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 145 if (prefix == NULL) 146 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 147 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 148 (const char *) localname, NULL, NULL, 0, 0, 149 "Attribute %s redefined\n", localname); 150 else 151 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 152 ctxt->errNo, 
XML_ERR_FATAL, NULL, 0, 153 (const char *) prefix, (const char *) localname, 154 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 155 localname); 156 ctxt->wellFormed = 0; 157 if (ctxt->recovery == 0) 158 ctxt->disableSAX = 1; 159} 160 161/** 162 * xmlFatalErr: 163 * @ctxt: an XML parser context 164 * @error: the error number 165 * @extra: extra information string 166 * 167 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 168 */ 169static void 170xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 171{ 172 const char *errmsg; 173 174 switch (error) { 175 case XML_ERR_INVALID_HEX_CHARREF: 176 errmsg = "CharRef: invalid hexadecimal value\n"; 177 break; 178 case XML_ERR_INVALID_DEC_CHARREF: 179 errmsg = "CharRef: invalid decimal value\n"; 180 break; 181 case XML_ERR_INVALID_CHARREF: 182 errmsg = "CharRef: invalid value\n"; 183 break; 184 case XML_ERR_INTERNAL_ERROR: 185 errmsg = "internal error"; 186 break; 187 case XML_ERR_PEREF_AT_EOF: 188 errmsg = "PEReference at end of document\n"; 189 break; 190 case XML_ERR_PEREF_IN_PROLOG: 191 errmsg = "PEReference in prolog\n"; 192 break; 193 case XML_ERR_PEREF_IN_EPILOG: 194 errmsg = "PEReference in epilog\n"; 195 break; 196 case XML_ERR_PEREF_NO_NAME: 197 errmsg = "PEReference: no name\n"; 198 break; 199 case XML_ERR_PEREF_SEMICOL_MISSING: 200 errmsg = "PEReference: expecting ';'\n"; 201 break; 202 case XML_ERR_ENTITY_LOOP: 203 errmsg = "Detected an entity reference loop\n"; 204 break; 205 case XML_ERR_ENTITY_NOT_STARTED: 206 errmsg = "EntityValue: \" or ' expected\n"; 207 break; 208 case XML_ERR_ENTITY_PE_INTERNAL: 209 errmsg = "PEReferences forbidden in internal subset\n"; 210 break; 211 case XML_ERR_ENTITY_NOT_FINISHED: 212 errmsg = "EntityValue: \" or ' expected\n"; 213 break; 214 case XML_ERR_ATTRIBUTE_NOT_STARTED: 215 errmsg = "AttValue: \" or ' expected\n"; 216 break; 217 case XML_ERR_LT_IN_ATTRIBUTE: 218 errmsg = "Unescaped '<' not allowed in attributes values\n"; 
219 break; 220 case XML_ERR_LITERAL_NOT_STARTED: 221 errmsg = "SystemLiteral \" or ' expected\n"; 222 break; 223 case XML_ERR_LITERAL_NOT_FINISHED: 224 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 225 break; 226 case XML_ERR_MISPLACED_CDATA_END: 227 errmsg = "Sequence ']]>' not allowed in content\n"; 228 break; 229 case XML_ERR_URI_REQUIRED: 230 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 231 break; 232 case XML_ERR_PUBID_REQUIRED: 233 errmsg = "PUBLIC, the Public Identifier is missing\n"; 234 break; 235 case XML_ERR_HYPHEN_IN_COMMENT: 236 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 237 break; 238 case XML_ERR_PI_NOT_STARTED: 239 errmsg = "xmlParsePI : no target name\n"; 240 break; 241 case XML_ERR_RESERVED_XML_NAME: 242 errmsg = "Invalid PI name\n"; 243 break; 244 case XML_ERR_NOTATION_NOT_STARTED: 245 errmsg = "NOTATION: Name expected here\n"; 246 break; 247 case XML_ERR_NOTATION_NOT_FINISHED: 248 errmsg = "'>' required to close NOTATION declaration\n"; 249 break; 250 case XML_ERR_VALUE_REQUIRED: 251 errmsg = "Entity value required\n"; 252 break; 253 case XML_ERR_URI_FRAGMENT: 254 errmsg = "Fragment not allowed"; 255 break; 256 case XML_ERR_ATTLIST_NOT_STARTED: 257 errmsg = "'(' required to start ATTLIST enumeration\n"; 258 break; 259 case XML_ERR_NMTOKEN_REQUIRED: 260 errmsg = "NmToken expected in ATTLIST enumeration\n"; 261 break; 262 case XML_ERR_ATTLIST_NOT_FINISHED: 263 errmsg = "')' required to finish ATTLIST enumeration\n"; 264 break; 265 case XML_ERR_MIXED_NOT_STARTED: 266 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 267 break; 268 case XML_ERR_PCDATA_REQUIRED: 269 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 270 break; 271 case XML_ERR_ELEMCONTENT_NOT_STARTED: 272 errmsg = "ContentDecl : Name or '(' expected\n"; 273 break; 274 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 275 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 276 break; 277 case XML_ERR_PEREF_IN_INT_SUBSET: 278 errmsg = 279 "PEReference: 
forbidden within markup decl in internal subset\n"; 280 break; 281 case XML_ERR_GT_REQUIRED: 282 errmsg = "expected '>'\n"; 283 break; 284 case XML_ERR_CONDSEC_INVALID: 285 errmsg = "XML conditional section '[' expected\n"; 286 break; 287 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 288 errmsg = "Content error in the external subset\n"; 289 break; 290 case XML_ERR_CONDSEC_INVALID_KEYWORD: 291 errmsg = 292 "conditional section INCLUDE or IGNORE keyword expected\n"; 293 break; 294 case XML_ERR_CONDSEC_NOT_FINISHED: 295 errmsg = "XML conditional section not closed\n"; 296 break; 297 case XML_ERR_XMLDECL_NOT_STARTED: 298 errmsg = "Text declaration '<?xml' required\n"; 299 break; 300 case XML_ERR_XMLDECL_NOT_FINISHED: 301 errmsg = "parsing XML declaration: '?>' expected\n"; 302 break; 303 case XML_ERR_EXT_ENTITY_STANDALONE: 304 errmsg = "external parsed entities cannot be standalone\n"; 305 break; 306 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 307 errmsg = "EntityRef: expecting ';'\n"; 308 break; 309 case XML_ERR_DOCTYPE_NOT_FINISHED: 310 errmsg = "DOCTYPE improperly terminated\n"; 311 break; 312 case XML_ERR_LTSLASH_REQUIRED: 313 errmsg = "EndTag: '</' not found\n"; 314 break; 315 case XML_ERR_EQUAL_REQUIRED: 316 errmsg = "expected '='\n"; 317 break; 318 case XML_ERR_STRING_NOT_CLOSED: 319 errmsg = "String not closed expecting \" or '\n"; 320 break; 321 case XML_ERR_STRING_NOT_STARTED: 322 errmsg = "String not started expecting ' or \"\n"; 323 break; 324 case XML_ERR_ENCODING_NAME: 325 errmsg = "Invalid XML encoding name\n"; 326 break; 327 case XML_ERR_STANDALONE_VALUE: 328 errmsg = "standalone accepts only 'yes' or 'no'\n"; 329 break; 330 case XML_ERR_DOCUMENT_EMPTY: 331 errmsg = "Document is empty\n"; 332 break; 333 case XML_ERR_DOCUMENT_END: 334 errmsg = "Extra content at the end of the document\n"; 335 break; 336 case XML_ERR_NOT_WELL_BALANCED: 337 errmsg = "chunk is not well balanced\n"; 338 break; 339 case XML_ERR_EXTRA_CONTENT: 340 errmsg = "extra content at the end of 
well balanced chunk\n"; 341 break; 342 case XML_ERR_VERSION_MISSING: 343 errmsg = "Malformed declaration expecting version\n"; 344 break; 345#if 0 346 case: 347 errmsg = "\n"; 348 break; 349#endif 350 default: 351 errmsg = "Unregistered error message\n"; 352 } 353 ctxt->errNo = error; 354 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 355 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 356 info); 357 ctxt->wellFormed = 0; 358 if (ctxt->recovery == 0) 359 ctxt->disableSAX = 1; 360} 361 362/** 363 * xmlFatalErrMsg: 364 * @ctxt: an XML parser context 365 * @error: the error number 366 * @msg: the error message 367 * 368 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 369 */ 370static void 371xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 372 const char *msg) 373{ 374 ctxt->errNo = error; 375 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 376 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 377 ctxt->wellFormed = 0; 378 if (ctxt->recovery == 0) 379 ctxt->disableSAX = 1; 380} 381 382/** 383 * xmlWarningMsg: 384 * @ctxt: an XML parser context 385 * @error: the error number 386 * @msg: the error message 387 * @str1: extra data 388 * @str2: extra data 389 * 390 * Handle a warning. 391 */ 392static void 393xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 394 const char *msg, const xmlChar *str1, const xmlChar *str2) 395{ 396 xmlStructuredErrorFunc schannel = NULL; 397 398 ctxt->errNo = error; 399 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 400 schannel = ctxt->sax->serror; 401 __xmlRaiseError(schannel, 402 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 403 ctxt->userData, 404 ctxt, NULL, XML_FROM_PARSER, error, 405 XML_ERR_WARNING, NULL, 0, 406 (const char *) str1, (const char *) str2, NULL, 0, 0, 407 msg, (const char *) str1, (const char *) str2); 408} 409 410/** 411 * xmlValidityError: 412 * @ctxt: an XML parser context 413 * @error: the error number 414 * @msg: the error message 415 * @str1: extra data 416 * 417 * Handle a warning. 418 */ 419static void 420xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 421 const char *msg, const xmlChar *str1) 422{ 423 xmlStructuredErrorFunc schannel = NULL; 424 ctxt->errNo = error; 425 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 426 schannel = ctxt->sax->serror; 427 __xmlRaiseError(schannel, 428 ctxt->vctxt.error, ctxt->vctxt.userData, 429 ctxt, NULL, XML_FROM_DTD, error, 430 XML_ERR_ERROR, NULL, 0, (const char *) str1, 431 NULL, NULL, 0, 0, 432 msg, (const char *) str1); 433 ctxt->valid = 0; 434} 435 436/** 437 * xmlFatalErrMsgInt: 438 * @ctxt: an XML parser context 439 * @error: the error number 440 * @msg: the error message 441 * @val: an integer value 442 * 443 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 444 */ 445static void 446xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 447 const char *msg, int val) 448{ 449 ctxt->errNo = error; 450 __xmlRaiseError(NULL, NULL, NULL, 451 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 452 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 453 ctxt->wellFormed = 0; 454 if (ctxt->recovery == 0) 455 ctxt->disableSAX = 1; 456} 457 458/** 459 * xmlFatalErrMsgStrIntStr: 460 * @ctxt: an XML parser context 461 * @error: the error number 462 * @msg: the error message 463 * @str1: an string info 464 * @val: an integer value 465 * @str2: an string info 466 * 467 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 468 */ 469static void 470xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 471 const char *msg, const xmlChar *str1, int val, 472 const xmlChar *str2) 473{ 474 ctxt->errNo = error; 475 __xmlRaiseError(NULL, NULL, NULL, 476 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 477 NULL, 0, (const char *) str1, (const char *) str2, 478 NULL, val, 0, msg, str1, val, str2); 479 ctxt->wellFormed = 0; 480 if (ctxt->recovery == 0) 481 ctxt->disableSAX = 1; 482} 483 484/** 485 * xmlFatalErrMsgStr: 486 * @ctxt: an XML parser context 487 * @error: the error number 488 * @msg: the error message 489 * @val: a string value 490 * 491 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 492 */ 493static void 494xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 495 const char *msg, const xmlChar * val) 496{ 497 ctxt->errNo = error; 498 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 499 XML_FROM_PARSER, error, XML_ERR_FATAL, 500 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 501 val); 502 ctxt->wellFormed = 0; 503 if (ctxt->recovery == 0) 504 ctxt->disableSAX = 1; 505} 506 507/** 508 * xmlErrMsgStr: 509 * @ctxt: an XML parser context 510 * @error: the error number 511 * @msg: the error message 512 * @val: a string value 513 * 514 * Handle a non fatal parser error 515 */ 516static void 517xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 518 const char *msg, const xmlChar * val) 519{ 520 ctxt->errNo = error; 521 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 522 XML_FROM_PARSER, error, XML_ERR_ERROR, 523 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 524 val); 525} 526 527/** 528 * xmlNsErr: 529 * @ctxt: an XML parser context 530 * @error: the error number 531 * @msg: the message 532 * @info1: extra information string 533 * @info2: extra information string 534 * 535 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 536 */ 537static void 538xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 539 const char *msg, 540 const xmlChar * info1, const xmlChar * info2, 541 const xmlChar * info3) 542{ 543 ctxt->errNo = error; 544 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 545 XML_ERR_ERROR, NULL, 0, (const char *) info1, 546 (const char *) info2, (const char *) info3, 0, 0, msg, 547 info1, info2, info3); 548 ctxt->nsWellFormed = 0; 549} 550 551/************************************************************************ 552 * * 553 * SAX2 defaulted attributes handling * 554 * * 555 ************************************************************************/ 556 557/** 558 * xmlDetectSAX2: 559 * @ctxt: an XML parser context 560 * 561 * Do the SAX2 detection and specific intialization 562 */ 563static void 564xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 565 if (ctxt == NULL) return; 566#ifdef LIBXML_SAX1_ENABLED 567 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 568 ((ctxt->sax->startElementNs != NULL) || 569 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 570#else 571 ctxt->sax2 = 1; 572#endif /* LIBXML_SAX1_ENABLED */ 573 574 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 575 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 576 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 577} 578 579typedef struct _xmlDefAttrs xmlDefAttrs; 580typedef xmlDefAttrs *xmlDefAttrsPtr; 581struct _xmlDefAttrs { 582 int nbAttrs; /* number of defaulted attributes on that element */ 583 int maxAttrs; /* the size of the array */ 584 const xmlChar *values[4]; /* array of localname/prefix/values */ 585}; 586 587/** 588 * xmlAddDefAttrs: 589 * @ctxt: an XML parser context 590 * @fullname: the element fullname 591 * @fullattr: the attribute fullname 592 * @value: the attribute value 593 * 594 * Add a defaulted attribute for an element 595 */ 596static void 
597xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 598 const xmlChar *fullname, 599 const xmlChar *fullattr, 600 const xmlChar *value) { 601 xmlDefAttrsPtr defaults; 602 int len; 603 const xmlChar *name; 604 const xmlChar *prefix; 605 606 if (ctxt->attsDefault == NULL) { 607 ctxt->attsDefault = xmlHashCreate(10); 608 if (ctxt->attsDefault == NULL) 609 goto mem_error; 610 } 611 612 /* 613 * plit the element name into prefix:localname , the string found 614 * are within the DTD and hen not associated to namespace names. 615 */ 616 name = xmlSplitQName3(fullname, &len); 617 if (name == NULL) { 618 name = xmlDictLookup(ctxt->dict, fullname, -1); 619 prefix = NULL; 620 } else { 621 name = xmlDictLookup(ctxt->dict, name, -1); 622 prefix = xmlDictLookup(ctxt->dict, fullname, len); 623 } 624 625 /* 626 * make sure there is some storage 627 */ 628 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 629 if (defaults == NULL) { 630 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 631 12 * sizeof(const xmlChar *)); 632 if (defaults == NULL) 633 goto mem_error; 634 defaults->maxAttrs = 4; 635 defaults->nbAttrs = 0; 636 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 637 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 638 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 639 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 640 if (defaults == NULL) 641 goto mem_error; 642 defaults->maxAttrs *= 2; 643 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 644 } 645 646 /* 647 * plit the element name into prefix:localname , the string found 648 * are within the DTD and hen not associated to namespace names. 
649 */ 650 name = xmlSplitQName3(fullattr, &len); 651 if (name == NULL) { 652 name = xmlDictLookup(ctxt->dict, fullattr, -1); 653 prefix = NULL; 654 } else { 655 name = xmlDictLookup(ctxt->dict, name, -1); 656 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 657 } 658 659 defaults->values[4 * defaults->nbAttrs] = name; 660 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 661 /* intern the string and precompute the end */ 662 len = xmlStrlen(value); 663 value = xmlDictLookup(ctxt->dict, value, len); 664 defaults->values[4 * defaults->nbAttrs + 2] = value; 665 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 666 defaults->nbAttrs++; 667 668 return; 669 670mem_error: 671 xmlErrMemory(ctxt, NULL); 672 return; 673} 674 675/** 676 * xmlAddSpecialAttr: 677 * @ctxt: an XML parser context 678 * @fullname: the element fullname 679 * @fullattr: the attribute fullname 680 * @type: the attribute type 681 * 682 * Register that this attribute is not CDATA 683 */ 684static void 685xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 686 const xmlChar *fullname, 687 const xmlChar *fullattr, 688 int type) 689{ 690 if (ctxt->attsSpecial == NULL) { 691 ctxt->attsSpecial = xmlHashCreate(10); 692 if (ctxt->attsSpecial == NULL) 693 goto mem_error; 694 } 695 696 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 697 (void *) (long) type); 698 return; 699 700mem_error: 701 xmlErrMemory(ctxt, NULL); 702 return; 703} 704 705/** 706 * xmlCheckLanguageID: 707 * @lang: pointer to the string value 708 * 709 * Checks that the value conforms to the LanguageID production: 710 * 711 * NOTE: this is somewhat deprecated, those productions were removed from 712 * the XML Second edition. 
713 * 714 * [33] LanguageID ::= Langcode ('-' Subcode)* 715 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 716 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 717 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 718 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 719 * [38] Subcode ::= ([a-z] | [A-Z])+ 720 * 721 * Returns 1 if correct 0 otherwise 722 **/ 723int 724xmlCheckLanguageID(const xmlChar * lang) 725{ 726 const xmlChar *cur = lang; 727 728 if (cur == NULL) 729 return (0); 730 if (((cur[0] == 'i') && (cur[1] == '-')) || 731 ((cur[0] == 'I') && (cur[1] == '-'))) { 732 /* 733 * IANA code 734 */ 735 cur += 2; 736 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 737 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 738 cur++; 739 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 740 ((cur[0] == 'X') && (cur[1] == '-'))) { 741 /* 742 * User code 743 */ 744 cur += 2; 745 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 746 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 747 cur++; 748 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 749 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 750 /* 751 * ISO639 752 */ 753 cur++; 754 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 755 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 756 cur++; 757 else 758 return (0); 759 } else 760 return (0); 761 while (cur[0] != 0) { /* non input consuming */ 762 if (cur[0] != '-') 763 return (0); 764 cur++; 765 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 766 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 767 cur++; 768 else 769 return (0); 770 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 771 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 772 cur++; 773 } 774 return (1); 775} 776 777/************************************************************************ 778 * * 779 * Parser stacks related functions and macros * 780 * * 781 ************************************************************************/ 782 783xmlEntityPtr 
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 784 const xmlChar ** str); 785 786#ifdef SAX2 787/** 788 * nsPush: 789 * @ctxt: an XML parser context 790 * @prefix: the namespace prefix or NULL 791 * @URL: the namespace name 792 * 793 * Pushes a new parser namespace on top of the ns stack 794 * 795 * Returns -1 in case of error, -2 if the namespace should be discarded 796 * and the index in the stack otherwise. 797 */ 798static int 799nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 800{ 801 if (ctxt->options & XML_PARSE_NSCLEAN) { 802 int i; 803 for (i = 0;i < ctxt->nsNr;i += 2) { 804 if (ctxt->nsTab[i] == prefix) { 805 /* in scope */ 806 if (ctxt->nsTab[i + 1] == URL) 807 return(-2); 808 /* out of scope keep it */ 809 break; 810 } 811 } 812 } 813 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 814 ctxt->nsMax = 10; 815 ctxt->nsNr = 0; 816 ctxt->nsTab = (const xmlChar **) 817 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 818 if (ctxt->nsTab == NULL) { 819 xmlErrMemory(ctxt, NULL); 820 ctxt->nsMax = 0; 821 return (-1); 822 } 823 } else if (ctxt->nsNr >= ctxt->nsMax) { 824 ctxt->nsMax *= 2; 825 ctxt->nsTab = (const xmlChar **) 826 xmlRealloc(ctxt->nsTab, 827 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 828 if (ctxt->nsTab == NULL) { 829 xmlErrMemory(ctxt, NULL); 830 ctxt->nsMax /= 2; 831 return (-1); 832 } 833 } 834 ctxt->nsTab[ctxt->nsNr++] = prefix; 835 ctxt->nsTab[ctxt->nsNr++] = URL; 836 return (ctxt->nsNr); 837} 838/** 839 * nsPop: 840 * @ctxt: an XML parser context 841 * @nr: the number to pop 842 * 843 * Pops the top @nr parser prefix/namespace from the ns stack 844 * 845 * Returns the number of namespaces removed 846 */ 847static int 848nsPop(xmlParserCtxtPtr ctxt, int nr) 849{ 850 int i; 851 852 if (ctxt->nsTab == NULL) return(0); 853 if (ctxt->nsNr < nr) { 854 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 855 nr = ctxt->nsNr; 856 } 857 if (ctxt->nsNr <= 0) 858 return (0); 859 860 for (i = 0;i < nr;i++) { 861 
ctxt->nsNr--; 862 ctxt->nsTab[ctxt->nsNr] = NULL; 863 } 864 return(nr); 865} 866#endif 867 868static int 869xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 870 const xmlChar **atts; 871 int *attallocs; 872 int maxatts; 873 874 if (ctxt->atts == NULL) { 875 maxatts = 55; /* allow for 10 attrs by default */ 876 atts = (const xmlChar **) 877 xmlMalloc(maxatts * sizeof(xmlChar *)); 878 if (atts == NULL) goto mem_error; 879 ctxt->atts = atts; 880 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 881 if (attallocs == NULL) goto mem_error; 882 ctxt->attallocs = attallocs; 883 ctxt->maxatts = maxatts; 884 } else if (nr + 5 > ctxt->maxatts) { 885 maxatts = (nr + 5) * 2; 886 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 887 maxatts * sizeof(const xmlChar *)); 888 if (atts == NULL) goto mem_error; 889 ctxt->atts = atts; 890 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 891 (maxatts / 5) * sizeof(int)); 892 if (attallocs == NULL) goto mem_error; 893 ctxt->attallocs = attallocs; 894 ctxt->maxatts = maxatts; 895 } 896 return(ctxt->maxatts); 897mem_error: 898 xmlErrMemory(ctxt, NULL); 899 return(-1); 900} 901 902/** 903 * inputPush: 904 * @ctxt: an XML parser context 905 * @value: the parser input 906 * 907 * Pushes a new parser input on top of the input stack 908 * 909 * Returns 0 in case of error, the index in the stack otherwise 910 */ 911extern int 912inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 913{ 914 if (ctxt->inputNr >= ctxt->inputMax) { 915 ctxt->inputMax *= 2; 916 ctxt->inputTab = 917 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 918 ctxt->inputMax * 919 sizeof(ctxt->inputTab[0])); 920 if (ctxt->inputTab == NULL) { 921 xmlErrMemory(ctxt, NULL); 922 return (0); 923 } 924 } 925 ctxt->inputTab[ctxt->inputNr] = value; 926 ctxt->input = value; 927 return (ctxt->inputNr++); 928} 929/** 930 * inputPop: 931 * @ctxt: an XML parser context 932 * 933 * Pops the top parser input from the input stack 934 * 935 * Returns the input 
just removed 936 */ 937extern xmlParserInputPtr 938inputPop(xmlParserCtxtPtr ctxt) 939{ 940 xmlParserInputPtr ret; 941 942 if (ctxt->inputNr <= 0) 943 return (0); 944 ctxt->inputNr--; 945 if (ctxt->inputNr > 0) 946 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 947 else 948 ctxt->input = NULL; 949 ret = ctxt->inputTab[ctxt->inputNr]; 950 ctxt->inputTab[ctxt->inputNr] = 0; 951 return (ret); 952} 953/** 954 * nodePush: 955 * @ctxt: an XML parser context 956 * @value: the element node 957 * 958 * Pushes a new element node on top of the node stack 959 * 960 * Returns 0 in case of error, the index in the stack otherwise 961 */ 962extern int 963nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 964{ 965 if (ctxt->nodeNr >= ctxt->nodeMax) { 966 ctxt->nodeMax *= 2; 967 ctxt->nodeTab = 968 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 969 ctxt->nodeMax * 970 sizeof(ctxt->nodeTab[0])); 971 if (ctxt->nodeTab == NULL) { 972 xmlErrMemory(ctxt, NULL); 973 return (0); 974 } 975 } 976 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 977 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 978 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 979 xmlParserMaxDepth); 980 ctxt->instate = XML_PARSER_EOF; 981 return(0); 982 } 983 ctxt->nodeTab[ctxt->nodeNr] = value; 984 ctxt->node = value; 985 return (ctxt->nodeNr++); 986} 987/** 988 * nodePop: 989 * @ctxt: an XML parser context 990 * 991 * Pops the top element node from the node stack 992 * 993 * Returns the node just removed 994 */ 995extern xmlNodePtr 996nodePop(xmlParserCtxtPtr ctxt) 997{ 998 xmlNodePtr ret; 999 1000 if (ctxt->nodeNr <= 0) 1001 return (0); 1002 ctxt->nodeNr--; 1003 if (ctxt->nodeNr > 0) 1004 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1005 else 1006 ctxt->node = NULL; 1007 ret = ctxt->nodeTab[ctxt->nodeNr]; 1008 ctxt->nodeTab[ctxt->nodeNr] = 0; 1009 return (ret); 1010} 1011/** 1012 * nameNsPush: 1013 * @ctxt: an XML parser context 1014 * @value: the element name 1015 * @prefix: the element prefix 
1016 * @URI: the element namespace name 1017 * 1018 * Pushes a new element name/prefix/URL on top of the name stack 1019 * 1020 * Returns -1 in case of error, the index in the stack otherwise 1021 */ 1022static int 1023nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1024 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1025{ 1026 if (ctxt->nameNr >= ctxt->nameMax) { 1027 const xmlChar * *tmp; 1028 void **tmp2; 1029 ctxt->nameMax *= 2; 1030 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1031 ctxt->nameMax * 1032 sizeof(ctxt->nameTab[0])); 1033 if (tmp == NULL) { 1034 ctxt->nameMax /= 2; 1035 goto mem_error; 1036 } 1037 ctxt->nameTab = tmp; 1038 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1039 ctxt->nameMax * 3 * 1040 sizeof(ctxt->pushTab[0])); 1041 if (tmp2 == NULL) { 1042 ctxt->nameMax /= 2; 1043 goto mem_error; 1044 } 1045 ctxt->pushTab = tmp2; 1046 } 1047 ctxt->nameTab[ctxt->nameNr] = value; 1048 ctxt->name = value; 1049 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1050 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1051 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1052 return (ctxt->nameNr++); 1053mem_error: 1054 xmlErrMemory(ctxt, NULL); 1055 return (-1); 1056} 1057/** 1058 * nameNsPop: 1059 * @ctxt: an XML parser context 1060 * 1061 * Pops the top element/prefix/URI name from the name stack 1062 * 1063 * Returns the name just removed 1064 */ 1065static const xmlChar * 1066nameNsPop(xmlParserCtxtPtr ctxt) 1067{ 1068 const xmlChar *ret; 1069 1070 if (ctxt->nameNr <= 0) 1071 return (0); 1072 ctxt->nameNr--; 1073 if (ctxt->nameNr > 0) 1074 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1075 else 1076 ctxt->name = NULL; 1077 ret = ctxt->nameTab[ctxt->nameNr]; 1078 ctxt->nameTab[ctxt->nameNr] = NULL; 1079 return (ret); 1080} 1081 1082/** 1083 * namePush: 1084 * @ctxt: an XML parser context 1085 * @value: the element name 1086 * 1087 * Pushes a new element name on top of the name stack 1088 * 1089 * 
Returns -1 in case of error, the index in the stack otherwise 1090 */ 1091extern int 1092namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1093{ 1094 if (ctxt->nameNr >= ctxt->nameMax) { 1095 const xmlChar * *tmp; 1096 ctxt->nameMax *= 2; 1097 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1098 ctxt->nameMax * 1099 sizeof(ctxt->nameTab[0])); 1100 if (tmp == NULL) { 1101 ctxt->nameMax /= 2; 1102 goto mem_error; 1103 } 1104 ctxt->nameTab = tmp; 1105 } 1106 ctxt->nameTab[ctxt->nameNr] = value; 1107 ctxt->name = value; 1108 return (ctxt->nameNr++); 1109mem_error: 1110 xmlErrMemory(ctxt, NULL); 1111 return (-1); 1112} 1113/** 1114 * namePop: 1115 * @ctxt: an XML parser context 1116 * 1117 * Pops the top element name from the name stack 1118 * 1119 * Returns the name just removed 1120 */ 1121extern const xmlChar * 1122namePop(xmlParserCtxtPtr ctxt) 1123{ 1124 const xmlChar *ret; 1125 1126 if (ctxt->nameNr <= 0) 1127 return (0); 1128 ctxt->nameNr--; 1129 if (ctxt->nameNr > 0) 1130 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1131 else 1132 ctxt->name = NULL; 1133 ret = ctxt->nameTab[ctxt->nameNr]; 1134 ctxt->nameTab[ctxt->nameNr] = 0; 1135 return (ret); 1136} 1137 1138static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1139 if (ctxt->spaceNr >= ctxt->spaceMax) { 1140 ctxt->spaceMax *= 2; 1141 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1142 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1143 if (ctxt->spaceTab == NULL) { 1144 xmlErrMemory(ctxt, NULL); 1145 return(0); 1146 } 1147 } 1148 ctxt->spaceTab[ctxt->spaceNr] = val; 1149 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1150 return(ctxt->spaceNr++); 1151} 1152 1153static int spacePop(xmlParserCtxtPtr ctxt) { 1154 int ret; 1155 if (ctxt->spaceNr <= 0) return(0); 1156 ctxt->spaceNr--; 1157 if (ctxt->spaceNr > 0) 1158 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1159 else 1160 ctxt->space = NULL; 1161 ret = ctxt->spaceTab[ctxt->spaceNr]; 1162 ctxt->spaceTab[ctxt->spaceNr] = -1; 1163 
return(ret); 1164} 1165 1166/* 1167 * Macros for accessing the content. Those should be used only by the parser, 1168 * and not exported. 1169 * 1170 * Dirty macros, i.e. one often need to make assumption on the context to 1171 * use them 1172 * 1173 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1174 * To be used with extreme caution since operations consuming 1175 * characters may move the input buffer to a different location ! 1176 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1177 * This should be used internally by the parser 1178 * only to compare to ASCII values otherwise it would break when 1179 * running with UTF-8 encoding. 1180 * RAW same as CUR but in the input buffer, bypass any token 1181 * extraction that may have been done 1182 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1183 * to compare on ASCII based substring. 1184 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1185 * strings without newlines within the parser. 1186 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1187 * defined char within the parser. 1188 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1189 * 1190 * NEXT Skip to the next character, this does the proper decoding 1191 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1192 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1193 * CUR_CHAR(l) returns the current unicode character (int), set l 1194 * to the number of xmlChars used for the encoding [0-5]. 
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these macros implicitly use a variable named "ctxt" that must be
 * in scope at the point of use.
 */

/* RAW and CUR expand to the same expression: the byte under the cursor. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn.
 * Each CMPn is built on CMP(n-1), so the && chain stops at the first
 * mismatching byte. Note that "s" is not parenthesized in the expansion
 * and is evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance the cursor, the column and the character count by
 * val, then hand a '%' under the cursor to xmlParserHandlePEReference()
 * and pop the input if it is exhausted and cannot be refilled.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK expands to a complete "if (...) call;" statement, trailing
 * semicolon included, so it must not be followed by an "else". It only
 * acts when not in progressive mode, more than 2 * INPUT_CHUNK bytes
 * have been consumed and fewer than 2 * INPUT_CHUNK remain.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * Out-of-line body of SHRINK: shrink the current input and, if the cursor
 * then sits on a 0 byte and no more data can be read, pop the input.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW, like SHRINK, expands to a complete "if (...) call;" statement
 * with its own trailing semicolon. It only acts when not in progressive
 * mode and fewer than INPUT_CHUNK bytes remain after the cursor.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * Out-of-line body of GROW: try to read more data and, if the cursor is
 * still on a 0 byte and a second attempt yields nothing, pop the input.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 takes no argument and expands to a bare brace block (not a
 * do/while), so "NEXT1;" directly before an "else" would not compile.
 * Unlike SKIP it neither handles '%' nor pops an exhausted input.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): account for a newline or a column step, move the cursor by l
 * bytes and hand a '%' under the new cursor to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, as a
 * single byte when l == 1 and through xmlCopyCharMultiByte() otherwise.
 * It expands to an unbraced if/else, so it must be used as a statement
 * of its own (inside braces when it is the body of another "if").
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 *
	 * This path walks a local pointer; it updates the line counter
	 * (and resets the column) on '\n' but does not advance the
	 * column for other blanks.
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * Possible end of the buffered data: publish the cursor,
		 * try to read more, then reload it since growing may have
		 * moved the buffer.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	/*
	 * Slow path (stacked inputs or DTD state): go through NEXT, pop
	 * exhausted stacked inputs and let PE references be expanded;
	 * repeat for as long as that exposes further blanks.
	 */
	do {
	    cur = CUR;
	    while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
1354 * 1355 * Returns the current xmlChar in the parser context 1356 */ 1357xmlChar 1358xmlPopInput(xmlParserCtxtPtr ctxt) { 1359 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 1360 if (xmlParserDebugEntities) 1361 xmlGenericError(xmlGenericErrorContext, 1362 "Popping input %d\n", ctxt->inputNr); 1363 xmlFreeInputStream(inputPop(ctxt)); 1364 if ((*ctxt->input->cur == 0) && 1365 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1366 return(xmlPopInput(ctxt)); 1367 return(CUR); 1368} 1369 1370/** 1371 * xmlPushInput: 1372 * @ctxt: an XML parser context 1373 * @input: an XML parser input fragment (entity, XML fragment ...). 1374 * 1375 * xmlPushInput: switch to a new input stream which is stacked on top 1376 * of the previous one(s). 1377 */ 1378void 1379xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1380 if (input == NULL) return; 1381 1382 if (xmlParserDebugEntities) { 1383 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1384 xmlGenericError(xmlGenericErrorContext, 1385 "%s(%d): ", ctxt->input->filename, 1386 ctxt->input->line); 1387 xmlGenericError(xmlGenericErrorContext, 1388 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1389 } 1390 inputPush(ctxt, input); 1391 GROW; 1392} 1393 1394/** 1395 * xmlParseCharRef: 1396 * @ctxt: an XML parser context 1397 * 1398 * parse Reference declarations 1399 * 1400 * [66] CharRef ::= '&#' [0-9]+ ';' | 1401 * '&#x' [0-9a-fA-F]+ ';' 1402 * 1403 * [ WFC: Legal Character ] 1404 * Characters referred to using character references must match the 1405 * production for Char. 
1406 * 1407 * Returns the value parsed (as an int), 0 in case of error 1408 */ 1409int 1410xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1411 unsigned int val = 0; 1412 int count = 0; 1413 1414 /* 1415 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1416 */ 1417 if ((RAW == '&') && (NXT(1) == '#') && 1418 (NXT(2) == 'x')) { 1419 SKIP(3); 1420 GROW; 1421 while (RAW != ';') { /* loop blocked by count */ 1422 if (count++ > 20) { 1423 count = 0; 1424 GROW; 1425 } 1426 if ((RAW >= '0') && (RAW <= '9')) 1427 val = val * 16 + (CUR - '0'); 1428 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1429 val = val * 16 + (CUR - 'a') + 10; 1430 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1431 val = val * 16 + (CUR - 'A') + 10; 1432 else { 1433 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1434 val = 0; 1435 break; 1436 } 1437 NEXT; 1438 count++; 1439 } 1440 if (RAW == ';') { 1441 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1442 ctxt->input->col++; 1443 ctxt->nbChars ++; 1444 ctxt->input->cur++; 1445 } 1446 } else if ((RAW == '&') && (NXT(1) == '#')) { 1447 SKIP(2); 1448 GROW; 1449 while (RAW != ';') { /* loop blocked by count */ 1450 if (count++ > 20) { 1451 count = 0; 1452 GROW; 1453 } 1454 if ((RAW >= '0') && (RAW <= '9')) 1455 val = val * 10 + (CUR - '0'); 1456 else { 1457 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1458 val = 0; 1459 break; 1460 } 1461 NEXT; 1462 count++; 1463 } 1464 if (RAW == ';') { 1465 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1466 ctxt->input->col++; 1467 ctxt->nbChars ++; 1468 ctxt->input->cur++; 1469 } 1470 } else { 1471 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1472 } 1473 1474 /* 1475 * [ WFC: Legal Character ] 1476 * Characters referred to using character references must match the 1477 * production for Char. 
1478 */ 1479 if (IS_CHAR(val)) { 1480 return(val); 1481 } else { 1482 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1483 "xmlParseCharRef: invalid xmlChar value %d\n", 1484 val); 1485 } 1486 return(0); 1487} 1488 1489/** 1490 * xmlParseStringCharRef: 1491 * @ctxt: an XML parser context 1492 * @str: a pointer to an index in the string 1493 * 1494 * parse Reference declarations, variant parsing from a string rather 1495 * than an an input flow. 1496 * 1497 * [66] CharRef ::= '&#' [0-9]+ ';' | 1498 * '&#x' [0-9a-fA-F]+ ';' 1499 * 1500 * [ WFC: Legal Character ] 1501 * Characters referred to using character references must match the 1502 * production for Char. 1503 * 1504 * Returns the value parsed (as an int), 0 in case of error, str will be 1505 * updated to the current value of the index 1506 */ 1507static int 1508xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1509 const xmlChar *ptr; 1510 xmlChar cur; 1511 int val = 0; 1512 1513 if ((str == NULL) || (*str == NULL)) return(0); 1514 ptr = *str; 1515 cur = *ptr; 1516 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1517 ptr += 3; 1518 cur = *ptr; 1519 while (cur != ';') { /* Non input consuming loop */ 1520 if ((cur >= '0') && (cur <= '9')) 1521 val = val * 16 + (cur - '0'); 1522 else if ((cur >= 'a') && (cur <= 'f')) 1523 val = val * 16 + (cur - 'a') + 10; 1524 else if ((cur >= 'A') && (cur <= 'F')) 1525 val = val * 16 + (cur - 'A') + 10; 1526 else { 1527 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1528 val = 0; 1529 break; 1530 } 1531 ptr++; 1532 cur = *ptr; 1533 } 1534 if (cur == ';') 1535 ptr++; 1536 } else if ((cur == '&') && (ptr[1] == '#')){ 1537 ptr += 2; 1538 cur = *ptr; 1539 while (cur != ';') { /* Non input consuming loops */ 1540 if ((cur >= '0') && (cur <= '9')) 1541 val = val * 10 + (cur - '0'); 1542 else { 1543 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1544 val = 0; 1545 break; 1546 } 1547 ptr++; 1548 cur = *ptr; 1549 } 1550 if (cur == ';') 1551 ptr++; 
1552 } else { 1553 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1554 return(0); 1555 } 1556 *str = ptr; 1557 1558 /* 1559 * [ WFC: Legal Character ] 1560 * Characters referred to using character references must match the 1561 * production for Char. 1562 */ 1563 if (IS_CHAR(val)) { 1564 return(val); 1565 } else { 1566 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1567 "xmlParseStringCharRef: invalid xmlChar value %d\n", 1568 val); 1569 } 1570 return(0); 1571} 1572 1573/** 1574 * xmlNewBlanksWrapperInputStream: 1575 * @ctxt: an XML parser context 1576 * @entity: an Entity pointer 1577 * 1578 * Create a new input stream for wrapping 1579 * blanks around a PEReference 1580 * 1581 * Returns the new input stream or NULL 1582 */ 1583 1584static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 1585 1586static xmlParserInputPtr 1587xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1588 xmlParserInputPtr input; 1589 xmlChar *buffer; 1590 size_t length; 1591 if (entity == NULL) { 1592 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 1593 "xmlNewBlanksWrapperInputStream entity\n"); 1594 return(NULL); 1595 } 1596 if (xmlParserDebugEntities) 1597 xmlGenericError(xmlGenericErrorContext, 1598 "new blanks wrapper for entity: %s\n", entity->name); 1599 input = xmlNewInputStream(ctxt); 1600 if (input == NULL) { 1601 return(NULL); 1602 } 1603 length = xmlStrlen(entity->name) + 5; 1604 buffer = xmlMallocAtomic(length); 1605 if (buffer == NULL) { 1606 xmlErrMemory(ctxt, NULL); 1607 return(NULL); 1608 } 1609 buffer [0] = ' '; 1610 buffer [1] = '%'; 1611 buffer [length-3] = ';'; 1612 buffer [length-2] = ' '; 1613 buffer [length-1] = 0; 1614 memcpy(buffer + 2, entity->name, length - 5); 1615 input->free = deallocblankswrapper; 1616 input->base = buffer; 1617 input->cur = buffer; 1618 input->length = length; 1619 input->end = &buffer[length]; 1620 return(input); 1621} 1622 1623/** 1624 * xmlParserHandlePEReference: 1625 * @ctxt: the parser context 1626 * 
1627 * [69] PEReference ::= '%' Name ';' 1628 * 1629 * [ WFC: No Recursion ] 1630 * A parsed entity must not contain a recursive 1631 * reference to itself, either directly or indirectly. 1632 * 1633 * [ WFC: Entity Declared ] 1634 * In a document without any DTD, a document with only an internal DTD 1635 * subset which contains no parameter entity references, or a document 1636 * with "standalone='yes'", ... ... The declaration of a parameter 1637 * entity must precede any reference to it... 1638 * 1639 * [ VC: Entity Declared ] 1640 * In a document with an external subset or external parameter entities 1641 * with "standalone='no'", ... ... The declaration of a parameter entity 1642 * must precede any reference to it... 1643 * 1644 * [ WFC: In DTD ] 1645 * Parameter-entity references may only appear in the DTD. 1646 * NOTE: misleading but this is handled. 1647 * 1648 * A PEReference may have been detected in the current input stream 1649 * the handling is done accordingly to 1650 * http://www.w3.org/TR/REC-xml#entproc 1651 * i.e. 
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * The function returns nothing; errors and warnings are reported through
 * the xmlFatalErr*(), xmlValidityError() and xmlWarningMsg() helpers.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Decide from the parser state whether a '%' here is a PE reference
     * to expand at all. Only the XML_PARSER_DTD case can fall out of the
     * switch; every other listed state returns.
     */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* a '%' followed by a blank or the end of data is not a reference */
	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* skip the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    /* NOTE(review): name may be NULL here; it is only printed in debug mode */
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* the entity lookup is delegated to the SAX handler, if any */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
			 "PEReference: %%%s; not found\n", name);
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
			                 "PEReference: %%%s; not found\n",
				         name);
		    } else
		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
			              "PEReference: %%%s; not found\n",
				      name, NULL);
		    /* either way the document is flagged as not valid */
		    ctxt->valid = 0;
		}
	    } else if (ctxt->input->free != deallocblankswrapper) {
		    /*
		     * First pass: the current input is not a blanks
		     * wrapper, so push one. Its content contains the same
		     * reference again, which is then expected to be
		     * handled by the branch below once that wrapper is
		     * the current input.
		     */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);
	    } else {
	        /* already inside a blanks wrapper: push the entity content */
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     */
		    /* GROW carries its own ';' (see its definition) */
		    GROW
	            if (entity->length >= 4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* an external PE may start with a text declaration */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
			(IS_BLANK_CH(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
			     "PEReference: %s is not a parameter entity\n",
				      name);
		}
	    }
	} else {
	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
	}
    }
}

/*
 * Macro used to grow the current buffer.
1814 */ 1815#define growBuffer(buffer) { \ 1816 buffer##_size *= 2; \ 1817 buffer = (xmlChar *) \ 1818 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1819 if (buffer == NULL) goto mem_error; \ 1820} 1821 1822/** 1823 * xmlStringLenDecodeEntities: 1824 * @ctxt: the parser context 1825 * @str: the input string 1826 * @len: the string length 1827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1828 * @end: an end marker xmlChar, 0 if none 1829 * @end2: an end marker xmlChar, 0 if none 1830 * @end3: an end marker xmlChar, 0 if none 1831 * 1832 * Takes a entity string content and process to do the adequate substitutions. 1833 * 1834 * [67] Reference ::= EntityRef | CharRef 1835 * 1836 * [69] PEReference ::= '%' Name ';' 1837 * 1838 * Returns A newly allocated string with the substitution done. The caller 1839 * must deallocate it ! 1840 */ 1841xmlChar * 1842xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 1843 int what, xmlChar end, xmlChar end2, xmlChar end3) { 1844 xmlChar *buffer = NULL; 1845 int buffer_size = 0; 1846 1847 xmlChar *current = NULL; 1848 const xmlChar *last; 1849 xmlEntityPtr ent; 1850 int c,l; 1851 int nbchars = 0; 1852 1853 if ((str == NULL) || (len < 0)) 1854 return(NULL); 1855 last = str + len; 1856 1857 if (ctxt->depth > 40) { 1858 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 1859 return(NULL); 1860 } 1861 1862 /* 1863 * allocate a translation buffer. 1864 */ 1865 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1866 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 1867 if (buffer == NULL) goto mem_error; 1868 1869 /* 1870 * OK loop until we reach one of the ending char or a size limit. 1871 * we are operating on already parsed values. 
1872 */ 1873 if (str < last) 1874 c = CUR_SCHAR(str, l); 1875 else 1876 c = 0; 1877 while ((c != 0) && (c != end) && /* non input consuming loop */ 1878 (c != end2) && (c != end3)) { 1879 1880 if (c == 0) break; 1881 if ((c == '&') && (str[1] == '#')) { 1882 int val = xmlParseStringCharRef(ctxt, &str); 1883 if (val != 0) { 1884 COPY_BUF(0,buffer,nbchars,val); 1885 } 1886 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1887 if (xmlParserDebugEntities) 1888 xmlGenericError(xmlGenericErrorContext, 1889 "String decoding Entity Reference: %.30s\n", 1890 str); 1891 ent = xmlParseStringEntityRef(ctxt, &str); 1892 if ((ent != NULL) && 1893 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1894 if (ent->content != NULL) { 1895 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1896 } else { 1897 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 1898 "predefined entity has no content\n"); 1899 } 1900 } else if ((ent != NULL) && (ent->content != NULL)) { 1901 xmlChar *rep; 1902 1903 ctxt->depth++; 1904 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1905 0, 0, 0); 1906 ctxt->depth--; 1907 if (rep != NULL) { 1908 current = rep; 1909 while (*current != 0) { /* non input consuming loop */ 1910 buffer[nbchars++] = *current++; 1911 if (nbchars > 1912 buffer_size - XML_PARSER_BUFFER_SIZE) { 1913 growBuffer(buffer); 1914 } 1915 } 1916 xmlFree(rep); 1917 } 1918 } else if (ent != NULL) { 1919 int i = xmlStrlen(ent->name); 1920 const xmlChar *cur = ent->name; 1921 1922 buffer[nbchars++] = '&'; 1923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1924 growBuffer(buffer); 1925 } 1926 for (;i > 0;i--) 1927 buffer[nbchars++] = *cur++; 1928 buffer[nbchars++] = ';'; 1929 } 1930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1931 if (xmlParserDebugEntities) 1932 xmlGenericError(xmlGenericErrorContext, 1933 "String decoding PE Reference: %.30s\n", str); 1934 ent = xmlParseStringPEReference(ctxt, &str); 1935 if (ent != NULL) { 1936 xmlChar *rep; 1937 1938 ctxt->depth++; 
1939 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1940 0, 0, 0); 1941 ctxt->depth--; 1942 if (rep != NULL) { 1943 current = rep; 1944 while (*current != 0) { /* non input consuming loop */ 1945 buffer[nbchars++] = *current++; 1946 if (nbchars > 1947 buffer_size - XML_PARSER_BUFFER_SIZE) { 1948 growBuffer(buffer); 1949 } 1950 } 1951 xmlFree(rep); 1952 } 1953 } 1954 } else { 1955 COPY_BUF(l,buffer,nbchars,c); 1956 str += l; 1957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 1958 growBuffer(buffer); 1959 } 1960 } 1961 if (str < last) 1962 c = CUR_SCHAR(str, l); 1963 else 1964 c = 0; 1965 } 1966 buffer[nbchars++] = 0; 1967 return(buffer); 1968 1969mem_error: 1970 xmlErrMemory(ctxt, NULL); 1971 return(NULL); 1972} 1973 1974/** 1975 * xmlStringDecodeEntities: 1976 * @ctxt: the parser context 1977 * @str: the input string 1978 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1979 * @end: an end marker xmlChar, 0 if none 1980 * @end2: an end marker xmlChar, 0 if none 1981 * @end3: an end marker xmlChar, 0 if none 1982 * 1983 * Takes a entity string content and process to do the adequate substitutions. 1984 * 1985 * [67] Reference ::= EntityRef | CharRef 1986 * 1987 * [69] PEReference ::= '%' Name ';' 1988 * 1989 * Returns A newly allocated string with the substitution done. The caller 1990 * must deallocate it ! 
1991 */ 1992xmlChar * 1993xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 1994 xmlChar end, xmlChar end2, xmlChar end3) { 1995 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 1996 end, end2, end3)); 1997} 1998 1999/************************************************************************ 2000 * * 2001 * Commodity functions to handle xmlChars * 2002 * * 2003 ************************************************************************/ 2004 2005/** 2006 * xmlStrndup: 2007 * @cur: the input xmlChar * 2008 * @len: the len of @cur 2009 * 2010 * a strndup for array of xmlChar's 2011 * 2012 * Returns a new xmlChar * or NULL 2013 */ 2014xmlChar * 2015xmlStrndup(const xmlChar *cur, int len) { 2016 xmlChar *ret; 2017 2018 if ((cur == NULL) || (len < 0)) return(NULL); 2019 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 2020 if (ret == NULL) { 2021 xmlErrMemory(NULL, NULL); 2022 return(NULL); 2023 } 2024 memcpy(ret, cur, len * sizeof(xmlChar)); 2025 ret[len] = 0; 2026 return(ret); 2027} 2028 2029/** 2030 * xmlStrdup: 2031 * @cur: the input xmlChar * 2032 * 2033 * a strdup for array of xmlChar's. Since they are supposed to be 2034 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 2035 * a termination mark of '0'. 
2036 * 2037 * Returns a new xmlChar * or NULL 2038 */ 2039xmlChar * 2040xmlStrdup(const xmlChar *cur) { 2041 const xmlChar *p = cur; 2042 2043 if (cur == NULL) return(NULL); 2044 while (*p != 0) p++; /* non input consuming */ 2045 return(xmlStrndup(cur, p - cur)); 2046} 2047 2048/** 2049 * xmlCharStrndup: 2050 * @cur: the input char * 2051 * @len: the len of @cur 2052 * 2053 * a strndup for char's to xmlChar's 2054 * 2055 * Returns a new xmlChar * or NULL 2056 */ 2057 2058xmlChar * 2059xmlCharStrndup(const char *cur, int len) { 2060 int i; 2061 xmlChar *ret; 2062 2063 if ((cur == NULL) || (len < 0)) return(NULL); 2064 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 2065 if (ret == NULL) { 2066 xmlErrMemory(NULL, NULL); 2067 return(NULL); 2068 } 2069 for (i = 0;i < len;i++) 2070 ret[i] = (xmlChar) cur[i]; 2071 ret[len] = 0; 2072 return(ret); 2073} 2074 2075/** 2076 * xmlCharStrdup: 2077 * @cur: the input char * 2078 * 2079 * a strdup for char's to xmlChar's 2080 * 2081 * Returns a new xmlChar * or NULL 2082 */ 2083 2084xmlChar * 2085xmlCharStrdup(const char *cur) { 2086 const char *p = cur; 2087 2088 if (cur == NULL) return(NULL); 2089 while (*p != '\0') p++; /* non input consuming */ 2090 return(xmlCharStrndup(cur, p - cur)); 2091} 2092 2093/** 2094 * xmlStrcmp: 2095 * @str1: the first xmlChar * 2096 * @str2: the second xmlChar * 2097 * 2098 * a strcmp for xmlChar's 2099 * 2100 * Returns the integer result of the comparison 2101 */ 2102 2103int 2104xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 2105 register int tmp; 2106 2107 if (str1 == str2) return(0); 2108 if (str1 == NULL) return(-1); 2109 if (str2 == NULL) return(1); 2110 do { 2111 tmp = *str1++ - *str2; 2112 if (tmp != 0) return(tmp); 2113 } while (*str2++ != 0); 2114 return 0; 2115} 2116 2117/** 2118 * xmlStrEqual: 2119 * @str1: the first xmlChar * 2120 * @str2: the second xmlChar * 2121 * 2122 * Check if both string are equal of have same content 2123 * Should be a bit more readable 
and faster than xmlStrEqual() 2124 * 2125 * Returns 1 if they are equal, 0 if they are different 2126 */ 2127 2128int 2129xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 2130 if (str1 == str2) return(1); 2131 if (str1 == NULL) return(0); 2132 if (str2 == NULL) return(0); 2133 do { 2134 if (*str1++ != *str2) return(0); 2135 } while (*str2++); 2136 return(1); 2137} 2138 2139/** 2140 * xmlStrQEqual: 2141 * @pref: the prefix of the QName 2142 * @name: the localname of the QName 2143 * @str: the second xmlChar * 2144 * 2145 * Check if a QName is Equal to a given string 2146 * 2147 * Returns 1 if they are equal, 0 if they are different 2148 */ 2149 2150int 2151xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { 2152 if (pref == NULL) return(xmlStrEqual(name, str)); 2153 if (name == NULL) return(0); 2154 if (str == NULL) return(0); 2155 2156 do { 2157 if (*pref++ != *str) return(0); 2158 } while ((*str++) && (*pref)); 2159 if (*str++ != ':') return(0); 2160 do { 2161 if (*name++ != *str) return(0); 2162 } while (*str++); 2163 return(1); 2164} 2165 2166/** 2167 * xmlStrncmp: 2168 * @str1: the first xmlChar * 2169 * @str2: the second xmlChar * 2170 * @len: the max comparison length 2171 * 2172 * a strncmp for xmlChar's 2173 * 2174 * Returns the integer result of the comparison 2175 */ 2176 2177int 2178xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 2179 register int tmp; 2180 2181 if (len <= 0) return(0); 2182 if (str1 == str2) return(0); 2183 if (str1 == NULL) return(-1); 2184 if (str2 == NULL) return(1); 2185 do { 2186 tmp = *str1++ - *str2; 2187 if (tmp != 0 || --len == 0) return(tmp); 2188 } while (*str2++ != 0); 2189 return 0; 2190} 2191 2192static const xmlChar casemap[256] = { 2193 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 2194 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 2195 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 2196 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 2197 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 2198 
0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 2199 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 2200 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 2201 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 2202 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 2203 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 2204 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 2205 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 2206 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 2207 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 2208 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 2209 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 2210 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 2211 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 2212 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 2213 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 2214 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 2215 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 2216 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 2217 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 2218 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 2219 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 2220 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 2221 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 2222 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 2223 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 2224 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 2225}; 2226 2227/** 2228 * xmlStrcasecmp: 2229 * @str1: the first xmlChar * 2230 * @str2: the second xmlChar * 2231 * 2232 * a strcasecmp for xmlChar's 2233 * 2234 * Returns the integer result of the comparison 2235 */ 2236 2237int 2238xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 2239 register int tmp; 2240 2241 if (str1 == str2) return(0); 2242 if (str1 == NULL) return(-1); 2243 if (str2 == NULL) return(1); 2244 do { 2245 tmp = casemap[*str1++] - casemap[*str2]; 2246 if (tmp != 0) return(tmp); 2247 } while (*str2++ != 0); 2248 return 0; 2249} 2250 2251/** 2252 * xmlStrncasecmp: 2253 * @str1: the first xmlChar * 2254 * @str2: the second xmlChar * 2255 * @len: the max comparison length 2256 * 2257 * a strncasecmp for xmlChar's 2258 * 2259 * 
Returns the integer result of the comparison 2260 */ 2261 2262int 2263xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 2264 register int tmp; 2265 2266 if (len <= 0) return(0); 2267 if (str1 == str2) return(0); 2268 if (str1 == NULL) return(-1); 2269 if (str2 == NULL) return(1); 2270 do { 2271 tmp = casemap[*str1++] - casemap[*str2]; 2272 if (tmp != 0 || --len == 0) return(tmp); 2273 } while (*str2++ != 0); 2274 return 0; 2275} 2276 2277/** 2278 * xmlStrchr: 2279 * @str: the xmlChar * array 2280 * @val: the xmlChar to search 2281 * 2282 * a strchr for xmlChar's 2283 * 2284 * Returns the xmlChar * for the first occurrence or NULL. 2285 */ 2286 2287const xmlChar * 2288xmlStrchr(const xmlChar *str, xmlChar val) { 2289 if (str == NULL) return(NULL); 2290 while (*str != 0) { /* non input consuming */ 2291 if (*str == val) return((xmlChar *) str); 2292 str++; 2293 } 2294 return(NULL); 2295} 2296 2297/** 2298 * xmlStrstr: 2299 * @str: the xmlChar * array (haystack) 2300 * @val: the xmlChar to search (needle) 2301 * 2302 * a strstr for xmlChar's 2303 * 2304 * Returns the xmlChar * for the first occurrence or NULL. 2305 */ 2306 2307const xmlChar * 2308xmlStrstr(const xmlChar *str, const xmlChar *val) { 2309 int n; 2310 2311 if (str == NULL) return(NULL); 2312 if (val == NULL) return(NULL); 2313 n = xmlStrlen(val); 2314 2315 if (n == 0) return(str); 2316 while (*str != 0) { /* non input consuming */ 2317 if (*str == *val) { 2318 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 2319 } 2320 str++; 2321 } 2322 return(NULL); 2323} 2324 2325/** 2326 * xmlStrcasestr: 2327 * @str: the xmlChar * array (haystack) 2328 * @val: the xmlChar to search (needle) 2329 * 2330 * a case-ignoring strstr for xmlChar's 2331 * 2332 * Returns the xmlChar * for the first occurrence or NULL. 
2333 */ 2334 2335const xmlChar * 2336xmlStrcasestr(const xmlChar *str, xmlChar *val) { 2337 int n; 2338 2339 if (str == NULL) return(NULL); 2340 if (val == NULL) return(NULL); 2341 n = xmlStrlen(val); 2342 2343 if (n == 0) return(str); 2344 while (*str != 0) { /* non input consuming */ 2345 if (casemap[*str] == casemap[*val]) 2346 if (!xmlStrncasecmp(str, val, n)) return(str); 2347 str++; 2348 } 2349 return(NULL); 2350} 2351 2352/** 2353 * xmlStrsub: 2354 * @str: the xmlChar * array (haystack) 2355 * @start: the index of the first char (zero based) 2356 * @len: the length of the substring 2357 * 2358 * Extract a substring of a given string 2359 * 2360 * Returns the xmlChar * for the first occurrence or NULL. 2361 */ 2362 2363xmlChar * 2364xmlStrsub(const xmlChar *str, int start, int len) { 2365 int i; 2366 2367 if (str == NULL) return(NULL); 2368 if (start < 0) return(NULL); 2369 if (len < 0) return(NULL); 2370 2371 for (i = 0;i < start;i++) { 2372 if (*str == 0) return(NULL); 2373 str++; 2374 } 2375 if (*str == 0) return(NULL); 2376 return(xmlStrndup(str, len)); 2377} 2378 2379/** 2380 * xmlStrlen: 2381 * @str: the xmlChar * array 2382 * 2383 * length of a xmlChar's string 2384 * 2385 * Returns the number of xmlChar contained in the ARRAY. 2386 */ 2387 2388int 2389xmlStrlen(const xmlChar *str) { 2390 int len = 0; 2391 2392 if (str == NULL) return(0); 2393 while (*str != 0) { /* non input consuming */ 2394 str++; 2395 len++; 2396 } 2397 return(len); 2398} 2399 2400/** 2401 * xmlStrncat: 2402 * @cur: the original xmlChar * array 2403 * @add: the xmlChar * array added 2404 * @len: the length of @add 2405 * 2406 * a strncat for array of xmlChar's, it will extend @cur with the len 2407 * first bytes of @add. 
2408 * 2409 * Returns a new xmlChar *, the original @cur is reallocated if needed 2410 * and should not be freed 2411 */ 2412 2413xmlChar * 2414xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 2415 int size; 2416 xmlChar *ret; 2417 2418 if ((add == NULL) || (len == 0)) 2419 return(cur); 2420 if (cur == NULL) 2421 return(xmlStrndup(add, len)); 2422 2423 size = xmlStrlen(cur); 2424 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 2425 if (ret == NULL) { 2426 xmlErrMemory(NULL, NULL); 2427 return(cur); 2428 } 2429 memcpy(&ret[size], add, len * sizeof(xmlChar)); 2430 ret[size + len] = 0; 2431 return(ret); 2432} 2433 2434/** 2435 * xmlStrcat: 2436 * @cur: the original xmlChar * array 2437 * @add: the xmlChar * array added 2438 * 2439 * a strcat for array of xmlChar's. Since they are supposed to be 2440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume 2441 * a termination mark of '0'. 2442 * 2443 * Returns a new xmlChar * containing the concatenated string. 2444 */ 2445xmlChar * 2446xmlStrcat(xmlChar *cur, const xmlChar *add) { 2447 const xmlChar *p = add; 2448 2449 if (add == NULL) return(cur); 2450 if (cur == NULL) 2451 return(xmlStrdup(add)); 2452 2453 while (*p != 0) p++; /* non input consuming */ 2454 return(xmlStrncat(cur, add, p - add)); 2455} 2456 2457/** 2458 * xmlStrPrintf: 2459 * @buf: the result buffer. 2460 * @len: the result buffer length. 2461 * @msg: the message with printf formatting. 2462 * @...: extra parameters for the message. 2463 * 2464 * Formats @msg and places result into @buf. 2465 * 2466 * Returns the number of characters written to @buf or -1 if an error occurs. 2467 */ 2468int 2469xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) { 2470 va_list args; 2471 int ret; 2472 2473 if((buf == NULL) || (msg == NULL)) { 2474 return(-1); 2475 } 2476 2477 va_start(args, msg); 2478 ret = vsnprintf((char *) buf, len, (const char *) msg, args); 2479 va_end(args); 2480 buf[len - 1] = 0; /* be safe ! 
*/ 2481 2482 return(ret); 2483} 2484 2485/************************************************************************ 2486 * * 2487 * Commodity functions, cleanup needed ? * 2488 * * 2489 ************************************************************************/ 2490 2491/** 2492 * areBlanks: 2493 * @ctxt: an XML parser context 2494 * @str: a xmlChar * 2495 * @len: the size of @str 2496 * 2497 * Is this a sequence of blank chars that one can ignore ? 2498 * 2499 * Returns 1 if ignorable 0 otherwise. 2500 */ 2501 2502static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 2503 int i, ret; 2504 xmlNodePtr lastChild; 2505 2506 /* 2507 * Don't spend time trying to differentiate them, the same callback is 2508 * used ! 2509 */ 2510 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2511 return(0); 2512 2513 /* 2514 * Check for xml:space value. 2515 */ 2516 if (*(ctxt->space) == 1) 2517 return(0); 2518 2519 /* 2520 * Check that the string is made of blanks 2521 */ 2522 for (i = 0;i < len;i++) 2523 if (!(IS_BLANK_CH(str[i]))) return(0); 2524 2525 /* 2526 * Look if the element is mixed content in the DTD if available 2527 */ 2528 if (ctxt->node == NULL) return(0); 2529 if (ctxt->myDoc != NULL) { 2530 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2531 if (ret == 0) return(1); 2532 if (ret == 1) return(0); 2533 } 2534 2535 /* 2536 * Otherwise, heuristic :-\ 2537 */ 2538 if (RAW != '<') return(0); 2539 if ((ctxt->node->children == NULL) && 2540 (RAW == '<') && (NXT(1) == '/')) return(0); 2541 2542 lastChild = xmlGetLastChild(ctxt->node); 2543 if (lastChild == NULL) { 2544 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2545 (ctxt->node->content != NULL)) return(0); 2546 } else if (xmlNodeIsText(lastChild)) 2547 return(0); 2548 else if ((ctxt->node->children != NULL) && 2549 (xmlNodeIsText(ctxt->node->children))) 2550 return(0); 2551 return(1); 2552} 2553 2554/************************************************************************ 2555 * * 
2556 * Extra stuff for namespace support * 2557 * Relates to http://www.w3.org/TR/WD-xml-names * 2558 * * 2559 ************************************************************************/ 2560 2561/** 2562 * xmlSplitQName: 2563 * @ctxt: an XML parser context 2564 * @name: an XML parser context 2565 * @prefix: a xmlChar ** 2566 * 2567 * parse an UTF8 encoded XML qualified name string 2568 * 2569 * [NS 5] QName ::= (Prefix ':')? LocalPart 2570 * 2571 * [NS 6] Prefix ::= NCName 2572 * 2573 * [NS 7] LocalPart ::= NCName 2574 * 2575 * Returns the local part, and prefix is updated 2576 * to get the Prefix if any. 2577 */ 2578 2579xmlChar * 2580xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2581 xmlChar buf[XML_MAX_NAMELEN + 5]; 2582 xmlChar *buffer = NULL; 2583 int len = 0; 2584 int max = XML_MAX_NAMELEN; 2585 xmlChar *ret = NULL; 2586 const xmlChar *cur = name; 2587 int c; 2588 2589 *prefix = NULL; 2590 2591 if (cur == NULL) return(NULL); 2592 2593#ifndef XML_XML_NAMESPACE 2594 /* xml: prefix is not really a namespace */ 2595 if ((cur[0] == 'x') && (cur[1] == 'm') && 2596 (cur[2] == 'l') && (cur[3] == ':')) 2597 return(xmlStrdup(name)); 2598#endif 2599 2600 /* nasty but well=formed */ 2601 if (cur[0] == ':') 2602 return(xmlStrdup(name)); 2603 2604 c = *cur++; 2605 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2606 buf[len++] = c; 2607 c = *cur++; 2608 } 2609 if (len >= max) { 2610 /* 2611 * Okay someone managed to make a huge name, so he's ready to pay 2612 * for the processing speed. 
2613 */ 2614 max = len * 2; 2615 2616 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2617 if (buffer == NULL) { 2618 xmlErrMemory(ctxt, NULL); 2619 return(NULL); 2620 } 2621 memcpy(buffer, buf, len); 2622 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2623 if (len + 10 > max) { 2624 max *= 2; 2625 buffer = (xmlChar *) xmlRealloc(buffer, 2626 max * sizeof(xmlChar)); 2627 if (buffer == NULL) { 2628 xmlErrMemory(ctxt, NULL); 2629 return(NULL); 2630 } 2631 } 2632 buffer[len++] = c; 2633 c = *cur++; 2634 } 2635 buffer[len] = 0; 2636 } 2637 2638 /* nasty but well=formed 2639 if ((c == ':') && (*cur == 0)) { 2640 return(xmlStrdup(name)); 2641 } */ 2642 2643 if (buffer == NULL) 2644 ret = xmlStrndup(buf, len); 2645 else { 2646 ret = buffer; 2647 buffer = NULL; 2648 max = XML_MAX_NAMELEN; 2649 } 2650 2651 2652 if (c == ':') { 2653 c = *cur; 2654 *prefix = ret; 2655 if (c == 0) { 2656 return(xmlStrndup(BAD_CAST "", 0)); 2657 } 2658 len = 0; 2659 2660 /* 2661 * Check that the first character is proper to start 2662 * a new name 2663 */ 2664 if (!(((c >= 0x61) && (c <= 0x7A)) || 2665 ((c >= 0x41) && (c <= 0x5A)) || 2666 (c == '_') || (c == ':'))) { 2667 int l; 2668 int first = CUR_SCHAR(cur, l); 2669 2670 if (!IS_LETTER(first) && (first != '_')) { 2671 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2672 "Name %s is not XML Namespace compliant\n", 2673 name); 2674 } 2675 } 2676 cur++; 2677 2678 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2679 buf[len++] = c; 2680 c = *cur++; 2681 } 2682 if (len >= max) { 2683 /* 2684 * Okay someone managed to make a huge name, so he's ready to pay 2685 * for the processing speed. 
2686 */ 2687 max = len * 2; 2688 2689 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2690 if (buffer == NULL) { 2691 xmlErrMemory(ctxt, NULL); 2692 return(NULL); 2693 } 2694 memcpy(buffer, buf, len); 2695 while (c != 0) { /* tested bigname2.xml */ 2696 if (len + 10 > max) { 2697 max *= 2; 2698 buffer = (xmlChar *) xmlRealloc(buffer, 2699 max * sizeof(xmlChar)); 2700 if (buffer == NULL) { 2701 xmlErrMemory(ctxt, NULL); 2702 return(NULL); 2703 } 2704 } 2705 buffer[len++] = c; 2706 c = *cur++; 2707 } 2708 buffer[len] = 0; 2709 } 2710 2711 if (buffer == NULL) 2712 ret = xmlStrndup(buf, len); 2713 else { 2714 ret = buffer; 2715 } 2716 } 2717 2718 return(ret); 2719} 2720 2721/************************************************************************ 2722 * * 2723 * The parser itself * 2724 * Relates to http://www.w3.org/TR/REC-xml * 2725 * * 2726 ************************************************************************/ 2727 2728static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2729static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2730 int *len, int *alloc, int normalize); 2731 2732/** 2733 * xmlParseName: 2734 * @ctxt: an XML parser context 2735 * 2736 * parse an XML name. 2737 * 2738 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 2739 * CombiningChar | Extender 2740 * 2741 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2742 * 2743 * [6] Names ::= Name (S Name)* 2744 * 2745 * Returns the Name parsed or NULL 2746 */ 2747 2748const xmlChar * 2749xmlParseName(xmlParserCtxtPtr ctxt) { 2750 const xmlChar *in; 2751 const xmlChar *ret; 2752 int count = 0; 2753 2754 GROW; 2755 2756 /* 2757 * Accelerator for simple ASCII names 2758 */ 2759 in = ctxt->input->cur; 2760 if (((*in >= 0x61) && (*in <= 0x7A)) || 2761 ((*in >= 0x41) && (*in <= 0x5A)) || 2762 (*in == '_') || (*in == ':')) { 2763 in++; 2764 while (((*in >= 0x61) && (*in <= 0x7A)) || 2765 ((*in >= 0x41) && (*in <= 0x5A)) || 2766 ((*in >= 0x30) && (*in <= 0x39)) || 2767 (*in == '_') || (*in == '-') || 2768 (*in == ':') || (*in == '.')) 2769 in++; 2770 if ((*in > 0) && (*in < 0x80)) { 2771 count = in - ctxt->input->cur; 2772 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2773 ctxt->input->cur = in; 2774 ctxt->nbChars += count; 2775 ctxt->input->col += count; 2776 if (ret == NULL) 2777 xmlErrMemory(ctxt, NULL); 2778 return(ret); 2779 } 2780 } 2781 return(xmlParseNameComplex(ctxt)); 2782} 2783 2784/** 2785 * xmlParseNameAndCompare: 2786 * @ctxt: an XML parser context 2787 * 2788 * parse an XML name and compares for match 2789 * (specialized for endtag parsing) 2790 * 2791 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2792 * and the name for mismatch 2793 */ 2794 2795static const xmlChar * 2796xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2797 const xmlChar *cmp = other; 2798 const xmlChar *in; 2799 const xmlChar *ret; 2800 2801 GROW; 2802 2803 in = ctxt->input->cur; 2804 while (*in != 0 && *in == *cmp) { 2805 ++in; 2806 ++cmp; 2807 } 2808 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2809 /* success */ 2810 ctxt->input->cur = in; 2811 return (const xmlChar*) 1; 2812 } 2813 /* failure (or end of input buffer), check with full function */ 2814 ret = xmlParseName (ctxt); 
2815 /* strings coming from the dictionnary direct compare possible */ 2816 if (ret == other) { 2817 return (const xmlChar*) 1; 2818 } 2819 return ret; 2820} 2821 2822static const xmlChar * 2823xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2824 int len = 0, l; 2825 int c; 2826 int count = 0; 2827 2828 /* 2829 * Handler for more complex cases 2830 */ 2831 GROW; 2832 c = CUR_CHAR(l); 2833 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2834 (!IS_LETTER(c) && (c != '_') && 2835 (c != ':'))) { 2836 return(NULL); 2837 } 2838 2839 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2840 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2841 (c == '.') || (c == '-') || 2842 (c == '_') || (c == ':') || 2843 (IS_COMBINING(c)) || 2844 (IS_EXTENDER(c)))) { 2845 if (count++ > 100) { 2846 count = 0; 2847 GROW; 2848 } 2849 len += l; 2850 NEXTL(l); 2851 c = CUR_CHAR(l); 2852 } 2853 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2854} 2855 2856/** 2857 * xmlParseStringName: 2858 * @ctxt: an XML parser context 2859 * @str: a pointer to the string pointer (IN/OUT) 2860 * 2861 * parse an XML name. 2862 * 2863 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2864 * CombiningChar | Extender 2865 * 2866 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2867 * 2868 * [6] Names ::= Name (S Name)* 2869 * 2870 * Returns the Name parsed or NULL. The @str pointer 2871 * is updated to the current location in the string. 
2872 */ 2873 2874static xmlChar * 2875xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2876 xmlChar buf[XML_MAX_NAMELEN + 5]; 2877 const xmlChar *cur = *str; 2878 int len = 0, l; 2879 int c; 2880 2881 c = CUR_SCHAR(cur, l); 2882 if (!IS_LETTER(c) && (c != '_') && 2883 (c != ':')) { 2884 return(NULL); 2885 } 2886 2887 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2888 (c == '.') || (c == '-') || 2889 (c == '_') || (c == ':') || 2890 (IS_COMBINING(c)) || 2891 (IS_EXTENDER(c))) { 2892 COPY_BUF(l,buf,len,c); 2893 cur += l; 2894 c = CUR_SCHAR(cur, l); 2895 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2896 /* 2897 * Okay someone managed to make a huge name, so he's ready to pay 2898 * for the processing speed. 2899 */ 2900 xmlChar *buffer; 2901 int max = len * 2; 2902 2903 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2904 if (buffer == NULL) { 2905 xmlErrMemory(ctxt, NULL); 2906 return(NULL); 2907 } 2908 memcpy(buffer, buf, len); 2909 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2910 /* test bigentname.xml */ 2911 (c == '.') || (c == '-') || 2912 (c == '_') || (c == ':') || 2913 (IS_COMBINING(c)) || 2914 (IS_EXTENDER(c))) { 2915 if (len + 10 > max) { 2916 max *= 2; 2917 buffer = (xmlChar *) xmlRealloc(buffer, 2918 max * sizeof(xmlChar)); 2919 if (buffer == NULL) { 2920 xmlErrMemory(ctxt, NULL); 2921 return(NULL); 2922 } 2923 } 2924 COPY_BUF(l,buffer,len,c); 2925 cur += l; 2926 c = CUR_SCHAR(cur, l); 2927 } 2928 buffer[len] = 0; 2929 *str = cur; 2930 return(buffer); 2931 } 2932 } 2933 *str = cur; 2934 return(xmlStrndup(buf, len)); 2935} 2936 2937/** 2938 * xmlParseNmtoken: 2939 * @ctxt: an XML parser context 2940 * 2941 * parse an XML Nmtoken. 
2942 * 2943 * [7] Nmtoken ::= (NameChar)+ 2944 * 2945 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2946 * 2947 * Returns the Nmtoken parsed or NULL 2948 */ 2949 2950xmlChar * 2951xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2952 xmlChar buf[XML_MAX_NAMELEN + 5]; 2953 int len = 0, l; 2954 int c; 2955 int count = 0; 2956 2957 GROW; 2958 c = CUR_CHAR(l); 2959 2960 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2961 (c == '.') || (c == '-') || 2962 (c == '_') || (c == ':') || 2963 (IS_COMBINING(c)) || 2964 (IS_EXTENDER(c))) { 2965 if (count++ > 100) { 2966 count = 0; 2967 GROW; 2968 } 2969 COPY_BUF(l,buf,len,c); 2970 NEXTL(l); 2971 c = CUR_CHAR(l); 2972 if (len >= XML_MAX_NAMELEN) { 2973 /* 2974 * Okay someone managed to make a huge token, so he's ready to pay 2975 * for the processing speed. 2976 */ 2977 xmlChar *buffer; 2978 int max = len * 2; 2979 2980 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2981 if (buffer == NULL) { 2982 xmlErrMemory(ctxt, NULL); 2983 return(NULL); 2984 } 2985 memcpy(buffer, buf, len); 2986 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2987 (c == '.') || (c == '-') || 2988 (c == '_') || (c == ':') || 2989 (IS_COMBINING(c)) || 2990 (IS_EXTENDER(c))) { 2991 if (count++ > 100) { 2992 count = 0; 2993 GROW; 2994 } 2995 if (len + 10 > max) { 2996 max *= 2; 2997 buffer = (xmlChar *) xmlRealloc(buffer, 2998 max * sizeof(xmlChar)); 2999 if (buffer == NULL) { 3000 xmlErrMemory(ctxt, NULL); 3001 return(NULL); 3002 } 3003 } 3004 COPY_BUF(l,buffer,len,c); 3005 NEXTL(l); 3006 c = CUR_CHAR(l); 3007 } 3008 buffer[len] = 0; 3009 return(buffer); 3010 } 3011 } 3012 if (len == 0) 3013 return(NULL); 3014 return(xmlStrndup(buf, len)); 3015} 3016 3017/** 3018 * xmlParseEntityValue: 3019 * @ctxt: an XML parser context 3020 * @orig: if non-NULL store a copy of the original entity value 3021 * 3022 * parse a value for ENTITY declarations 3023 * 3024 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3025 * 
"'" ([^%&'] | PEReference | Reference)* "'" 3026 * 3027 * Returns the EntityValue parsed with reference substituted or NULL 3028 */ 3029 3030xmlChar * 3031xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3032 xmlChar *buf = NULL; 3033 int len = 0; 3034 int size = XML_PARSER_BUFFER_SIZE; 3035 int c, l; 3036 xmlChar stop; 3037 xmlChar *ret = NULL; 3038 const xmlChar *cur = NULL; 3039 xmlParserInputPtr input; 3040 3041 if (RAW == '"') stop = '"'; 3042 else if (RAW == '\'') stop = '\''; 3043 else { 3044 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3045 return(NULL); 3046 } 3047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3048 if (buf == NULL) { 3049 xmlErrMemory(ctxt, NULL); 3050 return(NULL); 3051 } 3052 3053 /* 3054 * The content of the entity definition is copied in a buffer. 3055 */ 3056 3057 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3058 input = ctxt->input; 3059 GROW; 3060 NEXT; 3061 c = CUR_CHAR(l); 3062 /* 3063 * NOTE: 4.4.5 Included in Literal 3064 * When a parameter entity reference appears in a literal entity 3065 * value, ... a single or double quote character in the replacement 3066 * text is always treated as a normal data character and will not 3067 * terminate the literal. 3068 * In practice it means we stop the loop only when back at parsing 3069 * the initial entity and the quote is found 3070 */ 3071 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3072 (ctxt->input != input))) { 3073 if (len + 5 >= size) { 3074 size *= 2; 3075 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3076 if (buf == NULL) { 3077 xmlErrMemory(ctxt, NULL); 3078 return(NULL); 3079 } 3080 } 3081 COPY_BUF(l,buf,len,c); 3082 NEXTL(l); 3083 /* 3084 * Pop-up of finished entities. 
3085 */ 3086 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3087 xmlPopInput(ctxt); 3088 3089 GROW; 3090 c = CUR_CHAR(l); 3091 if (c == 0) { 3092 GROW; 3093 c = CUR_CHAR(l); 3094 } 3095 } 3096 buf[len] = 0; 3097 3098 /* 3099 * Raise problem w.r.t. '&' and '%' being used in non-entities 3100 * reference constructs. Note Charref will be handled in 3101 * xmlStringDecodeEntities() 3102 */ 3103 cur = buf; 3104 while (*cur != 0) { /* non input consuming */ 3105 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3106 xmlChar *name; 3107 xmlChar tmp = *cur; 3108 3109 cur++; 3110 name = xmlParseStringName(ctxt, &cur); 3111 if ((name == NULL) || (*cur != ';')) { 3112 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3113 "EntityValue: '%c' forbidden except for entities references\n", 3114 tmp); 3115 } 3116 if ((tmp == '%') && (ctxt->inSubset == 1) && 3117 (ctxt->inputNr == 1)) { 3118 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3119 } 3120 if (name != NULL) 3121 xmlFree(name); 3122 if (*cur == 0) 3123 break; 3124 } 3125 cur++; 3126 } 3127 3128 /* 3129 * Then PEReference entities are substituted. 3130 */ 3131 if (c != stop) { 3132 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3133 xmlFree(buf); 3134 } else { 3135 NEXT; 3136 /* 3137 * NOTE: 4.4.7 Bypassed 3138 * When a general entity reference appears in the EntityValue in 3139 * an entity declaration, it is bypassed and left as is. 3140 * so XML_SUBSTITUTE_REF is not set here. 
3141 */ 3142 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3143 0, 0, 0); 3144 if (orig != NULL) 3145 *orig = buf; 3146 else 3147 xmlFree(buf); 3148 } 3149 3150 return(ret); 3151} 3152 3153/** 3154 * xmlParseAttValueComplex: 3155 * @ctxt: an XML parser context 3156 * @len: the resulting attribute len 3157 * @normalize: wether to apply the inner normalization 3158 * 3159 * parse a value for an attribute, this is the fallback function 3160 * of xmlParseAttValue() when the attribute parsing requires handling 3161 * of non-ASCII characters, or normalization compaction. 3162 * 3163 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3164 */ 3165static xmlChar * 3166xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3167 xmlChar limit = 0; 3168 xmlChar *buf = NULL; 3169 int len = 0; 3170 int buf_size = 0; 3171 int c, l, in_space = 0; 3172 xmlChar *current = NULL; 3173 xmlEntityPtr ent; 3174 3175 if (NXT(0) == '"') { 3176 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3177 limit = '"'; 3178 NEXT; 3179 } else if (NXT(0) == '\'') { 3180 limit = '\''; 3181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3182 NEXT; 3183 } else { 3184 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3185 return(NULL); 3186 } 3187 3188 /* 3189 * allocate a translation buffer. 3190 */ 3191 buf_size = XML_PARSER_BUFFER_SIZE; 3192 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3193 if (buf == NULL) goto mem_error; 3194 3195 /* 3196 * OK loop until we reach one of the ending char or a size limit. 
3197 */ 3198 c = CUR_CHAR(l); 3199 while ((NXT(0) != limit) && /* checked */ 3200 (c != '<')) { 3201 if (c == 0) break; 3202 if (c == '&') { 3203 in_space = 0; 3204 if (NXT(1) == '#') { 3205 int val = xmlParseCharRef(ctxt); 3206 3207 if (val == '&') { 3208 if (ctxt->replaceEntities) { 3209 if (len > buf_size - 10) { 3210 growBuffer(buf); 3211 } 3212 buf[len++] = '&'; 3213 } else { 3214 /* 3215 * The reparsing will be done in xmlStringGetNodeList() 3216 * called by the attribute() function in SAX.c 3217 */ 3218 if (len > buf_size - 10) { 3219 growBuffer(buf); 3220 } 3221 buf[len++] = '&'; 3222 buf[len++] = '#'; 3223 buf[len++] = '3'; 3224 buf[len++] = '8'; 3225 buf[len++] = ';'; 3226 } 3227 } else { 3228 if (len > buf_size - 10) { 3229 growBuffer(buf); 3230 } 3231 len += xmlCopyChar(0, &buf[len], val); 3232 } 3233 } else { 3234 ent = xmlParseEntityRef(ctxt); 3235 if ((ent != NULL) && 3236 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3237 if (len > buf_size - 10) { 3238 growBuffer(buf); 3239 } 3240 if ((ctxt->replaceEntities == 0) && 3241 (ent->content[0] == '&')) { 3242 buf[len++] = '&'; 3243 buf[len++] = '#'; 3244 buf[len++] = '3'; 3245 buf[len++] = '8'; 3246 buf[len++] = ';'; 3247 } else { 3248 buf[len++] = ent->content[0]; 3249 } 3250 } else if ((ent != NULL) && 3251 (ctxt->replaceEntities != 0)) { 3252 xmlChar *rep; 3253 3254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3255 rep = xmlStringDecodeEntities(ctxt, ent->content, 3256 XML_SUBSTITUTE_REF, 3257 0, 0, 0); 3258 if (rep != NULL) { 3259 current = rep; 3260 while (*current != 0) { /* non input consuming */ 3261 buf[len++] = *current++; 3262 if (len > buf_size - 10) { 3263 growBuffer(buf); 3264 } 3265 } 3266 xmlFree(rep); 3267 } 3268 } else { 3269 if (len > buf_size - 10) { 3270 growBuffer(buf); 3271 } 3272 if (ent->content != NULL) 3273 buf[len++] = ent->content[0]; 3274 } 3275 } else if (ent != NULL) { 3276 int i = xmlStrlen(ent->name); 3277 const xmlChar *cur = ent->name; 3278 3279 /* 3280 * 
This may look absurd but is needed to detect 3281 * entities problems 3282 */ 3283 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3284 (ent->content != NULL)) { 3285 xmlChar *rep; 3286 rep = xmlStringDecodeEntities(ctxt, ent->content, 3287 XML_SUBSTITUTE_REF, 0, 0, 0); 3288 if (rep != NULL) 3289 xmlFree(rep); 3290 } 3291 3292 /* 3293 * Just output the reference 3294 */ 3295 buf[len++] = '&'; 3296 if (len > buf_size - i - 10) { 3297 growBuffer(buf); 3298 } 3299 for (;i > 0;i--) 3300 buf[len++] = *cur++; 3301 buf[len++] = ';'; 3302 } 3303 } 3304 } else { 3305 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3306 if ((len != 0) || (!normalize)) { 3307 if ((!normalize) || (!in_space)) { 3308 COPY_BUF(l,buf,len,0x20); 3309 if (len > buf_size - 10) { 3310 growBuffer(buf); 3311 } 3312 } 3313 in_space = 1; 3314 } 3315 } else { 3316 in_space = 0; 3317 COPY_BUF(l,buf,len,c); 3318 if (len > buf_size - 10) { 3319 growBuffer(buf); 3320 } 3321 } 3322 NEXTL(l); 3323 } 3324 GROW; 3325 c = CUR_CHAR(l); 3326 } 3327 if ((in_space) && (normalize)) { 3328 while (buf[len - 1] == 0x20) len--; 3329 } 3330 buf[len] = 0; 3331 if (RAW == '<') { 3332 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3333 } else if (RAW != limit) { 3334 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3335 "AttValue: ' expected\n"); 3336 } else 3337 NEXT; 3338 if (attlen != NULL) *attlen = len; 3339 return(buf); 3340 3341mem_error: 3342 xmlErrMemory(ctxt, NULL); 3343 return(NULL); 3344} 3345 3346/** 3347 * xmlParseAttValue: 3348 * @ctxt: an XML parser context 3349 * 3350 * parse a value for an attribute 3351 * Note: the parser won't do substitution of entities here, this 3352 * will be handled later in xmlStringGetNodeList 3353 * 3354 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3355 * "'" ([^<&'] | Reference)* "'" 3356 * 3357 * 3.3.3 Attribute-Value Normalization: 3358 * Before the value of an attribute is passed to the application or 3359 * checked for validity, the XML 
processor must normalize it as follows: 3360 * - a character reference is processed by appending the referenced 3361 * character to the attribute value 3362 * - an entity reference is processed by recursively processing the 3363 * replacement text of the entity 3364 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3365 * appending #x20 to the normalized value, except that only a single 3366 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3367 * parsed entity or the literal entity value of an internal parsed entity 3368 * - other characters are processed by appending them to the normalized value 3369 * If the declared value is not CDATA, then the XML processor must further 3370 * process the normalized attribute value by discarding any leading and 3371 * trailing space (#x20) characters, and by replacing sequences of space 3372 * (#x20) characters by a single space (#x20) character. 3373 * All attributes for which no declaration has been read should be treated 3374 * by a non-validating parser as if declared CDATA. 3375 * 3376 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3377 */ 3378 3379 3380xmlChar * 3381xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3382 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3383} 3384 3385/** 3386 * xmlParseSystemLiteral: 3387 * @ctxt: an XML parser context 3388 * 3389 * parse an XML Literal 3390 * 3391 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3392 * 3393 * Returns the SystemLiteral parsed or NULL 3394 */ 3395 3396xmlChar * 3397xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3398 xmlChar *buf = NULL; 3399 int len = 0; 3400 int size = XML_PARSER_BUFFER_SIZE; 3401 int cur, l; 3402 xmlChar stop; 3403 int state = ctxt->instate; 3404 int count = 0; 3405 3406 SHRINK; 3407 if (RAW == '"') { 3408 NEXT; 3409 stop = '"'; 3410 } else if (RAW == '\'') { 3411 NEXT; 3412 stop = '\''; 3413 } else { 3414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3415 return(NULL); 3416 } 3417 3418 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3419 if (buf == NULL) { 3420 xmlErrMemory(ctxt, NULL); 3421 return(NULL); 3422 } 3423 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3424 cur = CUR_CHAR(l); 3425 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3426 if (len + 5 >= size) { 3427 size *= 2; 3428 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3429 if (buf == NULL) { 3430 xmlErrMemory(ctxt, NULL); 3431 ctxt->instate = (xmlParserInputState) state; 3432 return(NULL); 3433 } 3434 } 3435 count++; 3436 if (count > 50) { 3437 GROW; 3438 count = 0; 3439 } 3440 COPY_BUF(l,buf,len,cur); 3441 NEXTL(l); 3442 cur = CUR_CHAR(l); 3443 if (cur == 0) { 3444 GROW; 3445 SHRINK; 3446 cur = CUR_CHAR(l); 3447 } 3448 } 3449 buf[len] = 0; 3450 ctxt->instate = (xmlParserInputState) state; 3451 if (!IS_CHAR(cur)) { 3452 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3453 } else { 3454 NEXT; 3455 } 3456 return(buf); 3457} 3458 3459/** 3460 * xmlParsePubidLiteral: 3461 * @ctxt: an XML parser context 3462 * 3463 * parse an XML public literal 3464 * 3465 * [12] PubidLiteral ::= '"' PubidChar* '"' 
| "'" (PubidChar - "'")* "'" 3466 * 3467 * Returns the PubidLiteral parsed or NULL. 3468 */ 3469 3470xmlChar * 3471xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3472 xmlChar *buf = NULL; 3473 int len = 0; 3474 int size = XML_PARSER_BUFFER_SIZE; 3475 xmlChar cur; 3476 xmlChar stop; 3477 int count = 0; 3478 xmlParserInputState oldstate = ctxt->instate; 3479 3480 SHRINK; 3481 if (RAW == '"') { 3482 NEXT; 3483 stop = '"'; 3484 } else if (RAW == '\'') { 3485 NEXT; 3486 stop = '\''; 3487 } else { 3488 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3489 return(NULL); 3490 } 3491 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3492 if (buf == NULL) { 3493 xmlErrMemory(ctxt, NULL); 3494 return(NULL); 3495 } 3496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3497 cur = CUR; 3498 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3499 if (len + 1 >= size) { 3500 size *= 2; 3501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3502 if (buf == NULL) { 3503 xmlErrMemory(ctxt, NULL); 3504 return(NULL); 3505 } 3506 } 3507 buf[len++] = cur; 3508 count++; 3509 if (count > 50) { 3510 GROW; 3511 count = 0; 3512 } 3513 NEXT; 3514 cur = CUR; 3515 if (cur == 0) { 3516 GROW; 3517 SHRINK; 3518 cur = CUR; 3519 } 3520 } 3521 buf[len] = 0; 3522 if (cur != stop) { 3523 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3524 } else { 3525 NEXT; 3526 } 3527 ctxt->instate = oldstate; 3528 return(buf); 3529} 3530 3531void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3532/** 3533 * xmlParseCharData: 3534 * @ctxt: an XML parser context 3535 * @cdata: int indicating whether we are within a CDATA section 3536 * 3537 * parse a CharData section. 3538 * if we are within a CDATA section ']]>' marks an end of section. 
3539 * 3540 * The right angle bracket (>) may be represented using the string ">", 3541 * and must, for compatibility, be escaped using ">" or a character 3542 * reference when it appears in the string "]]>" in content, when that 3543 * string is not marking the end of a CDATA section. 3544 * 3545 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 3546 */ 3547 3548void 3549xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 3550 const xmlChar *in; 3551 int nbchar = 0; 3552 int line = ctxt->input->line; 3553 int col = ctxt->input->col; 3554 3555 SHRINK; 3556 GROW; 3557 /* 3558 * Accelerated common case where input don't need to be 3559 * modified before passing it to the handler. 3560 */ 3561 if (!cdata) { 3562 in = ctxt->input->cur; 3563 do { 3564get_more: 3565 while (((*in >= 0x20) && (*in != '<') && (*in != ']') && 3566 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) 3567 in++; 3568 if (*in == 0xA) { 3569 ctxt->input->line++; 3570 in++; 3571 while (*in == 0xA) { 3572 ctxt->input->line++; 3573 in++; 3574 } 3575 goto get_more; 3576 } 3577 if (*in == ']') { 3578 if ((in[1] == ']') && (in[2] == '>')) { 3579 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3580 ctxt->input->cur = in; 3581 return; 3582 } 3583 in++; 3584 goto get_more; 3585 } 3586 nbchar = in - ctxt->input->cur; 3587 if (nbchar > 0) { 3588 if ((ctxt->sax->ignorableWhitespace != 3589 ctxt->sax->characters) && 3590 (IS_BLANK_CH(*ctxt->input->cur))) { 3591 const xmlChar *tmp = ctxt->input->cur; 3592 ctxt->input->cur = in; 3593 3594 if (areBlanks(ctxt, tmp, nbchar)) { 3595 ctxt->sax->ignorableWhitespace(ctxt->userData, 3596 tmp, nbchar); 3597 } else if (ctxt->sax->characters != NULL) 3598 ctxt->sax->characters(ctxt->userData, 3599 tmp, nbchar); 3600 line = ctxt->input->line; 3601 col = ctxt->input->col; 3602 } else { 3603 if (ctxt->sax->characters != NULL) 3604 ctxt->sax->characters(ctxt->userData, 3605 ctxt->input->cur, nbchar); 3606 line = ctxt->input->line; 3607 col = ctxt->input->col; 3608 } 3609 
} 3610 ctxt->input->cur = in; 3611 if (*in == 0xD) { 3612 in++; 3613 if (*in == 0xA) { 3614 ctxt->input->cur = in; 3615 in++; 3616 ctxt->input->line++; 3617 continue; /* while */ 3618 } 3619 in--; 3620 } 3621 if (*in == '<') { 3622 return; 3623 } 3624 if (*in == '&') { 3625 return; 3626 } 3627 SHRINK; 3628 GROW; 3629 in = ctxt->input->cur; 3630 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 3631 nbchar = 0; 3632 } 3633 ctxt->input->line = line; 3634 ctxt->input->col = col; 3635 xmlParseCharDataComplex(ctxt, cdata); 3636} 3637 3638/** 3639 * xmlParseCharDataComplex: 3640 * @ctxt: an XML parser context 3641 * @cdata: int indicating whether we are within a CDATA section 3642 * 3643 * parse a CharData section.this is the fallback function 3644 * of xmlParseCharData() when the parsing requires handling 3645 * of non-ASCII characters. 3646 */ 3647void 3648xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3649 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3650 int nbchar = 0; 3651 int cur, l; 3652 int count = 0; 3653 3654 SHRINK; 3655 GROW; 3656 cur = CUR_CHAR(l); 3657 while ((cur != '<') && /* checked */ 3658 (cur != '&') && 3659 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3660 if ((cur == ']') && (NXT(1) == ']') && 3661 (NXT(2) == '>')) { 3662 if (cdata) break; 3663 else { 3664 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3665 } 3666 } 3667 COPY_BUF(l,buf,nbchar,cur); 3668 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3669 buf[nbchar] = 0; 3670 3671 /* 3672 * OK the segment is to be consumed as chars. 
3673 */ 3674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3675 if (areBlanks(ctxt, buf, nbchar)) { 3676 if (ctxt->sax->ignorableWhitespace != NULL) 3677 ctxt->sax->ignorableWhitespace(ctxt->userData, 3678 buf, nbchar); 3679 } else { 3680 if (ctxt->sax->characters != NULL) 3681 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3682 } 3683 } 3684 nbchar = 0; 3685 } 3686 count++; 3687 if (count > 50) { 3688 GROW; 3689 count = 0; 3690 } 3691 NEXTL(l); 3692 cur = CUR_CHAR(l); 3693 } 3694 if (nbchar != 0) { 3695 buf[nbchar] = 0; 3696 /* 3697 * OK the segment is to be consumed as chars. 3698 */ 3699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3700 if (areBlanks(ctxt, buf, nbchar)) { 3701 if (ctxt->sax->ignorableWhitespace != NULL) 3702 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3703 } else { 3704 if (ctxt->sax->characters != NULL) 3705 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3706 } 3707 } 3708 } 3709} 3710 3711/** 3712 * xmlParseExternalID: 3713 * @ctxt: an XML parser context 3714 * @publicID: a xmlChar** receiving PubidLiteral 3715 * @strict: indicate whether we should restrict parsing to only 3716 * production [75], see NOTE below 3717 * 3718 * Parse an External ID or a Public ID 3719 * 3720 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3721 * 'PUBLIC' S PubidLiteral S SystemLiteral 3722 * 3723 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3724 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3725 * 3726 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3727 * 3728 * Returns the function returns SystemLiteral and in the second 3729 * case publicID receives PubidLiteral, is strict is off 3730 * it is possible to return NULL and have publicID set. 
3731 */ 3732 3733xmlChar * 3734xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3735 xmlChar *URI = NULL; 3736 3737 SHRINK; 3738 3739 *publicID = NULL; 3740 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3741 SKIP(6); 3742 if (!IS_BLANK_CH(CUR)) { 3743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3744 "Space required after 'SYSTEM'\n"); 3745 } 3746 SKIP_BLANKS; 3747 URI = xmlParseSystemLiteral(ctxt); 3748 if (URI == NULL) { 3749 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3750 } 3751 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3752 SKIP(6); 3753 if (!IS_BLANK_CH(CUR)) { 3754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3755 "Space required after 'PUBLIC'\n"); 3756 } 3757 SKIP_BLANKS; 3758 *publicID = xmlParsePubidLiteral(ctxt); 3759 if (*publicID == NULL) { 3760 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3761 } 3762 if (strict) { 3763 /* 3764 * We don't handle [83] so "S SystemLiteral" is required. 3765 */ 3766 if (!IS_BLANK_CH(CUR)) { 3767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3768 "Space required after the Public Identifier\n"); 3769 } 3770 } else { 3771 /* 3772 * We handle [83] so we return immediately, if 3773 * "S SystemLiteral" is not detected. From a purely parsing 3774 * point of view that's a nice mess. 3775 */ 3776 const xmlChar *ptr; 3777 GROW; 3778 3779 ptr = CUR_PTR; 3780 if (!IS_BLANK_CH(*ptr)) return(NULL); 3781 3782 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3783 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3784 } 3785 SKIP_BLANKS; 3786 URI = xmlParseSystemLiteral(ctxt); 3787 if (URI == NULL) { 3788 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3789 } 3790 } 3791 return(URI); 3792} 3793 3794/** 3795 * xmlParseComment: 3796 * @ctxt: an XML parser context 3797 * 3798 * Skip an XML (SGML) comment <!-- .... --> 3799 * The spec says that "For compatibility, the string "--" (double-hyphen) 3800 * must not occur within comments. 
" 3801 * 3802 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3803 */ 3804void 3805xmlParseComment(xmlParserCtxtPtr ctxt) { 3806 xmlChar *buf = NULL; 3807 int len; 3808 int size = XML_PARSER_BUFFER_SIZE; 3809 int q, ql; 3810 int r, rl; 3811 int cur, l; 3812 xmlParserInputState state; 3813 xmlParserInputPtr input = ctxt->input; 3814 int count = 0; 3815 3816 /* 3817 * Check that there is a comment right here. 3818 */ 3819 if ((RAW != '<') || (NXT(1) != '!') || 3820 (NXT(2) != '-') || (NXT(3) != '-')) return; 3821 3822 state = ctxt->instate; 3823 ctxt->instate = XML_PARSER_COMMENT; 3824 SHRINK; 3825 SKIP(4); 3826 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3827 if (buf == NULL) { 3828 xmlErrMemory(ctxt, NULL); 3829 ctxt->instate = state; 3830 return; 3831 } 3832 q = CUR_CHAR(ql); 3833 if (q == 0) 3834 goto not_terminated; 3835 NEXTL(ql); 3836 r = CUR_CHAR(rl); 3837 if (r == 0) 3838 goto not_terminated; 3839 NEXTL(rl); 3840 cur = CUR_CHAR(l); 3841 if (cur == 0) 3842 goto not_terminated; 3843 len = 0; 3844 while (IS_CHAR(cur) && /* checked */ 3845 ((cur != '>') || 3846 (r != '-') || (q != '-'))) { 3847 if ((r == '-') && (q == '-')) { 3848 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 3849 } 3850 if (len + 5 >= size) { 3851 size *= 2; 3852 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3853 if (buf == NULL) { 3854 xmlErrMemory(ctxt, NULL); 3855 ctxt->instate = state; 3856 return; 3857 } 3858 } 3859 COPY_BUF(ql,buf,len,q); 3860 q = r; 3861 ql = rl; 3862 r = cur; 3863 rl = l; 3864 3865 count++; 3866 if (count > 50) { 3867 GROW; 3868 count = 0; 3869 } 3870 NEXTL(l); 3871 cur = CUR_CHAR(l); 3872 if (cur == 0) { 3873 SHRINK; 3874 GROW; 3875 cur = CUR_CHAR(l); 3876 } 3877 } 3878 buf[len] = 0; 3879 if (!IS_CHAR(cur)) { 3880 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3881 "Comment not terminated \n<!--%.50s\n", buf); 3882 xmlFree(buf); 3883 } else { 3884 if (input != ctxt->input) { 3885 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 3886 "Comment doesn't start and stop in the same entity\n"); 3887 } 3888 NEXT; 3889 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3890 (!ctxt->disableSAX)) 3891 ctxt->sax->comment(ctxt->userData, buf); 3892 xmlFree(buf); 3893 } 3894 ctxt->instate = state; 3895 return; 3896not_terminated: 3897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3898 "Comment not terminated\n", NULL); 3899 xmlFree(buf); 3900} 3901 3902/** 3903 * xmlParsePITarget: 3904 * @ctxt: an XML parser context 3905 * 3906 * parse the name of a PI 3907 * 3908 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3909 * 3910 * Returns the PITarget name or NULL 3911 */ 3912 3913const xmlChar * 3914xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3915 const xmlChar *name; 3916 3917 name = xmlParseName(ctxt); 3918 if ((name != NULL) && 3919 ((name[0] == 'x') || (name[0] == 'X')) && 3920 ((name[1] == 'm') || (name[1] == 'M')) && 3921 ((name[2] == 'l') || (name[2] == 'L'))) { 3922 int i; 3923 if ((name[0] == 'x') && (name[1] == 'm') && 3924 (name[2] == 'l') && (name[3] == 0)) { 3925 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3926 "XML declaration allowed only at the start of the document\n"); 3927 return(name); 3928 } else if (name[3] == 0) { 3929 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 3930 return(name); 3931 } 3932 for (i = 0;;i++) { 3933 if (xmlW3CPIs[i] == NULL) break; 3934 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3935 return(name); 3936 } 3937 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3938 "xmlParsePITarget: invalid name prefix 'xml'\n", 3939 NULL, NULL); 3940 } 3941 return(name); 3942} 3943 3944#ifdef LIBXML_CATALOG_ENABLED 3945/** 3946 * xmlParseCatalogPI: 3947 * @ctxt: an XML parser context 3948 * @catalog: the PI value string 3949 * 3950 * parse an XML Catalog Processing Instruction. 
3951 * 3952 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3953 * 3954 * Occurs only if allowed by the user and if happening in the Misc 3955 * part of the document before any doctype informations 3956 * This will add the given catalog to the parsing context in order 3957 * to be used if there is a resolution need further down in the document 3958 */ 3959 3960static void 3961xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3962 xmlChar *URL = NULL; 3963 const xmlChar *tmp, *base; 3964 xmlChar marker; 3965 3966 tmp = catalog; 3967 while (IS_BLANK_CH(*tmp)) tmp++; 3968 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3969 goto error; 3970 tmp += 7; 3971 while (IS_BLANK_CH(*tmp)) tmp++; 3972 if (*tmp != '=') { 3973 return; 3974 } 3975 tmp++; 3976 while (IS_BLANK_CH(*tmp)) tmp++; 3977 marker = *tmp; 3978 if ((marker != '\'') && (marker != '"')) 3979 goto error; 3980 tmp++; 3981 base = tmp; 3982 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3983 if (*tmp == 0) 3984 goto error; 3985 URL = xmlStrndup(base, tmp - base); 3986 tmp++; 3987 while (IS_BLANK_CH(*tmp)) tmp++; 3988 if (*tmp != 0) 3989 goto error; 3990 3991 if (URL != NULL) { 3992 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3993 xmlFree(URL); 3994 } 3995 return; 3996 3997error: 3998 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 3999 "Catalog PI syntax error: %s\n", 4000 catalog, NULL); 4001 if (URL != NULL) 4002 xmlFree(URL); 4003} 4004#endif 4005 4006/** 4007 * xmlParsePI: 4008 * @ctxt: an XML parser context 4009 * 4010 * parse an XML Processing Instruction. 4011 * 4012 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4013 * 4014 * The processing is transfered to SAX once parsed. 
4015 */ 4016 4017void 4018xmlParsePI(xmlParserCtxtPtr ctxt) { 4019 xmlChar *buf = NULL; 4020 int len = 0; 4021 int size = XML_PARSER_BUFFER_SIZE; 4022 int cur, l; 4023 const xmlChar *target; 4024 xmlParserInputState state; 4025 int count = 0; 4026 4027 if ((RAW == '<') && (NXT(1) == '?')) { 4028 xmlParserInputPtr input = ctxt->input; 4029 state = ctxt->instate; 4030 ctxt->instate = XML_PARSER_PI; 4031 /* 4032 * this is a Processing Instruction. 4033 */ 4034 SKIP(2); 4035 SHRINK; 4036 4037 /* 4038 * Parse the target name and check for special support like 4039 * namespace. 4040 */ 4041 target = xmlParsePITarget(ctxt); 4042 if (target != NULL) { 4043 if ((RAW == '?') && (NXT(1) == '>')) { 4044 if (input != ctxt->input) { 4045 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4046 "PI declaration doesn't start and stop in the same entity\n"); 4047 } 4048 SKIP(2); 4049 4050 /* 4051 * SAX: PI detected. 4052 */ 4053 if ((ctxt->sax) && (!ctxt->disableSAX) && 4054 (ctxt->sax->processingInstruction != NULL)) 4055 ctxt->sax->processingInstruction(ctxt->userData, 4056 target, NULL); 4057 ctxt->instate = state; 4058 return; 4059 } 4060 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4061 if (buf == NULL) { 4062 xmlErrMemory(ctxt, NULL); 4063 ctxt->instate = state; 4064 return; 4065 } 4066 cur = CUR; 4067 if (!IS_BLANK(cur)) { 4068 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4069 "ParsePI: PI %s space expected\n", target); 4070 } 4071 SKIP_BLANKS; 4072 cur = CUR_CHAR(l); 4073 while (IS_CHAR(cur) && /* checked */ 4074 ((cur != '?') || (NXT(1) != '>'))) { 4075 if (len + 5 >= size) { 4076 size *= 2; 4077 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4078 if (buf == NULL) { 4079 xmlErrMemory(ctxt, NULL); 4080 ctxt->instate = state; 4081 return; 4082 } 4083 } 4084 count++; 4085 if (count > 50) { 4086 GROW; 4087 count = 0; 4088 } 4089 COPY_BUF(l,buf,len,cur); 4090 NEXTL(l); 4091 cur = CUR_CHAR(l); 4092 if (cur == 0) { 4093 SHRINK; 4094 GROW; 4095 cur = 
CUR_CHAR(l); 4096 } 4097 } 4098 buf[len] = 0; 4099 if (cur != '?') { 4100 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4101 "ParsePI: PI %s never end ...\n", target); 4102 } else { 4103 if (input != ctxt->input) { 4104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4105 "PI declaration doesn't start and stop in the same entity\n"); 4106 } 4107 SKIP(2); 4108 4109#ifdef LIBXML_CATALOG_ENABLED 4110 if (((state == XML_PARSER_MISC) || 4111 (state == XML_PARSER_START)) && 4112 (xmlStrEqual(target, XML_CATALOG_PI))) { 4113 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4114 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4115 (allow == XML_CATA_ALLOW_ALL)) 4116 xmlParseCatalogPI(ctxt, buf); 4117 } 4118#endif 4119 4120 4121 /* 4122 * SAX: PI detected. 4123 */ 4124 if ((ctxt->sax) && (!ctxt->disableSAX) && 4125 (ctxt->sax->processingInstruction != NULL)) 4126 ctxt->sax->processingInstruction(ctxt->userData, 4127 target, buf); 4128 } 4129 xmlFree(buf); 4130 } else { 4131 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4132 } 4133 ctxt->instate = state; 4134 } 4135} 4136 4137/** 4138 * xmlParseNotationDecl: 4139 * @ctxt: an XML parser context 4140 * 4141 * parse a notation declaration 4142 * 4143 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4144 * 4145 * Hence there is actually 3 choices: 4146 * 'PUBLIC' S PubidLiteral 4147 * 'PUBLIC' S PubidLiteral S SystemLiteral 4148 * and 'SYSTEM' S SystemLiteral 4149 * 4150 * See the NOTE on xmlParseExternalID(). 
4151 */ 4152 4153void 4154xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4155 const xmlChar *name; 4156 xmlChar *Pubid; 4157 xmlChar *Systemid; 4158 4159 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4160 xmlParserInputPtr input = ctxt->input; 4161 SHRINK; 4162 SKIP(10); 4163 if (!IS_BLANK_CH(CUR)) { 4164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4165 "Space required after '<!NOTATION'\n"); 4166 return; 4167 } 4168 SKIP_BLANKS; 4169 4170 name = xmlParseName(ctxt); 4171 if (name == NULL) { 4172 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4173 return; 4174 } 4175 if (!IS_BLANK_CH(CUR)) { 4176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4177 "Space required after the NOTATION name'\n"); 4178 return; 4179 } 4180 SKIP_BLANKS; 4181 4182 /* 4183 * Parse the IDs. 4184 */ 4185 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4186 SKIP_BLANKS; 4187 4188 if (RAW == '>') { 4189 if (input != ctxt->input) { 4190 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4191 "Notation declaration doesn't start and stop in the same entity\n"); 4192 } 4193 NEXT; 4194 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4195 (ctxt->sax->notationDecl != NULL)) 4196 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4197 } else { 4198 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4199 } 4200 if (Systemid != NULL) xmlFree(Systemid); 4201 if (Pubid != NULL) xmlFree(Pubid); 4202 } 4203} 4204 4205/** 4206 * xmlParseEntityDecl: 4207 * @ctxt: an XML parser context 4208 * 4209 * parse <!ENTITY declarations 4210 * 4211 * [70] EntityDecl ::= GEDecl | PEDecl 4212 * 4213 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4214 * 4215 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4216 * 4217 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
4218 * 4219 * [74] PEDef ::= EntityValue | ExternalID 4220 * 4221 * [76] NDataDecl ::= S 'NDATA' S Name 4222 * 4223 * [ VC: Notation Declared ] 4224 * The Name must match the declared name of a notation. 4225 */ 4226 4227void 4228xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4229 const xmlChar *name = NULL; 4230 xmlChar *value = NULL; 4231 xmlChar *URI = NULL, *literal = NULL; 4232 const xmlChar *ndata = NULL; 4233 int isParameter = 0; 4234 xmlChar *orig = NULL; 4235 int skipped; 4236 4237 GROW; 4238 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 4239 xmlParserInputPtr input = ctxt->input; 4240 SHRINK; 4241 SKIP(8); 4242 skipped = SKIP_BLANKS; 4243 if (skipped == 0) { 4244 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4245 "Space required after '<!ENTITY'\n"); 4246 } 4247 4248 if (RAW == '%') { 4249 NEXT; 4250 skipped = SKIP_BLANKS; 4251 if (skipped == 0) { 4252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4253 "Space required after '%'\n"); 4254 } 4255 isParameter = 1; 4256 } 4257 4258 name = xmlParseName(ctxt); 4259 if (name == NULL) { 4260 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4261 "xmlParseEntityDecl: no name\n"); 4262 return; 4263 } 4264 skipped = SKIP_BLANKS; 4265 if (skipped == 0) { 4266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4267 "Space required after the entity name\n"); 4268 } 4269 4270 ctxt->instate = XML_PARSER_ENTITY_DECL; 4271 /* 4272 * handle the various case of definitions... 
4273 */ 4274 if (isParameter) { 4275 if ((RAW == '"') || (RAW == '\'')) { 4276 value = xmlParseEntityValue(ctxt, &orig); 4277 if (value) { 4278 if ((ctxt->sax != NULL) && 4279 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4280 ctxt->sax->entityDecl(ctxt->userData, name, 4281 XML_INTERNAL_PARAMETER_ENTITY, 4282 NULL, NULL, value); 4283 } 4284 } else { 4285 URI = xmlParseExternalID(ctxt, &literal, 1); 4286 if ((URI == NULL) && (literal == NULL)) { 4287 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4288 } 4289 if (URI) { 4290 xmlURIPtr uri; 4291 4292 uri = xmlParseURI((const char *) URI); 4293 if (uri == NULL) { 4294 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4295 "Invalid URI: %s\n", URI); 4296 /* 4297 * This really ought to be a well formedness error 4298 * but the XML Core WG decided otherwise c.f. issue 4299 * E26 of the XML erratas. 4300 */ 4301 } else { 4302 if (uri->fragment != NULL) { 4303 /* 4304 * Okay this is foolish to block those but not 4305 * invalid URIs. 4306 */ 4307 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4308 } else { 4309 if ((ctxt->sax != NULL) && 4310 (!ctxt->disableSAX) && 4311 (ctxt->sax->entityDecl != NULL)) 4312 ctxt->sax->entityDecl(ctxt->userData, name, 4313 XML_EXTERNAL_PARAMETER_ENTITY, 4314 literal, URI, NULL); 4315 } 4316 xmlFreeURI(uri); 4317 } 4318 } 4319 } 4320 } else { 4321 if ((RAW == '"') || (RAW == '\'')) { 4322 value = xmlParseEntityValue(ctxt, &orig); 4323 if ((ctxt->sax != NULL) && 4324 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4325 ctxt->sax->entityDecl(ctxt->userData, name, 4326 XML_INTERNAL_GENERAL_ENTITY, 4327 NULL, NULL, value); 4328 /* 4329 * For expat compatibility in SAX mode. 
4330 */ 4331 if ((ctxt->myDoc == NULL) || 4332 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4333 if (ctxt->myDoc == NULL) { 4334 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4335 } 4336 if (ctxt->myDoc->intSubset == NULL) 4337 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4338 BAD_CAST "fake", NULL, NULL); 4339 4340 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4341 NULL, NULL, value); 4342 } 4343 } else { 4344 URI = xmlParseExternalID(ctxt, &literal, 1); 4345 if ((URI == NULL) && (literal == NULL)) { 4346 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4347 } 4348 if (URI) { 4349 xmlURIPtr uri; 4350 4351 uri = xmlParseURI((const char *)URI); 4352 if (uri == NULL) { 4353 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4354 "Invalid URI: %s\n", URI); 4355 /* 4356 * This really ought to be a well formedness error 4357 * but the XML Core WG decided otherwise c.f. issue 4358 * E26 of the XML erratas. 4359 */ 4360 } else { 4361 if (uri->fragment != NULL) { 4362 /* 4363 * Okay this is foolish to block those but not 4364 * invalid URIs. 
4365 */ 4366 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4367 } 4368 xmlFreeURI(uri); 4369 } 4370 } 4371 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 4372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4373 "Space required before 'NDATA'\n"); 4374 } 4375 SKIP_BLANKS; 4376 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 4377 SKIP(5); 4378 if (!IS_BLANK_CH(CUR)) { 4379 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4380 "Space required after 'NDATA'\n"); 4381 } 4382 SKIP_BLANKS; 4383 ndata = xmlParseName(ctxt); 4384 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4385 (ctxt->sax->unparsedEntityDecl != NULL)) 4386 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4387 literal, URI, ndata); 4388 } else { 4389 if ((ctxt->sax != NULL) && 4390 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4391 ctxt->sax->entityDecl(ctxt->userData, name, 4392 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4393 literal, URI, NULL); 4394 /* 4395 * For expat compatibility in SAX mode. 4396 * assuming the entity repalcement was asked for 4397 */ 4398 if ((ctxt->replaceEntities != 0) && 4399 ((ctxt->myDoc == NULL) || 4400 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4401 if (ctxt->myDoc == NULL) { 4402 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4403 } 4404 4405 if (ctxt->myDoc->intSubset == NULL) 4406 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4407 BAD_CAST "fake", NULL, NULL); 4408 xmlSAX2EntityDecl(ctxt, name, 4409 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4410 literal, URI, NULL); 4411 } 4412 } 4413 } 4414 } 4415 SKIP_BLANKS; 4416 if (RAW != '>') { 4417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4418 "xmlParseEntityDecl: entity %s not terminated\n", name); 4419 } else { 4420 if (input != ctxt->input) { 4421 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4422 "Entity declaration doesn't start and stop in the same entity\n"); 4423 } 4424 NEXT; 4425 } 4426 if (orig != NULL) { 4427 /* 4428 * Ugly mechanism to save the raw entity value. 
4429 */ 4430 xmlEntityPtr cur = NULL; 4431 4432 if (isParameter) { 4433 if ((ctxt->sax != NULL) && 4434 (ctxt->sax->getParameterEntity != NULL)) 4435 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4436 } else { 4437 if ((ctxt->sax != NULL) && 4438 (ctxt->sax->getEntity != NULL)) 4439 cur = ctxt->sax->getEntity(ctxt->userData, name); 4440 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4441 cur = xmlSAX2GetEntity(ctxt, name); 4442 } 4443 } 4444 if (cur != NULL) { 4445 if (cur->orig != NULL) 4446 xmlFree(orig); 4447 else 4448 cur->orig = orig; 4449 } else 4450 xmlFree(orig); 4451 } 4452 if (value != NULL) xmlFree(value); 4453 if (URI != NULL) xmlFree(URI); 4454 if (literal != NULL) xmlFree(literal); 4455 } 4456} 4457 4458/** 4459 * xmlParseDefaultDecl: 4460 * @ctxt: an XML parser context 4461 * @value: Receive a possible fixed default value for the attribute 4462 * 4463 * Parse an attribute default declaration 4464 * 4465 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4466 * 4467 * [ VC: Required Attribute ] 4468 * if the default declaration is the keyword #REQUIRED, then the 4469 * attribute must be specified for all elements of the type in the 4470 * attribute-list declaration. 4471 * 4472 * [ VC: Attribute Default Legal ] 4473 * The declared default value must meet the lexical constraints of 4474 * the declared attribute type c.f. xmlValidateAttributeDecl() 4475 * 4476 * [ VC: Fixed Attribute Default ] 4477 * if an attribute has a default value declared with the #FIXED 4478 * keyword, instances of that attribute must match the default value. 4479 * 4480 * [ WFC: No < in Attribute Values ] 4481 * handled in xmlParseAttValue() 4482 * 4483 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4484 * or XML_ATTRIBUTE_FIXED. 
4485 */ 4486 4487int 4488xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4489 int val; 4490 xmlChar *ret; 4491 4492 *value = NULL; 4493 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4494 SKIP(9); 4495 return(XML_ATTRIBUTE_REQUIRED); 4496 } 4497 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4498 SKIP(8); 4499 return(XML_ATTRIBUTE_IMPLIED); 4500 } 4501 val = XML_ATTRIBUTE_NONE; 4502 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4503 SKIP(6); 4504 val = XML_ATTRIBUTE_FIXED; 4505 if (!IS_BLANK_CH(CUR)) { 4506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4507 "Space required after '#FIXED'\n"); 4508 } 4509 SKIP_BLANKS; 4510 } 4511 ret = xmlParseAttValue(ctxt); 4512 ctxt->instate = XML_PARSER_DTD; 4513 if (ret == NULL) { 4514 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4515 "Attribute default value declaration error\n"); 4516 } else 4517 *value = ret; 4518 return(val); 4519} 4520 4521/** 4522 * xmlParseNotationType: 4523 * @ctxt: an XML parser context 4524 * 4525 * parse an Notation attribute type. 4526 * 4527 * Note: the leading 'NOTATION' S part has already being parsed... 4528 * 4529 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4530 * 4531 * [ VC: Notation Attributes ] 4532 * Values of this type must match one of the notation names included 4533 * in the declaration; all notation names in the declaration must be declared. 
4534 * 4535 * Returns: the notation attribute tree built while parsing 4536 */ 4537 4538xmlEnumerationPtr 4539xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4540 const xmlChar *name; 4541 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4542 4543 if (RAW != '(') { 4544 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4545 return(NULL); 4546 } 4547 SHRINK; 4548 do { 4549 NEXT; 4550 SKIP_BLANKS; 4551 name = xmlParseName(ctxt); 4552 if (name == NULL) { 4553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4554 "Name expected in NOTATION declaration\n"); 4555 return(ret); 4556 } 4557 cur = xmlCreateEnumeration(name); 4558 if (cur == NULL) return(ret); 4559 if (last == NULL) ret = last = cur; 4560 else { 4561 last->next = cur; 4562 last = cur; 4563 } 4564 SKIP_BLANKS; 4565 } while (RAW == '|'); 4566 if (RAW != ')') { 4567 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4568 if ((last != NULL) && (last != ret)) 4569 xmlFreeEnumeration(last); 4570 return(ret); 4571 } 4572 NEXT; 4573 return(ret); 4574} 4575 4576/** 4577 * xmlParseEnumerationType: 4578 * @ctxt: an XML parser context 4579 * 4580 * parse an Enumeration attribute type. 4581 * 4582 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * On a syntax or allocation error the tokens collected so far are
 * returned (NULL if there are none).
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlEnumerationPtr head = NULL;
    xmlEnumerationPtr *link = &head;   /* slot the next node goes into */
    xmlEnumerationPtr node;
    xmlChar *token;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    for (;;) {
        /* consume the '(' on the first pass, a '|' afterwards */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }
        node = xmlCreateEnumeration(token);
        /* the token string is released as soon as the node is created */
        xmlFree(token);
        if (node == NULL)
            return(head);

        /* append at the tail */
        *link = node;
        link = &node->next;

        SKIP_BLANKS;
        if (RAW != '|')
            break;
    }
    if (RAW == ')') {
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
    }
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' 4637 * 4638 * 4639 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4640 */ 4641 4642int 4643xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4644 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4645 SKIP(8); 4646 if (!IS_BLANK_CH(CUR)) { 4647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4648 "Space required after 'NOTATION'\n"); 4649 return(0); 4650 } 4651 SKIP_BLANKS; 4652 *tree = xmlParseNotationType(ctxt); 4653 if (*tree == NULL) return(0); 4654 return(XML_ATTRIBUTE_NOTATION); 4655 } 4656 *tree = xmlParseEnumerationType(ctxt); 4657 if (*tree == NULL) return(0); 4658 return(XML_ATTRIBUTE_ENUMERATION); 4659} 4660 4661/** 4662 * xmlParseAttributeType: 4663 * @ctxt: an XML parser context 4664 * @tree: the enumeration tree built while parsing 4665 * 4666 * parse the Attribute list def for an element 4667 * 4668 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4669 * 4670 * [55] StringType ::= 'CDATA' 4671 * 4672 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4673 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4674 * 4675 * Validity constraints for attribute values syntax are checked in 4676 * xmlValidateAttributeValue() 4677 * 4678 * [ VC: ID ] 4679 * Values of type ID must match the Name production. A name must not 4680 * appear more than once in an XML document as a value of this type; 4681 * i.e., ID values must uniquely identify the elements which bear them. 4682 * 4683 * [ VC: One ID per Element Type ] 4684 * No element type may have more than one ID attribute specified. 4685 * 4686 * [ VC: ID Attribute Default ] 4687 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4688 * 4689 * [ VC: IDREF ] 4690 * Values of type IDREF must match the Name production, and values 4691 * of type IDREFS must match Names; each IDREF Name must match the value 4692 * of an ID attribute on some element in the XML document; i.e. 
IDREF 4693 * values must match the value of some ID attribute. 4694 * 4695 * [ VC: Entity Name ] 4696 * Values of type ENTITY must match the Name production, values 4697 * of type ENTITIES must match Names; each Entity Name must match the 4698 * name of an unparsed entity declared in the DTD. 4699 * 4700 * [ VC: Name Token ] 4701 * Values of type NMTOKEN must match the Nmtoken production; values 4702 * of type NMTOKENS must match Nmtokens. 4703 * 4704 * Returns the attribute type 4705 */ 4706int 4707xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4708 SHRINK; 4709 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 4710 SKIP(5); 4711 return(XML_ATTRIBUTE_CDATA); 4712 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 4713 SKIP(6); 4714 return(XML_ATTRIBUTE_IDREFS); 4715 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 4716 SKIP(5); 4717 return(XML_ATTRIBUTE_IDREF); 4718 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4719 SKIP(2); 4720 return(XML_ATTRIBUTE_ID); 4721 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 4722 SKIP(6); 4723 return(XML_ATTRIBUTE_ENTITY); 4724 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 4725 SKIP(8); 4726 return(XML_ATTRIBUTE_ENTITIES); 4727 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 4728 SKIP(8); 4729 return(XML_ATTRIBUTE_NMTOKENS); 4730 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 4731 SKIP(7); 4732 return(XML_ATTRIBUTE_NMTOKEN); 4733 } 4734 return(xmlParseEnumeratedType(ctxt, tree)); 4735} 4736 4737/** 4738 * xmlParseAttributeListDecl: 4739 * @ctxt: an XML parser context 4740 * 4741 * : parse the Attribute list def for an element 4742 * 4743 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 4744 * 4745 * [53] AttDef ::= S Name S AttType S DefaultDecl 4746 * 4747 */ 4748void 4749xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4750 const xmlChar *elemName; 4751 const xmlChar *attrName; 4752 xmlEnumerationPtr tree; 4753 4754 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 4755 xmlParserInputPtr input = ctxt->input; 4756 4757 SKIP(9); 4758 if (!IS_BLANK_CH(CUR)) { 4759 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4760 "Space required after '<!ATTLIST'\n"); 4761 } 4762 SKIP_BLANKS; 4763 elemName = xmlParseName(ctxt); 4764 if (elemName == NULL) { 4765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4766 "ATTLIST: no name for Element\n"); 4767 return; 4768 } 4769 SKIP_BLANKS; 4770 GROW; 4771 while (RAW != '>') { 4772 const xmlChar *check = CUR_PTR; 4773 int type; 4774 int def; 4775 xmlChar *defaultValue = NULL; 4776 4777 GROW; 4778 tree = NULL; 4779 attrName = xmlParseName(ctxt); 4780 if (attrName == NULL) { 4781 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4782 "ATTLIST: no name for Attribute\n"); 4783 break; 4784 } 4785 GROW; 4786 if (!IS_BLANK_CH(CUR)) { 4787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4788 "Space required after the attribute name\n"); 4789 if (defaultValue != NULL) 4790 xmlFree(defaultValue); 4791 break; 4792 } 4793 SKIP_BLANKS; 4794 4795 type = xmlParseAttributeType(ctxt, &tree); 4796 if (type <= 0) { 4797 if (defaultValue != NULL) 4798 xmlFree(defaultValue); 4799 break; 4800 } 4801 4802 GROW; 4803 if (!IS_BLANK_CH(CUR)) { 4804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4805 "Space required after the attribute type\n"); 4806 if (defaultValue != NULL) 4807 xmlFree(defaultValue); 4808 if (tree != NULL) 4809 xmlFreeEnumeration(tree); 4810 break; 4811 } 4812 SKIP_BLANKS; 4813 4814 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4815 if (def <= 0) { 4816 if (defaultValue != NULL) 4817 xmlFree(defaultValue); 4818 if (tree != NULL) 4819 xmlFreeEnumeration(tree); 4820 break; 4821 } 4822 4823 GROW; 4824 if (RAW != '>') { 
4825 if (!IS_BLANK_CH(CUR)) { 4826 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4827 "Space required after the attribute default value\n"); 4828 if (defaultValue != NULL) 4829 xmlFree(defaultValue); 4830 if (tree != NULL) 4831 xmlFreeEnumeration(tree); 4832 break; 4833 } 4834 SKIP_BLANKS; 4835 } 4836 if (check == CUR_PTR) { 4837 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 4838 "in xmlParseAttributeListDecl\n"); 4839 if (defaultValue != NULL) 4840 xmlFree(defaultValue); 4841 if (tree != NULL) 4842 xmlFreeEnumeration(tree); 4843 break; 4844 } 4845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4846 (ctxt->sax->attributeDecl != NULL)) 4847 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4848 type, def, defaultValue, tree); 4849 else if (tree != NULL) 4850 xmlFreeEnumeration(tree); 4851 4852 if ((ctxt->sax2) && (defaultValue != NULL) && 4853 (def != XML_ATTRIBUTE_IMPLIED) && 4854 (def != XML_ATTRIBUTE_REQUIRED)) { 4855 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 4856 } 4857 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { 4858 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 4859 } 4860 if (defaultValue != NULL) 4861 xmlFree(defaultValue); 4862 GROW; 4863 } 4864 if (RAW == '>') { 4865 if (input != ctxt->input) { 4866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4867 "Attribute list declaration doesn't start and stop in the same entity\n"); 4868 } 4869 NEXT; 4870 } 4871 } 4872} 4873 4874/** 4875 * xmlParseElementMixedContentDecl: 4876 * @ctxt: an XML parser context 4877 * @inputchk: the input used for the current entity, needed for boundary checks 4878 * 4879 * parse the declaration for a Mixed Element content 4880 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4881 * 4882 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4883 * '(' S? '#PCDATA' S? 
')' 4884 * 4885 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4886 * 4887 * [ VC: No Duplicate Types ] 4888 * The same name must not appear more than once in a single 4889 * mixed-content declaration. 4890 * 4891 * returns: the list of the xmlElementContentPtr describing the element choices 4892 */ 4893xmlElementContentPtr 4894xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 4895 xmlElementContentPtr ret = NULL, cur = NULL, n; 4896 const xmlChar *elem = NULL; 4897 4898 GROW; 4899 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 4900 SKIP(7); 4901 SKIP_BLANKS; 4902 SHRINK; 4903 if (RAW == ')') { 4904 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4905 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4906"Element content declaration doesn't start and stop in the same entity\n", 4907 NULL); 4908 } 4909 NEXT; 4910 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4911 if (RAW == '*') { 4912 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4913 NEXT; 4914 } 4915 return(ret); 4916 } 4917 if ((RAW == '(') || (RAW == '|')) { 4918 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4919 if (ret == NULL) return(NULL); 4920 } 4921 while (RAW == '|') { 4922 NEXT; 4923 if (elem == NULL) { 4924 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4925 if (ret == NULL) return(NULL); 4926 ret->c1 = cur; 4927 if (cur != NULL) 4928 cur->parent = ret; 4929 cur = ret; 4930 } else { 4931 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4932 if (n == NULL) return(NULL); 4933 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4934 if (n->c1 != NULL) 4935 n->c1->parent = n; 4936 cur->c2 = n; 4937 if (n != NULL) 4938 n->parent = cur; 4939 cur = n; 4940 } 4941 SKIP_BLANKS; 4942 elem = xmlParseName(ctxt); 4943 if (elem == NULL) { 4944 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4945 "xmlParseElementMixedContentDecl : Name expected\n"); 4946 xmlFreeElementContent(cur); 4947 return(NULL); 4948 } 
4949 SKIP_BLANKS; 4950 GROW; 4951 } 4952 if ((RAW == ')') && (NXT(1) == '*')) { 4953 if (elem != NULL) { 4954 cur->c2 = xmlNewElementContent(elem, 4955 XML_ELEMENT_CONTENT_ELEMENT); 4956 if (cur->c2 != NULL) 4957 cur->c2->parent = cur; 4958 } 4959 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4960 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4961 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4962"Element content declaration doesn't start and stop in the same entity\n", 4963 NULL); 4964 } 4965 SKIP(2); 4966 } else { 4967 xmlFreeElementContent(ret); 4968 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 4969 return(NULL); 4970 } 4971 4972 } else { 4973 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 4974 } 4975 return(ret); 4976} 4977 4978/** 4979 * xmlParseElementChildrenContentDecl: 4980 * @ctxt: an XML parser context 4981 * @inputchk: the input used for the current entity, needed for boundary checks 4982 * 4983 * parse the declaration for a Mixed Element content 4984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4985 * 4986 * 4987 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4988 * 4989 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4990 * 4991 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4992 * 4993 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4994 * 4995 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4996 * TODO Parameter-entity replacement text must be properly nested 4997 * with parenthesized groups. That is to say, if either of the 4998 * opening or closing parentheses in a choice, seq, or Mixed 4999 * construct is contained in the replacement text for a parameter 5000 * entity, both must be contained in the same replacement text. 
For 5001 * interoperability, if a parameter-entity reference appears in a 5002 * choice, seq, or Mixed construct, its replacement text should not 5003 * be empty, and neither the first nor last non-blank character of 5004 * the replacement text should be a connector (| or ,). 5005 * 5006 * Returns the tree of xmlElementContentPtr describing the element 5007 * hierarchy. 5008 */ 5009xmlElementContentPtr 5010xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 5011 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5012 const xmlChar *elem; 5013 xmlChar type = 0; 5014 5015 SKIP_BLANKS; 5016 GROW; 5017 if (RAW == '(') { 5018 int inputid = ctxt->input->id; 5019 5020 /* Recurse on first child */ 5021 NEXT; 5022 SKIP_BLANKS; 5023 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 5024 SKIP_BLANKS; 5025 GROW; 5026 } else { 5027 elem = xmlParseName(ctxt); 5028 if (elem == NULL) { 5029 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5030 return(NULL); 5031 } 5032 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 5033 if (cur == NULL) { 5034 xmlErrMemory(ctxt, NULL); 5035 return(NULL); 5036 } 5037 GROW; 5038 if (RAW == '?') { 5039 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5040 NEXT; 5041 } else if (RAW == '*') { 5042 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5043 NEXT; 5044 } else if (RAW == '+') { 5045 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5046 NEXT; 5047 } else { 5048 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5049 } 5050 GROW; 5051 } 5052 SKIP_BLANKS; 5053 SHRINK; 5054 while (RAW != ')') { 5055 /* 5056 * Each loop we parse one separator and one element. 
5057 */ 5058 if (RAW == ',') { 5059 if (type == 0) type = CUR; 5060 5061 /* 5062 * Detect "Name | Name , Name" error 5063 */ 5064 else if (type != CUR) { 5065 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5066 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5067 type); 5068 if ((last != NULL) && (last != ret)) 5069 xmlFreeElementContent(last); 5070 if (ret != NULL) 5071 xmlFreeElementContent(ret); 5072 return(NULL); 5073 } 5074 NEXT; 5075 5076 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 5077 if (op == NULL) { 5078 if ((last != NULL) && (last != ret)) 5079 xmlFreeElementContent(last); 5080 xmlFreeElementContent(ret); 5081 return(NULL); 5082 } 5083 if (last == NULL) { 5084 op->c1 = ret; 5085 if (ret != NULL) 5086 ret->parent = op; 5087 ret = cur = op; 5088 } else { 5089 cur->c2 = op; 5090 if (op != NULL) 5091 op->parent = cur; 5092 op->c1 = last; 5093 if (last != NULL) 5094 last->parent = op; 5095 cur =op; 5096 last = NULL; 5097 } 5098 } else if (RAW == '|') { 5099 if (type == 0) type = CUR; 5100 5101 /* 5102 * Detect "Name , Name | Name" error 5103 */ 5104 else if (type != CUR) { 5105 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5106 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5107 type); 5108 if ((last != NULL) && (last != ret)) 5109 xmlFreeElementContent(last); 5110 if (ret != NULL) 5111 xmlFreeElementContent(ret); 5112 return(NULL); 5113 } 5114 NEXT; 5115 5116 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 5117 if (op == NULL) { 5118 if ((last != NULL) && (last != ret)) 5119 xmlFreeElementContent(last); 5120 if (ret != NULL) 5121 xmlFreeElementContent(ret); 5122 return(NULL); 5123 } 5124 if (last == NULL) { 5125 op->c1 = ret; 5126 if (ret != NULL) 5127 ret->parent = op; 5128 ret = cur = op; 5129 } else { 5130 cur->c2 = op; 5131 if (op != NULL) 5132 op->parent = cur; 5133 op->c1 = last; 5134 if (last != NULL) 5135 last->parent = op; 5136 cur =op; 5137 last = NULL; 5138 } 5139 } else { 5140 xmlFatalErr(ctxt, 
XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5141 if (ret != NULL) 5142 xmlFreeElementContent(ret); 5143 return(NULL); 5144 } 5145 GROW; 5146 SKIP_BLANKS; 5147 GROW; 5148 if (RAW == '(') { 5149 int inputid = ctxt->input->id; 5150 /* Recurse on second child */ 5151 NEXT; 5152 SKIP_BLANKS; 5153 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5154 SKIP_BLANKS; 5155 } else { 5156 elem = xmlParseName(ctxt); 5157 if (elem == NULL) { 5158 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5159 if (ret != NULL) 5160 xmlFreeElementContent(ret); 5161 return(NULL); 5162 } 5163 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 5164 if (RAW == '?') { 5165 last->ocur = XML_ELEMENT_CONTENT_OPT; 5166 NEXT; 5167 } else if (RAW == '*') { 5168 last->ocur = XML_ELEMENT_CONTENT_MULT; 5169 NEXT; 5170 } else if (RAW == '+') { 5171 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5172 NEXT; 5173 } else { 5174 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5175 } 5176 } 5177 SKIP_BLANKS; 5178 GROW; 5179 } 5180 if ((cur != NULL) && (last != NULL)) { 5181 cur->c2 = last; 5182 if (last != NULL) 5183 last->parent = cur; 5184 } 5185 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5186 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5187"Element content declaration doesn't start and stop in the same entity\n", 5188 NULL); 5189 } 5190 NEXT; 5191 if (RAW == '?') { 5192 if (ret != NULL) 5193 ret->ocur = XML_ELEMENT_CONTENT_OPT; 5194 NEXT; 5195 } else if (RAW == '*') { 5196 if (ret != NULL) { 5197 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5198 cur = ret; 5199 /* 5200 * Some normalization: 5201 * (a | b* | c?)* == (a | b | c)* 5202 */ 5203 while (cur->type == XML_ELEMENT_CONTENT_OR) { 5204 if ((cur->c1 != NULL) && 5205 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5206 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5207 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5208 if ((cur->c2 != NULL) && 5209 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5210 (cur->c2->ocur == 
XML_ELEMENT_CONTENT_MULT))) 5211 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5212 cur = cur->c2; 5213 } 5214 } 5215 NEXT; 5216 } else if (RAW == '+') { 5217 if (ret != NULL) { 5218 int found = 0; 5219 5220 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 5221 /* 5222 * Some normalization: 5223 * (a | b*)+ == (a | b)* 5224 * (a | b?)+ == (a | b)* 5225 */ 5226 while (cur->type == XML_ELEMENT_CONTENT_OR) { 5227 if ((cur->c1 != NULL) && 5228 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5229 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 5230 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5231 found = 1; 5232 } 5233 if ((cur->c2 != NULL) && 5234 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5235 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 5236 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5237 found = 1; 5238 } 5239 cur = cur->c2; 5240 } 5241 if (found) 5242 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5243 } 5244 NEXT; 5245 } 5246 return(ret); 5247} 5248 5249/** 5250 * xmlParseElementContentDecl: 5251 * @ctxt: an XML parser context 5252 * @name: the name of the element being defined. 
5253 * @result: the Element Content pointer will be stored here if any 5254 * 5255 * parse the declaration for an Element content either Mixed or Children, 5256 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 5257 * 5258 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 5259 * 5260 * returns: the type of element content XML_ELEMENT_TYPE_xxx 5261 */ 5262 5263int 5264xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 5265 xmlElementContentPtr *result) { 5266 5267 xmlElementContentPtr tree = NULL; 5268 int inputid = ctxt->input->id; 5269 int res; 5270 5271 *result = NULL; 5272 5273 if (RAW != '(') { 5274 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5275 "xmlParseElementContentDecl : %s '(' expected\n", name); 5276 return(-1); 5277 } 5278 NEXT; 5279 GROW; 5280 SKIP_BLANKS; 5281 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5282 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 5283 res = XML_ELEMENT_TYPE_MIXED; 5284 } else { 5285 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 5286 res = XML_ELEMENT_TYPE_ELEMENT; 5287 } 5288 SKIP_BLANKS; 5289 *result = tree; 5290 return(res); 5291} 5292 5293/** 5294 * xmlParseElementDecl: 5295 * @ctxt: an XML parser context 5296 * 5297 * parse an Element declaration. 5298 * 5299 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 5300 * 5301 * [ VC: Unique Element Type Declaration ] 5302 * No element type may be declared more than once 5303 * 5304 * Returns the type of the element, or -1 in case of error 5305 */ 5306int 5307xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 5308 const xmlChar *name; 5309 int ret = -1; 5310 xmlElementContentPtr content = NULL; 5311 5312 GROW; 5313 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 5314 xmlParserInputPtr input = ctxt->input; 5315 5316 SKIP(9); 5317 if (!IS_BLANK_CH(CUR)) { 5318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5319 "Space required after 'ELEMENT'\n"); 5320 } 5321 SKIP_BLANKS; 5322 name = xmlParseName(ctxt); 5323 if (name == NULL) { 5324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5325 "xmlParseElementDecl: no name for Element\n"); 5326 return(-1); 5327 } 5328 while ((RAW == 0) && (ctxt->inputNr > 1)) 5329 xmlPopInput(ctxt); 5330 if (!IS_BLANK_CH(CUR)) { 5331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5332 "Space required after the element name\n"); 5333 } 5334 SKIP_BLANKS; 5335 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 5336 SKIP(5); 5337 /* 5338 * Element must always be empty. 5339 */ 5340 ret = XML_ELEMENT_TYPE_EMPTY; 5341 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5342 (NXT(2) == 'Y')) { 5343 SKIP(3); 5344 /* 5345 * Element is a generic container. 5346 */ 5347 ret = XML_ELEMENT_TYPE_ANY; 5348 } else if (RAW == '(') { 5349 ret = xmlParseElementContentDecl(ctxt, name, &content); 5350 } else { 5351 /* 5352 * [ WFC: PEs in Internal Subset ] error handling. 5353 */ 5354 if ((RAW == '%') && (ctxt->external == 0) && 5355 (ctxt->inputNr == 1)) { 5356 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 5357 "PEReference: forbidden within markup decl in internal subset\n"); 5358 } else { 5359 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5360 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5361 } 5362 return(-1); 5363 } 5364 5365 SKIP_BLANKS; 5366 /* 5367 * Pop-up of finished entities. 
5368 */ 5369 while ((RAW == 0) && (ctxt->inputNr > 1)) 5370 xmlPopInput(ctxt); 5371 SKIP_BLANKS; 5372 5373 if (RAW != '>') { 5374 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5375 } else { 5376 if (input != ctxt->input) { 5377 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5378 "Element declaration doesn't start and stop in the same entity\n"); 5379 } 5380 5381 NEXT; 5382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5383 (ctxt->sax->elementDecl != NULL)) 5384 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5385 content); 5386 } 5387 if (content != NULL) { 5388 xmlFreeElementContent(content); 5389 } 5390 } 5391 return(ret); 5392} 5393 5394/** 5395 * xmlParseConditionalSections 5396 * @ctxt: an XML parser context 5397 * 5398 * [61] conditionalSect ::= includeSect | ignoreSect 5399 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5400 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 5401 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5402 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5403 */ 5404 5405static void 5406xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5407 SKIP(3); 5408 SKIP_BLANKS; 5409 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 5410 SKIP(7); 5411 SKIP_BLANKS; 5412 if (RAW != '[') { 5413 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5414 } else { 5415 NEXT; 5416 } 5417 if (xmlParserDebugEntities) { 5418 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5419 xmlGenericError(xmlGenericErrorContext, 5420 "%s(%d): ", ctxt->input->filename, 5421 ctxt->input->line); 5422 xmlGenericError(xmlGenericErrorContext, 5423 "Entering INCLUDE Conditional Section\n"); 5424 } 5425 5426 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5427 (NXT(2) != '>'))) { 5428 const xmlChar *check = CUR_PTR; 5429 unsigned int cons = ctxt->input->consumed; 5430 5431 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5432 xmlParseConditionalSections(ctxt); 
5433 } else if (IS_BLANK_CH(CUR)) { 5434 NEXT; 5435 } else if (RAW == '%') { 5436 xmlParsePEReference(ctxt); 5437 } else 5438 xmlParseMarkupDecl(ctxt); 5439 5440 /* 5441 * Pop-up of finished entities. 5442 */ 5443 while ((RAW == 0) && (ctxt->inputNr > 1)) 5444 xmlPopInput(ctxt); 5445 5446 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5447 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5448 break; 5449 } 5450 } 5451 if (xmlParserDebugEntities) { 5452 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5453 xmlGenericError(xmlGenericErrorContext, 5454 "%s(%d): ", ctxt->input->filename, 5455 ctxt->input->line); 5456 xmlGenericError(xmlGenericErrorContext, 5457 "Leaving INCLUDE Conditional Section\n"); 5458 } 5459 5460 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5461 int state; 5462 xmlParserInputState instate; 5463 int depth = 0; 5464 5465 SKIP(6); 5466 SKIP_BLANKS; 5467 if (RAW != '[') { 5468 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5469 } else { 5470 NEXT; 5471 } 5472 if (xmlParserDebugEntities) { 5473 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5474 xmlGenericError(xmlGenericErrorContext, 5475 "%s(%d): ", ctxt->input->filename, 5476 ctxt->input->line); 5477 xmlGenericError(xmlGenericErrorContext, 5478 "Entering IGNORE Conditional Section\n"); 5479 } 5480 5481 /* 5482 * Parse up to the end of the conditional section 5483 * But disable SAX event generating DTD building in the meantime 5484 */ 5485 state = ctxt->disableSAX; 5486 instate = ctxt->instate; 5487 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5488 ctxt->instate = XML_PARSER_IGNORE; 5489 5490 while ((depth >= 0) && (RAW != 0)) { 5491 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5492 depth++; 5493 SKIP(3); 5494 continue; 5495 } 5496 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5497 if (--depth >= 0) SKIP(3); 5498 continue; 5499 } 5500 NEXT; 5501 continue; 5502 } 5503 5504 ctxt->disableSAX = state; 5505 ctxt->instate = 
instate; 5506 5507 if (xmlParserDebugEntities) { 5508 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5509 xmlGenericError(xmlGenericErrorContext, 5510 "%s(%d): ", ctxt->input->filename, 5511 ctxt->input->line); 5512 xmlGenericError(xmlGenericErrorContext, 5513 "Leaving IGNORE Conditional Section\n"); 5514 } 5515 5516 } else { 5517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5518 } 5519 5520 if (RAW == 0) 5521 SHRINK; 5522 5523 if (RAW == 0) { 5524 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5525 } else { 5526 SKIP(3); 5527 } 5528} 5529 5530/** 5531 * xmlParseMarkupDecl: 5532 * @ctxt: an XML parser context 5533 * 5534 * parse Markup declarations 5535 * 5536 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5537 * NotationDecl | PI | Comment 5538 * 5539 * [ VC: Proper Declaration/PE Nesting ] 5540 * Parameter-entity replacement text must be properly nested with 5541 * markup declarations. That is to say, if either the first character 5542 * or the last character of a markup declaration (markupdecl above) is 5543 * contained in the replacement text for a parameter-entity reference, 5544 * both must be contained in the same replacement text. 5545 * 5546 * [ WFC: PEs in Internal Subset ] 5547 * In the internal DTD subset, parameter-entity references can occur 5548 * only where markup declarations can occur, not within markup declarations. 5549 * (This does not apply to references that occur in external parameter 5550 * entities or to the external subset.) 5551 */ 5552void 5553xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5554 GROW; 5555 xmlParseElementDecl(ctxt); 5556 xmlParseAttributeListDecl(ctxt); 5557 xmlParseEntityDecl(ctxt); 5558 xmlParseNotationDecl(ctxt); 5559 xmlParsePI(ctxt); 5560 xmlParseComment(ctxt); 5561 /* 5562 * This is only for internal subset. 
On external entities, 5563 * the replacement is done before parsing stage 5564 */ 5565 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5566 xmlParsePEReference(ctxt); 5567 5568 /* 5569 * Conditional sections are allowed from entities included 5570 * by PE References in the internal subset. 5571 */ 5572 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5573 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5574 xmlParseConditionalSections(ctxt); 5575 } 5576 } 5577 5578 ctxt->instate = XML_PARSER_DTD; 5579} 5580 5581/** 5582 * xmlParseTextDecl: 5583 * @ctxt: an XML parser context 5584 * 5585 * parse an XML declaration header for external entities 5586 * 5587 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5588 * 5589 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5590 */ 5591 5592void 5593xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5594 xmlChar *version; 5595 5596 /* 5597 * We know that '<?xml' is here. 5598 */ 5599 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5600 SKIP(5); 5601 } else { 5602 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5603 return; 5604 } 5605 5606 if (!IS_BLANK_CH(CUR)) { 5607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5608 "Space needed after '<?xml'\n"); 5609 } 5610 SKIP_BLANKS; 5611 5612 /* 5613 * We may have the VersionInfo here. 
5614 */ 5615 version = xmlParseVersionInfo(ctxt); 5616 if (version == NULL) 5617 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5618 else { 5619 if (!IS_BLANK_CH(CUR)) { 5620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5621 "Space needed here\n"); 5622 } 5623 } 5624 ctxt->input->version = version; 5625 5626 /* 5627 * We must have the encoding declaration 5628 */ 5629 xmlParseEncodingDecl(ctxt); 5630 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5631 /* 5632 * The XML REC instructs us to stop parsing right here 5633 */ 5634 return; 5635 } 5636 5637 SKIP_BLANKS; 5638 if ((RAW == '?') && (NXT(1) == '>')) { 5639 SKIP(2); 5640 } else if (RAW == '>') { 5641 /* Deprecated old WD ... */ 5642 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5643 NEXT; 5644 } else { 5645 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5646 MOVETO_ENDTAG(CUR_PTR); 5647 NEXT; 5648 } 5649} 5650 5651/** 5652 * xmlParseExternalSubset: 5653 * @ctxt: an XML parser context 5654 * @ExternalID: the external identifier 5655 * @SystemID: the system identifier (or URL) 5656 * 5657 * parse Markup declarations from an external subset 5658 * 5659 * [30] extSubset ::= textDecl? 
extSubsetDecl 5660 * 5661 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5662 */ 5663void 5664xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5665 const xmlChar *SystemID) { 5666 xmlDetectSAX2(ctxt); 5667 GROW; 5668 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 5669 xmlParseTextDecl(ctxt); 5670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5671 /* 5672 * The XML REC instructs us to stop parsing right here 5673 */ 5674 ctxt->instate = XML_PARSER_EOF; 5675 return; 5676 } 5677 } 5678 if (ctxt->myDoc == NULL) { 5679 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5680 } 5681 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5682 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5683 5684 ctxt->instate = XML_PARSER_DTD; 5685 ctxt->external = 1; 5686 while (((RAW == '<') && (NXT(1) == '?')) || 5687 ((RAW == '<') && (NXT(1) == '!')) || 5688 (RAW == '%') || IS_BLANK_CH(CUR)) { 5689 const xmlChar *check = CUR_PTR; 5690 unsigned int cons = ctxt->input->consumed; 5691 5692 GROW; 5693 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5694 xmlParseConditionalSections(ctxt); 5695 } else if (IS_BLANK_CH(CUR)) { 5696 NEXT; 5697 } else if (RAW == '%') { 5698 xmlParsePEReference(ctxt); 5699 } else 5700 xmlParseMarkupDecl(ctxt); 5701 5702 /* 5703 * Pop-up of finished entities. 
5704 */ 5705 while ((RAW == 0) && (ctxt->inputNr > 1)) 5706 xmlPopInput(ctxt); 5707 5708 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5709 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5710 break; 5711 } 5712 } 5713 5714 if (RAW != 0) { 5715 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5716 } 5717 5718} 5719 5720/** 5721 * xmlParseReference: 5722 * @ctxt: an XML parser context 5723 * 5724 * parse and handle entity references in content, depending on the SAX 5725 * interface, this may end-up in a call to character() if this is a 5726 * CharRef, a predefined entity, if there is no reference() callback. 5727 * or if the parser was asked to switch to that mode. 5728 * 5729 * [67] Reference ::= EntityRef | CharRef 5730 */ 5731void 5732xmlParseReference(xmlParserCtxtPtr ctxt) { 5733 xmlEntityPtr ent; 5734 xmlChar *val; 5735 if (RAW != '&') return; 5736 5737 if (NXT(1) == '#') { 5738 int i = 0; 5739 xmlChar out[10]; 5740 int hex = NXT(2); 5741 int value = xmlParseCharRef(ctxt); 5742 5743 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5744 /* 5745 * So we are using non-UTF-8 buffers 5746 * Check that the char fit on 8bits, if not 5747 * generate a CharRef. 
5748 */ 5749 if (value <= 0xFF) { 5750 out[0] = value; 5751 out[1] = 0; 5752 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5753 (!ctxt->disableSAX)) 5754 ctxt->sax->characters(ctxt->userData, out, 1); 5755 } else { 5756 if ((hex == 'x') || (hex == 'X')) 5757 snprintf((char *)out, sizeof(out), "#x%X", value); 5758 else 5759 snprintf((char *)out, sizeof(out), "#%d", value); 5760 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5761 (!ctxt->disableSAX)) 5762 ctxt->sax->reference(ctxt->userData, out); 5763 } 5764 } else { 5765 /* 5766 * Just encode the value in UTF-8 5767 */ 5768 COPY_BUF(0 ,out, i, value); 5769 out[i] = 0; 5770 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5771 (!ctxt->disableSAX)) 5772 ctxt->sax->characters(ctxt->userData, out, i); 5773 } 5774 } else { 5775 ent = xmlParseEntityRef(ctxt); 5776 if (ent == NULL) return; 5777 if (!ctxt->wellFormed) 5778 return; 5779 if ((ent->name != NULL) && 5780 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5781 xmlNodePtr list = NULL; 5782 xmlParserErrors ret = XML_ERR_OK; 5783 5784 5785 /* 5786 * The first reference to the entity trigger a parsing phase 5787 * where the ent->children is filled with the result from 5788 * the parsing. 5789 */ 5790 if (ent->children == NULL) { 5791 xmlChar *value; 5792 value = ent->content; 5793 5794 /* 5795 * Check that this entity is well formed 5796 */ 5797 if ((value != NULL) && (value[0] != 0) && 5798 (value[1] == 0) && (value[0] == '<') && 5799 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5800 /* 5801 * DONE: get definite answer on this !!! 5802 * Lots of entity decls are used to declare a single 5803 * char 5804 * <!ENTITY lt "<"> 5805 * Which seems to be valid since 5806 * 2.4: The ampersand character (&) and the left angle 5807 * bracket (<) may appear in their literal form only 5808 * when used ... 
They are also legal within the literal 5809 * entity value of an internal entity declaration;i 5810 * see "4.3.2 Well-Formed Parsed Entities". 5811 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5812 * Looking at the OASIS test suite and James Clark 5813 * tests, this is broken. However the XML REC uses 5814 * it. Is the XML REC not well-formed ???? 5815 * This is a hack to avoid this problem 5816 * 5817 * ANSWER: since lt gt amp .. are already defined, 5818 * this is a redefinition and hence the fact that the 5819 * content is not well balanced is not a Wf error, this 5820 * is lousy but acceptable. 5821 */ 5822 list = xmlNewDocText(ctxt->myDoc, value); 5823 if (list != NULL) { 5824 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5825 (ent->children == NULL)) { 5826 ent->children = list; 5827 ent->last = list; 5828 ent->owner = 1; 5829 list->parent = (xmlNodePtr) ent; 5830 } else { 5831 xmlFreeNodeList(list); 5832 } 5833 } else if (list != NULL) { 5834 xmlFreeNodeList(list); 5835 } 5836 } else { 5837 /* 5838 * 4.3.2: An internal general parsed entity is well-formed 5839 * if its replacement text matches the production labeled 5840 * content. 5841 */ 5842 5843 void *user_data; 5844 /* 5845 * This is a bit hackish but this seems the best 5846 * way to make sure both SAX and DOM entity support 5847 * behaves okay. 
5848 */ 5849 if (ctxt->userData == ctxt) 5850 user_data = NULL; 5851 else 5852 user_data = ctxt->userData; 5853 5854 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5855 ctxt->depth++; 5856 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 5857 value, user_data, &list); 5858 ctxt->depth--; 5859 } else if (ent->etype == 5860 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5861 ctxt->depth++; 5862 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5863 ctxt->sax, user_data, ctxt->depth, 5864 ent->URI, ent->ExternalID, &list); 5865 ctxt->depth--; 5866 } else { 5867 ret = XML_ERR_ENTITY_PE_INTERNAL; 5868 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 5869 "invalid entity type found\n", NULL); 5870 } 5871 if (ret == XML_ERR_ENTITY_LOOP) { 5872 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 5873 return; 5874 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 5875 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5876 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5877 (ent->children == NULL)) { 5878 ent->children = list; 5879 if (ctxt->replaceEntities) { 5880 /* 5881 * Prune it directly in the generated document 5882 * except for single text nodes. 
5883 */ 5884 if ((list->type == XML_TEXT_NODE) && 5885 (list->next == NULL)) { 5886 list->parent = (xmlNodePtr) ent; 5887 list = NULL; 5888 ent->owner = 1; 5889 } else { 5890 ent->owner = 0; 5891 while (list != NULL) { 5892 list->parent = (xmlNodePtr) ctxt->node; 5893 list->doc = ctxt->myDoc; 5894 if (list->next == NULL) 5895 ent->last = list; 5896 list = list->next; 5897 } 5898 list = ent->children; 5899#ifdef LIBXML_LEGACY_ENABLED 5900 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5901 xmlAddEntityReference(ent, list, NULL); 5902#endif /* LIBXML_LEGACY_ENABLED */ 5903 } 5904 } else { 5905 ent->owner = 1; 5906 while (list != NULL) { 5907 list->parent = (xmlNodePtr) ent; 5908 if (list->next == NULL) 5909 ent->last = list; 5910 list = list->next; 5911 } 5912 } 5913 } else { 5914 xmlFreeNodeList(list); 5915 list = NULL; 5916 } 5917 } else if ((ret != XML_ERR_OK) && 5918 (ret != XML_WAR_UNDECLARED_ENTITY)) { 5919 xmlFatalErr(ctxt, ret, NULL); 5920 } else if (list != NULL) { 5921 xmlFreeNodeList(list); 5922 list = NULL; 5923 } 5924 } 5925 } 5926 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5927 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5928 /* 5929 * Create a node. 
5930 */ 5931 ctxt->sax->reference(ctxt->userData, ent->name); 5932 return; 5933 } else if (ctxt->replaceEntities) { 5934 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5935 /* 5936 * Seems we are generating the DOM content, do 5937 * a simple tree copy for all references except the first 5938 * In the first occurrence list contains the replacement 5939 */ 5940 if ((list == NULL) && (ent->owner == 0)) { 5941 xmlNodePtr nw = NULL, cur, firstChild = NULL; 5942 cur = ent->children; 5943 while (cur != NULL) { 5944 nw = xmlCopyNode(cur, 1); 5945 if (nw != NULL) { 5946 nw->_private = cur->_private; 5947 if (firstChild == NULL){ 5948 firstChild = nw; 5949 } 5950 xmlAddChild(ctxt->node, nw); 5951 } 5952 if (cur == ent->last) 5953 break; 5954 cur = cur->next; 5955 } 5956#ifdef LIBXML_LEGACY_ENABLED 5957 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5958 xmlAddEntityReference(ent, firstChild, nw); 5959#endif /* LIBXML_LEGACY_ENABLED */ 5960 } else if (list == NULL) { 5961 xmlNodePtr nw = NULL, cur, next, last, 5962 firstChild = NULL; 5963 /* 5964 * Copy the entity child list and make it the new 5965 * entity child list. The goal is to make sure any 5966 * ID or REF referenced will be the one from the 5967 * document content and not the entity copy. 
5968 */ 5969 cur = ent->children; 5970 ent->children = NULL; 5971 last = ent->last; 5972 ent->last = NULL; 5973 while (cur != NULL) { 5974 next = cur->next; 5975 cur->next = NULL; 5976 cur->parent = NULL; 5977 nw = xmlCopyNode(cur, 1); 5978 if (nw != NULL) { 5979 nw->_private = cur->_private; 5980 if (firstChild == NULL){ 5981 firstChild = cur; 5982 } 5983 xmlAddChild((xmlNodePtr) ent, nw); 5984 xmlAddChild(ctxt->node, cur); 5985 } 5986 if (cur == last) 5987 break; 5988 cur = next; 5989 } 5990 ent->owner = 1; 5991#ifdef LIBXML_LEGACY_ENABLED 5992 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5993 xmlAddEntityReference(ent, firstChild, nw); 5994#endif /* LIBXML_LEGACY_ENABLED */ 5995 } else { 5996 /* 5997 * the name change is to avoid coalescing of the 5998 * node with a possible previous text one which 5999 * would make ent->children a dangling pointer 6000 */ 6001 if (ent->children->type == XML_TEXT_NODE) 6002 ent->children->name = xmlStrdup(BAD_CAST "nbktext"); 6003 if ((ent->last != ent->children) && 6004 (ent->last->type == XML_TEXT_NODE)) 6005 ent->last->name = xmlStrdup(BAD_CAST "nbktext"); 6006 xmlAddChildList(ctxt->node, ent->children); 6007 } 6008 6009 /* 6010 * This is to avoid a nasty side effect, see 6011 * characters() in SAX.c 6012 */ 6013 ctxt->nodemem = 0; 6014 ctxt->nodelen = 0; 6015 return; 6016 } else { 6017 /* 6018 * Probably running in SAX mode 6019 */ 6020 xmlParserInputPtr input; 6021 6022 input = xmlNewEntityInputStream(ctxt, ent); 6023 xmlPushInput(ctxt, input); 6024 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 6025 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6026 (IS_BLANK_CH(NXT(5)))) { 6027 xmlParseTextDecl(ctxt); 6028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6029 /* 6030 * The XML REC instructs us to stop parsing right here 6031 */ 6032 ctxt->instate = XML_PARSER_EOF; 6033 return; 6034 } 6035 if (input->standalone == 1) { 6036 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE, 6037 NULL); 6038 } 6039 } 6040 
return; 6041 } 6042 } 6043 } else { 6044 val = ent->content; 6045 if (val == NULL) return; 6046 /* 6047 * inline the entity. 6048 */ 6049 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6050 (!ctxt->disableSAX)) 6051 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6052 } 6053 } 6054} 6055 6056/** 6057 * xmlParseEntityRef: 6058 * @ctxt: an XML parser context 6059 * 6060 * parse ENTITY references declarations 6061 * 6062 * [68] EntityRef ::= '&' Name ';' 6063 * 6064 * [ WFC: Entity Declared ] 6065 * In a document without any DTD, a document with only an internal DTD 6066 * subset which contains no parameter entity references, or a document 6067 * with "standalone='yes'", the Name given in the entity reference 6068 * must match that in an entity declaration, except that well-formed 6069 * documents need not declare any of the following entities: amp, lt, 6070 * gt, apos, quot. The declaration of a parameter entity must precede 6071 * any reference to it. Similarly, the declaration of a general entity 6072 * must precede any reference to it which appears in a default value in an 6073 * attribute-list declaration. Note that if entities are declared in the 6074 * external subset or in external parameter entities, a non-validating 6075 * processor is not obligated to read and process their declarations; 6076 * for such documents, the rule that an entity must be declared is a 6077 * well-formedness constraint only if standalone='yes'. 6078 * 6079 * [ WFC: Parsed Entity ] 6080 * An entity reference must not contain the name of an unparsed entity 6081 * 6082 * Returns the xmlEntityPtr if found, or NULL otherwise. 
6083 */ 6084xmlEntityPtr 6085xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 6086 const xmlChar *name; 6087 xmlEntityPtr ent = NULL; 6088 6089 GROW; 6090 6091 if (RAW == '&') { 6092 NEXT; 6093 name = xmlParseName(ctxt); 6094 if (name == NULL) { 6095 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6096 "xmlParseEntityRef: no name\n"); 6097 } else { 6098 if (RAW == ';') { 6099 NEXT; 6100 /* 6101 * Ask first SAX for entity resolution, otherwise try the 6102 * predefined set. 6103 */ 6104 if (ctxt->sax != NULL) { 6105 if (ctxt->sax->getEntity != NULL) 6106 ent = ctxt->sax->getEntity(ctxt->userData, name); 6107 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 6108 ent = xmlGetPredefinedEntity(name); 6109 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 6110 (ctxt->userData==ctxt)) { 6111 ent = xmlSAX2GetEntity(ctxt, name); 6112 } 6113 } 6114 /* 6115 * [ WFC: Entity Declared ] 6116 * In a document without any DTD, a document with only an 6117 * internal DTD subset which contains no parameter entity 6118 * references, or a document with "standalone='yes'", the 6119 * Name given in the entity reference must match that in an 6120 * entity declaration, except that well-formed documents 6121 * need not declare any of the following entities: amp, lt, 6122 * gt, apos, quot. 6123 * The declaration of a parameter entity must precede any 6124 * reference to it. 6125 * Similarly, the declaration of a general entity must 6126 * precede any reference to it which appears in a default 6127 * value in an attribute-list declaration. Note that if 6128 * entities are declared in the external subset or in 6129 * external parameter entities, a non-validating processor 6130 * is not obligated to read and process their declarations; 6131 * for such documents, the rule that an entity must be 6132 * declared is a well-formedness constraint only if 6133 * standalone='yes'. 
6134 */ 6135 if (ent == NULL) { 6136 if ((ctxt->standalone == 1) || 6137 ((ctxt->hasExternalSubset == 0) && 6138 (ctxt->hasPErefs == 0))) { 6139 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6140 "Entity '%s' not defined\n", name); 6141 } else { 6142 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6143 "Entity '%s' not defined\n", name); 6144 } 6145 ctxt->valid = 0; 6146 } 6147 6148 /* 6149 * [ WFC: Parsed Entity ] 6150 * An entity reference must not contain the name of an 6151 * unparsed entity 6152 */ 6153 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6154 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6155 "Entity reference to unparsed entity %s\n", name); 6156 } 6157 6158 /* 6159 * [ WFC: No External Entity References ] 6160 * Attribute values cannot contain direct or indirect 6161 * entity references to external entities. 6162 */ 6163 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6164 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6165 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6166 "Attribute references external entity '%s'\n", name); 6167 } 6168 /* 6169 * [ WFC: No < in Attribute Values ] 6170 * The replacement text of any entity referred to directly or 6171 * indirectly in an attribute value (other than "<") must 6172 * not contain a <. 6173 */ 6174 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6175 (ent != NULL) && 6176 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6177 (ent->content != NULL) && 6178 (xmlStrchr(ent->content, '<'))) { 6179 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6180 "'<' in entity '%s' is not allowed in attributes values\n", name); 6181 } 6182 6183 /* 6184 * Internal check, no parameter entities here ... 
6185 */ 6186 else { 6187 switch (ent->etype) { 6188 case XML_INTERNAL_PARAMETER_ENTITY: 6189 case XML_EXTERNAL_PARAMETER_ENTITY: 6190 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6191 "Attempt to reference the parameter entity '%s'\n", 6192 name); 6193 break; 6194 default: 6195 break; 6196 } 6197 } 6198 6199 /* 6200 * [ WFC: No Recursion ] 6201 * A parsed entity must not contain a recursive reference 6202 * to itself, either directly or indirectly. 6203 * Done somewhere else 6204 */ 6205 6206 } else { 6207 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6208 } 6209 } 6210 } 6211 return(ent); 6212} 6213 6214/** 6215 * xmlParseStringEntityRef: 6216 * @ctxt: an XML parser context 6217 * @str: a pointer to an index in the string 6218 * 6219 * parse ENTITY references declarations, but this version parses it from 6220 * a string value. 6221 * 6222 * [68] EntityRef ::= '&' Name ';' 6223 * 6224 * [ WFC: Entity Declared ] 6225 * In a document without any DTD, a document with only an internal DTD 6226 * subset which contains no parameter entity references, or a document 6227 * with "standalone='yes'", the Name given in the entity reference 6228 * must match that in an entity declaration, except that well-formed 6229 * documents need not declare any of the following entities: amp, lt, 6230 * gt, apos, quot. The declaration of a parameter entity must precede 6231 * any reference to it. Similarly, the declaration of a general entity 6232 * must precede any reference to it which appears in a default value in an 6233 * attribute-list declaration. Note that if entities are declared in the 6234 * external subset or in external parameter entities, a non-validating 6235 * processor is not obligated to read and process their declarations; 6236 * for such documents, the rule that an entity must be declared is a 6237 * well-formedness constraint only if standalone='yes'. 
6238 * 6239 * [ WFC: Parsed Entity ] 6240 * An entity reference must not contain the name of an unparsed entity 6241 * 6242 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 6243 * is updated to the current location in the string. 6244 */ 6245xmlEntityPtr 6246xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 6247 xmlChar *name; 6248 const xmlChar *ptr; 6249 xmlChar cur; 6250 xmlEntityPtr ent = NULL; 6251 6252 if ((str == NULL) || (*str == NULL)) 6253 return(NULL); 6254 ptr = *str; 6255 cur = *ptr; 6256 if (cur == '&') { 6257 ptr++; 6258 cur = *ptr; 6259 name = xmlParseStringName(ctxt, &ptr); 6260 if (name == NULL) { 6261 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6262 "xmlParseStringEntityRef: no name\n"); 6263 } else { 6264 if (*ptr == ';') { 6265 ptr++; 6266 /* 6267 * Ask first SAX for entity resolution, otherwise try the 6268 * predefined set. 6269 */ 6270 if (ctxt->sax != NULL) { 6271 if (ctxt->sax->getEntity != NULL) 6272 ent = ctxt->sax->getEntity(ctxt->userData, name); 6273 if (ent == NULL) 6274 ent = xmlGetPredefinedEntity(name); 6275 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6276 ent = xmlSAX2GetEntity(ctxt, name); 6277 } 6278 } 6279 /* 6280 * [ WFC: Entity Declared ] 6281 * In a document without any DTD, a document with only an 6282 * internal DTD subset which contains no parameter entity 6283 * references, or a document with "standalone='yes'", the 6284 * Name given in the entity reference must match that in an 6285 * entity declaration, except that well-formed documents 6286 * need not declare any of the following entities: amp, lt, 6287 * gt, apos, quot. 6288 * The declaration of a parameter entity must precede any 6289 * reference to it. 6290 * Similarly, the declaration of a general entity must 6291 * precede any reference to it which appears in a default 6292 * value in an attribute-list declaration. 
Note that if 6293 * entities are declared in the external subset or in 6294 * external parameter entities, a non-validating processor 6295 * is not obligated to read and process their declarations; 6296 * for such documents, the rule that an entity must be 6297 * declared is a well-formedness constraint only if 6298 * standalone='yes'. 6299 */ 6300 if (ent == NULL) { 6301 if ((ctxt->standalone == 1) || 6302 ((ctxt->hasExternalSubset == 0) && 6303 (ctxt->hasPErefs == 0))) { 6304 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6305 "Entity '%s' not defined\n", name); 6306 } else { 6307 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6308 "Entity '%s' not defined\n", 6309 name); 6310 } 6311 /* TODO ? check regressions ctxt->valid = 0; */ 6312 } 6313 6314 /* 6315 * [ WFC: Parsed Entity ] 6316 * An entity reference must not contain the name of an 6317 * unparsed entity 6318 */ 6319 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6320 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6321 "Entity reference to unparsed entity %s\n", name); 6322 } 6323 6324 /* 6325 * [ WFC: No External Entity References ] 6326 * Attribute values cannot contain direct or indirect 6327 * entity references to external entities. 6328 */ 6329 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6330 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6331 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6332 "Attribute references external entity '%s'\n", name); 6333 } 6334 /* 6335 * [ WFC: No < in Attribute Values ] 6336 * The replacement text of any entity referred to directly or 6337 * indirectly in an attribute value (other than "<") must 6338 * not contain a <. 
6339 */ 6340 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6341 (ent != NULL) && 6342 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6343 (ent->content != NULL) && 6344 (xmlStrchr(ent->content, '<'))) { 6345 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6346 "'<' in entity '%s' is not allowed in attributes values\n", 6347 name); 6348 } 6349 6350 /* 6351 * Internal check, no parameter entities here ... 6352 */ 6353 else { 6354 switch (ent->etype) { 6355 case XML_INTERNAL_PARAMETER_ENTITY: 6356 case XML_EXTERNAL_PARAMETER_ENTITY: 6357 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6358 "Attempt to reference the parameter entity '%s'\n", 6359 name); 6360 break; 6361 default: 6362 break; 6363 } 6364 } 6365 6366 /* 6367 * [ WFC: No Recursion ] 6368 * A parsed entity must not contain a recursive reference 6369 * to itself, either directly or indirectly. 6370 * Done somewhere else 6371 */ 6372 6373 } else { 6374 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6375 } 6376 xmlFree(name); 6377 } 6378 } 6379 *str = ptr; 6380 return(ent); 6381} 6382 6383/** 6384 * xmlParsePEReference: 6385 * @ctxt: an XML parser context 6386 * 6387 * parse PEReference declarations 6388 * The entity content is handled directly by pushing it's content as 6389 * a new input stream. 6390 * 6391 * [69] PEReference ::= '%' Name ';' 6392 * 6393 * [ WFC: No Recursion ] 6394 * A parsed entity must not contain a recursive 6395 * reference to itself, either directly or indirectly. 6396 * 6397 * [ WFC: Entity Declared ] 6398 * In a document without any DTD, a document with only an internal DTD 6399 * subset which contains no parameter entity references, or a document 6400 * with "standalone='yes'", ... ... The declaration of a parameter 6401 * entity must precede any reference to it... 6402 * 6403 * [ VC: Entity Declared ] 6404 * In a document with an external subset or external parameter entities 6405 * with "standalone='no'", ... ... 
The declaration of a parameter entity 6406 * must precede any reference to it... 6407 * 6408 * [ WFC: In DTD ] 6409 * Parameter-entity references may only appear in the DTD. 6410 * NOTE: misleading but this is handled. 6411 */ 6412void 6413xmlParsePEReference(xmlParserCtxtPtr ctxt) 6414{ 6415 const xmlChar *name; 6416 xmlEntityPtr entity = NULL; 6417 xmlParserInputPtr input; 6418 6419 if (RAW == '%') { 6420 NEXT; 6421 name = xmlParseName(ctxt); 6422 if (name == NULL) { 6423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6424 "xmlParsePEReference: no name\n"); 6425 } else { 6426 if (RAW == ';') { 6427 NEXT; 6428 if ((ctxt->sax != NULL) && 6429 (ctxt->sax->getParameterEntity != NULL)) 6430 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6431 name); 6432 if (entity == NULL) { 6433 /* 6434 * [ WFC: Entity Declared ] 6435 * In a document without any DTD, a document with only an 6436 * internal DTD subset which contains no parameter entity 6437 * references, or a document with "standalone='yes'", ... 6438 * ... The declaration of a parameter entity must precede 6439 * any reference to it... 6440 */ 6441 if ((ctxt->standalone == 1) || 6442 ((ctxt->hasExternalSubset == 0) && 6443 (ctxt->hasPErefs == 0))) { 6444 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6445 "PEReference: %%%s; not found\n", 6446 name); 6447 } else { 6448 /* 6449 * [ VC: Entity Declared ] 6450 * In a document with an external subset or external 6451 * parameter entities with "standalone='no'", ... 6452 * ... The declaration of a parameter entity must 6453 * precede any reference to it... 
6454 */ 6455 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6456 "PEReference: %%%s; not found\n", 6457 name, NULL); 6458 ctxt->valid = 0; 6459 } 6460 } else { 6461 /* 6462 * Internal checking in case the entity quest barfed 6463 */ 6464 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6465 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6466 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6467 "Internal: %%%s; is not a parameter entity\n", 6468 name, NULL); 6469 } else if (ctxt->input->free != deallocblankswrapper) { 6470 input = 6471 xmlNewBlanksWrapperInputStream(ctxt, entity); 6472 xmlPushInput(ctxt, input); 6473 } else { 6474 /* 6475 * TODO !!! 6476 * handle the extra spaces added before and after 6477 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6478 */ 6479 input = xmlNewEntityInputStream(ctxt, entity); 6480 xmlPushInput(ctxt, input); 6481 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6482 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6483 (IS_BLANK_CH(NXT(5)))) { 6484 xmlParseTextDecl(ctxt); 6485 if (ctxt->errNo == 6486 XML_ERR_UNSUPPORTED_ENCODING) { 6487 /* 6488 * The XML REC instructs us to stop parsing 6489 * right here 6490 */ 6491 ctxt->instate = XML_PARSER_EOF; 6492 return; 6493 } 6494 } 6495 } 6496 } 6497 ctxt->hasPErefs = 1; 6498 } else { 6499 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6500 } 6501 } 6502 } 6503} 6504 6505/** 6506 * xmlParseStringPEReference: 6507 * @ctxt: an XML parser context 6508 * @str: a pointer to an index in the string 6509 * 6510 * parse PEReference declarations 6511 * 6512 * [69] PEReference ::= '%' Name ';' 6513 * 6514 * [ WFC: No Recursion ] 6515 * A parsed entity must not contain a recursive 6516 * reference to itself, either directly or indirectly. 6517 * 6518 * [ WFC: Entity Declared ] 6519 * In a document without any DTD, a document with only an internal DTD 6520 * subset which contains no parameter entity references, or a document 6521 * with "standalone='yes'", ... ... 
The declaration of a parameter 6522 * entity must precede any reference to it... 6523 * 6524 * [ VC: Entity Declared ] 6525 * In a document with an external subset or external parameter entities 6526 * with "standalone='no'", ... ... The declaration of a parameter entity 6527 * must precede any reference to it... 6528 * 6529 * [ WFC: In DTD ] 6530 * Parameter-entity references may only appear in the DTD. 6531 * NOTE: misleading but this is handled. 6532 * 6533 * Returns the string of the entity content. 6534 * str is updated to the current value of the index 6535 */ 6536xmlEntityPtr 6537xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6538 const xmlChar *ptr; 6539 xmlChar cur; 6540 xmlChar *name; 6541 xmlEntityPtr entity = NULL; 6542 6543 if ((str == NULL) || (*str == NULL)) return(NULL); 6544 ptr = *str; 6545 cur = *ptr; 6546 if (cur == '%') { 6547 ptr++; 6548 cur = *ptr; 6549 name = xmlParseStringName(ctxt, &ptr); 6550 if (name == NULL) { 6551 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6552 "xmlParseStringPEReference: no name\n"); 6553 } else { 6554 cur = *ptr; 6555 if (cur == ';') { 6556 ptr++; 6557 cur = *ptr; 6558 if ((ctxt->sax != NULL) && 6559 (ctxt->sax->getParameterEntity != NULL)) 6560 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6561 name); 6562 if (entity == NULL) { 6563 /* 6564 * [ WFC: Entity Declared ] 6565 * In a document without any DTD, a document with only an 6566 * internal DTD subset which contains no parameter entity 6567 * references, or a document with "standalone='yes'", ... 6568 * ... The declaration of a parameter entity must precede 6569 * any reference to it... 
6570 */ 6571 if ((ctxt->standalone == 1) || 6572 ((ctxt->hasExternalSubset == 0) && 6573 (ctxt->hasPErefs == 0))) { 6574 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6575 "PEReference: %%%s; not found\n", name); 6576 } else { 6577 /* 6578 * [ VC: Entity Declared ] 6579 * In a document with an external subset or external 6580 * parameter entities with "standalone='no'", ... 6581 * ... The declaration of a parameter entity must 6582 * precede any reference to it... 6583 */ 6584 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6585 "PEReference: %%%s; not found\n", 6586 name, NULL); 6587 ctxt->valid = 0; 6588 } 6589 } else { 6590 /* 6591 * Internal checking in case the entity quest barfed 6592 */ 6593 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6594 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6595 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6596 "%%%s; is not a parameter entity\n", 6597 name, NULL); 6598 } 6599 } 6600 ctxt->hasPErefs = 1; 6601 } else { 6602 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6603 } 6604 xmlFree(name); 6605 } 6606 } 6607 *str = ptr; 6608 return(entity); 6609} 6610 6611/** 6612 * xmlParseDocTypeDecl: 6613 * @ctxt: an XML parser context 6614 * 6615 * parse a DOCTYPE declaration 6616 * 6617 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6618 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6619 * 6620 * [ VC: Root Element Type ] 6621 * The Name in the document type declaration must match the element 6622 * type of the root element. 6623 */ 6624 6625void 6626xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6627 const xmlChar *name = NULL; 6628 xmlChar *ExternalID = NULL; 6629 xmlChar *URI = NULL; 6630 6631 /* 6632 * We know that '<!DOCTYPE' has been detected. 6633 */ 6634 SKIP(9); 6635 6636 SKIP_BLANKS; 6637 6638 /* 6639 * Parse the DOCTYPE name. 
6640 */ 6641 name = xmlParseName(ctxt); 6642 if (name == NULL) { 6643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6644 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6645 } 6646 ctxt->intSubName = name; 6647 6648 SKIP_BLANKS; 6649 6650 /* 6651 * Check for SystemID and ExternalID 6652 */ 6653 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6654 6655 if ((URI != NULL) || (ExternalID != NULL)) { 6656 ctxt->hasExternalSubset = 1; 6657 } 6658 ctxt->extSubURI = URI; 6659 ctxt->extSubSystem = ExternalID; 6660 6661 SKIP_BLANKS; 6662 6663 /* 6664 * Create and update the internal subset. 6665 */ 6666 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6667 (!ctxt->disableSAX)) 6668 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6669 6670 /* 6671 * Is there any internal subset declarations ? 6672 * they are handled separately in xmlParseInternalSubset() 6673 */ 6674 if (RAW == '[') 6675 return; 6676 6677 /* 6678 * We should be at the end of the DOCTYPE declaration. 6679 */ 6680 if (RAW != '>') { 6681 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6682 } 6683 NEXT; 6684} 6685 6686/** 6687 * xmlParseInternalSubset: 6688 * @ctxt: an XML parser context 6689 * 6690 * parse the internal subset declaration 6691 * 6692 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6693 */ 6694 6695static void 6696xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6697 /* 6698 * Is there any DTD definition ? 6699 */ 6700 if (RAW == '[') { 6701 ctxt->instate = XML_PARSER_DTD; 6702 NEXT; 6703 /* 6704 * Parse the succession of Markup declarations and 6705 * PEReferences. 6706 * Subsequence (markupdecl | PEReference | S)* 6707 */ 6708 while (RAW != ']') { 6709 const xmlChar *check = CUR_PTR; 6710 unsigned int cons = ctxt->input->consumed; 6711 6712 SKIP_BLANKS; 6713 xmlParseMarkupDecl(ctxt); 6714 xmlParsePEReference(ctxt); 6715 6716 /* 6717 * Pop-up of finished entities. 
6718 */ 6719 while ((RAW == 0) && (ctxt->inputNr > 1)) 6720 xmlPopInput(ctxt); 6721 6722 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6723 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6724 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6725 break; 6726 } 6727 } 6728 if (RAW == ']') { 6729 NEXT; 6730 SKIP_BLANKS; 6731 } 6732 } 6733 6734 /* 6735 * We should be at the end of the DOCTYPE declaration. 6736 */ 6737 if (RAW != '>') { 6738 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6739 } 6740 NEXT; 6741} 6742 6743#ifdef LIBXML_SAX1_ENABLED 6744/** 6745 * xmlParseAttribute: 6746 * @ctxt: an XML parser context 6747 * @value: a xmlChar ** used to store the value of the attribute 6748 * 6749 * parse an attribute 6750 * 6751 * [41] Attribute ::= Name Eq AttValue 6752 * 6753 * [ WFC: No External Entity References ] 6754 * Attribute values cannot contain direct or indirect entity references 6755 * to external entities. 6756 * 6757 * [ WFC: No < in Attribute Values ] 6758 * The replacement text of any entity referred to directly or indirectly in 6759 * an attribute value (other than "<") must not contain a <. 6760 * 6761 * [ VC: Attribute Value Type ] 6762 * The attribute must have been declared; the value must be of the type 6763 * declared for it. 6764 * 6765 * [25] Eq ::= S? '=' S? 6766 * 6767 * With namespace: 6768 * 6769 * [NS 11] Attribute ::= QName Eq AttValue 6770 * 6771 * Also the case QName == xmlns:??? is handled independently as a namespace 6772 * definition. 6773 * 6774 * Returns the attribute name, and the value in *value. 
6775 */ 6776 6777const xmlChar * 6778xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6779 const xmlChar *name; 6780 xmlChar *val; 6781 6782 *value = NULL; 6783 GROW; 6784 name = xmlParseName(ctxt); 6785 if (name == NULL) { 6786 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6787 "error parsing attribute name\n"); 6788 return(NULL); 6789 } 6790 6791 /* 6792 * read the value 6793 */ 6794 SKIP_BLANKS; 6795 if (RAW == '=') { 6796 NEXT; 6797 SKIP_BLANKS; 6798 val = xmlParseAttValue(ctxt); 6799 ctxt->instate = XML_PARSER_CONTENT; 6800 } else { 6801 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6802 "Specification mandate value for attribute %s\n", name); 6803 return(NULL); 6804 } 6805 6806 /* 6807 * Check that xml:lang conforms to the specification 6808 * No more registered as an error, just generate a warning now 6809 * since this was deprecated in XML second edition 6810 */ 6811 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6812 if (!xmlCheckLanguageID(val)) { 6813 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 6814 "Malformed value for xml:lang : %s\n", 6815 val, NULL); 6816 } 6817 } 6818 6819 /* 6820 * Check that xml:space conforms to the specification 6821 */ 6822 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6823 if (xmlStrEqual(val, BAD_CAST "default")) 6824 *(ctxt->space) = 0; 6825 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6826 *(ctxt->space) = 1; 6827 else { 6828 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6829"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 6830 val); 6831 } 6832 } 6833 6834 *value = val; 6835 return(name); 6836} 6837 6838/** 6839 * xmlParseStartTag: 6840 * @ctxt: an XML parser context 6841 * 6842 * parse a start of tag either for rule element or 6843 * EmptyElement. In both case we don't parse the tag closing chars. 6844 * 6845 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 6846 * 6847 * [ WFC: Unique Att Spec ] 6848 * No attribute name may appear more than once in the same start-tag or 6849 * empty-element tag. 6850 * 6851 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6852 * 6853 * [ WFC: Unique Att Spec ] 6854 * No attribute name may appear more than once in the same start-tag or 6855 * empty-element tag. 6856 * 6857 * With namespace: 6858 * 6859 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6860 * 6861 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6862 * 6863 * Returns the element name parsed 6864 */ 6865 6866const xmlChar * 6867xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6868 const xmlChar *name; 6869 const xmlChar *attname; 6870 xmlChar *attvalue; 6871 const xmlChar **atts = ctxt->atts; 6872 int nbatts = 0; 6873 int maxatts = ctxt->maxatts; 6874 int i; 6875 6876 if (RAW != '<') return(NULL); 6877 NEXT1; 6878 6879 name = xmlParseName(ctxt); 6880 if (name == NULL) { 6881 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6882 "xmlParseStartTag: invalid element name\n"); 6883 return(NULL); 6884 } 6885 6886 /* 6887 * Now parse the attributes, it ends up with the ending 6888 * 6889 * (S Attribute)* S? 6890 */ 6891 SKIP_BLANKS; 6892 GROW; 6893 6894 while ((RAW != '>') && 6895 ((RAW != '/') || (NXT(1) != '>')) && 6896 (IS_BYTE_CHAR(RAW))) { 6897 const xmlChar *q = CUR_PTR; 6898 unsigned int cons = ctxt->input->consumed; 6899 6900 attname = xmlParseAttribute(ctxt, &attvalue); 6901 if ((attname != NULL) && (attvalue != NULL)) { 6902 /* 6903 * [ WFC: Unique Att Spec ] 6904 * No attribute name may appear more than once in the same 6905 * start-tag or empty-element tag. 
6906 */ 6907 for (i = 0; i < nbatts;i += 2) { 6908 if (xmlStrEqual(atts[i], attname)) { 6909 xmlErrAttributeDup(ctxt, NULL, attname); 6910 xmlFree(attvalue); 6911 goto failed; 6912 } 6913 } 6914 /* 6915 * Add the pair to atts 6916 */ 6917 if (atts == NULL) { 6918 maxatts = 22; /* allow for 10 attrs by default */ 6919 atts = (const xmlChar **) 6920 xmlMalloc(maxatts * sizeof(xmlChar *)); 6921 if (atts == NULL) { 6922 xmlErrMemory(ctxt, NULL); 6923 if (attvalue != NULL) 6924 xmlFree(attvalue); 6925 goto failed; 6926 } 6927 ctxt->atts = atts; 6928 ctxt->maxatts = maxatts; 6929 } else if (nbatts + 4 > maxatts) { 6930 const xmlChar **n; 6931 6932 maxatts *= 2; 6933 n = (const xmlChar **) xmlRealloc((void *) atts, 6934 maxatts * sizeof(const xmlChar *)); 6935 if (n == NULL) { 6936 xmlErrMemory(ctxt, NULL); 6937 if (attvalue != NULL) 6938 xmlFree(attvalue); 6939 goto failed; 6940 } 6941 atts = n; 6942 ctxt->atts = atts; 6943 ctxt->maxatts = maxatts; 6944 } 6945 atts[nbatts++] = attname; 6946 atts[nbatts++] = attvalue; 6947 atts[nbatts] = NULL; 6948 atts[nbatts + 1] = NULL; 6949 } else { 6950 if (attvalue != NULL) 6951 xmlFree(attvalue); 6952 } 6953 6954failed: 6955 6956 GROW 6957 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6958 break; 6959 if (!IS_BLANK_CH(RAW)) { 6960 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6961 "attributes construct error\n"); 6962 } 6963 SKIP_BLANKS; 6964 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 6965 (attname == NULL) && (attvalue == NULL)) { 6966 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 6967 "xmlParseStartTag: problem parsing attributes\n"); 6968 break; 6969 } 6970 SHRINK; 6971 GROW; 6972 } 6973 6974 /* 6975 * SAX: Start of Element ! 
6976 */ 6977 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6978 (!ctxt->disableSAX)) { 6979 if (nbatts > 0) 6980 ctxt->sax->startElement(ctxt->userData, name, atts); 6981 else 6982 ctxt->sax->startElement(ctxt->userData, name, NULL); 6983 } 6984 6985 if (atts != NULL) { 6986 /* Free only the content strings */ 6987 for (i = 1;i < nbatts;i+=2) 6988 if (atts[i] != NULL) 6989 xmlFree((xmlChar *) atts[i]); 6990 } 6991 return(name); 6992} 6993 6994/** 6995 * xmlParseEndTag1: 6996 * @ctxt: an XML parser context 6997 * @line: line of the start tag 6998 * @nsNr: number of namespaces on the start tag 6999 * 7000 * parse an end of tag 7001 * 7002 * [42] ETag ::= '</' Name S? '>' 7003 * 7004 * With namespace 7005 * 7006 * [NS 9] ETag ::= '</' QName S? '>' 7007 */ 7008 7009static void 7010xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 7011 const xmlChar *name; 7012 7013 GROW; 7014 if ((RAW != '<') || (NXT(1) != '/')) { 7015 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 7016 "xmlParseEndTag: '</' not found\n"); 7017 return; 7018 } 7019 SKIP(2); 7020 7021 name = xmlParseNameAndCompare(ctxt,ctxt->name); 7022 7023 /* 7024 * We should definitely be at the ending "S? '>'" part 7025 */ 7026 GROW; 7027 SKIP_BLANKS; 7028 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7029 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7030 } else 7031 NEXT1; 7032 7033 /* 7034 * [ WFC: Element Type Match ] 7035 * The Name in an element's end-tag must match the element type in the 7036 * start-tag. 
7037 * 7038 */ 7039 if (name != (xmlChar*)1) { 7040 if (name == NULL) name = BAD_CAST "unparseable"; 7041 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7042 "Opening and ending tag mismatch: %s line %d and %s\n", 7043 ctxt->name, line, name); 7044 } 7045 7046 /* 7047 * SAX: End of Tag 7048 */ 7049 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7050 (!ctxt->disableSAX)) 7051 ctxt->sax->endElement(ctxt->userData, ctxt->name); 7052 7053 namePop(ctxt); 7054 spacePop(ctxt); 7055 return; 7056} 7057 7058/** 7059 * xmlParseEndTag: 7060 * @ctxt: an XML parser context 7061 * 7062 * parse an end of tag 7063 * 7064 * [42] ETag ::= '</' Name S? '>' 7065 * 7066 * With namespace 7067 * 7068 * [NS 9] ETag ::= '</' QName S? '>' 7069 */ 7070 7071void 7072xmlParseEndTag(xmlParserCtxtPtr ctxt) { 7073 xmlParseEndTag1(ctxt, 0); 7074} 7075#endif /* LIBXML_SAX1_ENABLED */ 7076 7077/************************************************************************ 7078 * * 7079 * SAX 2 specific operations * 7080 * * 7081 ************************************************************************/ 7082 7083static const xmlChar * 7084xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 7085 int len = 0, l; 7086 int c; 7087 int count = 0; 7088 7089 /* 7090 * Handler for more complex cases 7091 */ 7092 GROW; 7093 c = CUR_CHAR(l); 7094 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 7095 (!IS_LETTER(c) && (c != '_'))) { 7096 return(NULL); 7097 } 7098 7099 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 7100 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 7101 (c == '.') || (c == '-') || (c == '_') || 7102 (IS_COMBINING(c)) || 7103 (IS_EXTENDER(c)))) { 7104 if (count++ > 100) { 7105 count = 0; 7106 GROW; 7107 } 7108 len += l; 7109 NEXTL(l); 7110 c = CUR_CHAR(l); 7111 } 7112 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 7113} 7114 7115/* 7116 * xmlGetNamespace: 7117 * @ctxt: an XML parser context 7118 * @prefix: the prefix to lookup 7119 * 
7120 * Lookup the namespace name for the @prefix (which ca be NULL) 7121 * The prefix must come from the @ctxt->dict dictionnary 7122 * 7123 * Returns the namespace name or NULL if not bound 7124 */ 7125static const xmlChar * 7126xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 7127 int i; 7128 7129 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 7130 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 7131 if (ctxt->nsTab[i] == prefix) { 7132 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 7133 return(NULL); 7134 return(ctxt->nsTab[i + 1]); 7135 } 7136 return(NULL); 7137} 7138 7139/** 7140 * xmlParseNCName: 7141 * @ctxt: an XML parser context 7142 * 7143 * parse an XML name. 7144 * 7145 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 7146 * CombiningChar | Extender 7147 * 7148 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 7149 * 7150 * Returns the Name parsed or NULL 7151 */ 7152 7153static const xmlChar * 7154xmlParseNCName(xmlParserCtxtPtr ctxt) { 7155 const xmlChar *in; 7156 const xmlChar *ret; 7157 int count = 0; 7158 7159 /* 7160 * Accelerator for simple ASCII names 7161 */ 7162 in = ctxt->input->cur; 7163 if (((*in >= 0x61) && (*in <= 0x7A)) || 7164 ((*in >= 0x41) && (*in <= 0x5A)) || 7165 (*in == '_')) { 7166 in++; 7167 while (((*in >= 0x61) && (*in <= 0x7A)) || 7168 ((*in >= 0x41) && (*in <= 0x5A)) || 7169 ((*in >= 0x30) && (*in <= 0x39)) || 7170 (*in == '_') || (*in == '-') || 7171 (*in == '.')) 7172 in++; 7173 if ((*in > 0) && (*in < 0x80)) { 7174 count = in - ctxt->input->cur; 7175 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 7176 ctxt->input->cur = in; 7177 ctxt->nbChars += count; 7178 ctxt->input->col += count; 7179 if (ret == NULL) { 7180 xmlErrMemory(ctxt, NULL); 7181 } 7182 return(ret); 7183 } 7184 } 7185 return(xmlParseNCNameComplex(ctxt)); 7186} 7187 7188/** 7189 * xmlParseQName: 7190 * @ctxt: an XML parser context 7191 * @prefix: pointer to store the prefix part 7192 * 7193 * parse an XML Namespace QName 7194 
* 7195 * [6] QName ::= (Prefix ':')? LocalPart 7196 * [7] Prefix ::= NCName 7197 * [8] LocalPart ::= NCName 7198 * 7199 * Returns the Name parsed or NULL 7200 */ 7201 7202static const xmlChar * 7203xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 7204 const xmlChar *l, *p; 7205 7206 GROW; 7207 7208 l = xmlParseNCName(ctxt); 7209 if (l == NULL) { 7210 if (CUR == ':') { 7211 l = xmlParseName(ctxt); 7212 if (l != NULL) { 7213 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7214 "Failed to parse QName '%s'\n", l, NULL, NULL); 7215 *prefix = NULL; 7216 return(l); 7217 } 7218 } 7219 return(NULL); 7220 } 7221 if (CUR == ':') { 7222 NEXT; 7223 p = l; 7224 l = xmlParseNCName(ctxt); 7225 if (l == NULL) { 7226 xmlChar *tmp; 7227 7228 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7229 "Failed to parse QName '%s:'\n", p, NULL, NULL); 7230 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 7231 p = xmlDictLookup(ctxt->dict, tmp, -1); 7232 if (tmp != NULL) xmlFree(tmp); 7233 *prefix = NULL; 7234 return(p); 7235 } 7236 if (CUR == ':') { 7237 xmlChar *tmp; 7238 7239 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7240 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 7241 NEXT; 7242 tmp = (xmlChar *) xmlParseName(ctxt); 7243 if (tmp != NULL) { 7244 tmp = xmlBuildQName(tmp, l, NULL, 0); 7245 l = xmlDictLookup(ctxt->dict, tmp, -1); 7246 if (tmp != NULL) xmlFree(tmp); 7247 *prefix = p; 7248 return(l); 7249 } 7250 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 7251 l = xmlDictLookup(ctxt->dict, tmp, -1); 7252 if (tmp != NULL) xmlFree(tmp); 7253 *prefix = p; 7254 return(l); 7255 } 7256 *prefix = p; 7257 } else 7258 *prefix = NULL; 7259 return(l); 7260} 7261 7262/** 7263 * xmlParseQNameAndCompare: 7264 * @ctxt: an XML parser context 7265 * @name: the localname 7266 * @prefix: the prefix, if any. 
7267 * 7268 * parse an XML name and compares for match 7269 * (specialized for endtag parsing) 7270 * 7271 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7272 * and the name for mismatch 7273 */ 7274 7275static const xmlChar * 7276xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7277 xmlChar const *prefix) { 7278 const xmlChar *cmp = name; 7279 const xmlChar *in; 7280 const xmlChar *ret; 7281 const xmlChar *prefix2; 7282 7283 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7284 7285 GROW; 7286 in = ctxt->input->cur; 7287 7288 cmp = prefix; 7289 while (*in != 0 && *in == *cmp) { 7290 ++in; 7291 ++cmp; 7292 } 7293 if ((*cmp == 0) && (*in == ':')) { 7294 in++; 7295 cmp = name; 7296 while (*in != 0 && *in == *cmp) { 7297 ++in; 7298 ++cmp; 7299 } 7300 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 7301 /* success */ 7302 ctxt->input->cur = in; 7303 return((const xmlChar*) 1); 7304 } 7305 } 7306 /* 7307 * all strings coms from the dictionary, equality can be done directly 7308 */ 7309 ret = xmlParseQName (ctxt, &prefix2); 7310 if ((ret == name) && (prefix == prefix2)) 7311 return((const xmlChar*) 1); 7312 return ret; 7313} 7314 7315/** 7316 * xmlParseAttValueInternal: 7317 * @ctxt: an XML parser context 7318 * @len: attribute len result 7319 * @alloc: whether the attribute was reallocated as a new string 7320 * @normalize: if 1 then further non-CDATA normalization must be done 7321 * 7322 * parse a value for an attribute. 7323 * NOTE: if no normalization is needed, the routine will return pointers 7324 * directly from the data buffer. 
7325 * 7326 * 3.3.3 Attribute-Value Normalization: 7327 * Before the value of an attribute is passed to the application or 7328 * checked for validity, the XML processor must normalize it as follows: 7329 * - a character reference is processed by appending the referenced 7330 * character to the attribute value 7331 * - an entity reference is processed by recursively processing the 7332 * replacement text of the entity 7333 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 7334 * appending #x20 to the normalized value, except that only a single 7335 * #x20 is appended for a "#xD#xA" sequence that is part of an external 7336 * parsed entity or the literal entity value of an internal parsed entity 7337 * - other characters are processed by appending them to the normalized value 7338 * If the declared value is not CDATA, then the XML processor must further 7339 * process the normalized attribute value by discarding any leading and 7340 * trailing space (#x20) characters, and by replacing sequences of space 7341 * (#x20) characters by a single space (#x20) character. 7342 * All attributes for which no declaration has been read should be treated 7343 * by a non-validating parser as if declared CDATA. 7344 * 7345 * Returns the AttValue parsed or NULL. The value has to be freed by the 7346 * caller if it was copied, this can be detected by val[*len] == 0. 
7347 */ 7348 7349static xmlChar * 7350xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7351 int normalize) 7352{ 7353 xmlChar limit = 0; 7354 const xmlChar *in = NULL, *start, *end, *last; 7355 xmlChar *ret = NULL; 7356 7357 GROW; 7358 in = (xmlChar *) CUR_PTR; 7359 if (*in != '"' && *in != '\'') { 7360 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7361 return (NULL); 7362 } 7363 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7364 7365 /* 7366 * try to handle in this routine the most common case where no 7367 * allocation of a new string is required and where content is 7368 * pure ASCII. 7369 */ 7370 limit = *in++; 7371 end = ctxt->input->end; 7372 start = in; 7373 if (in >= end) { 7374 const xmlChar *oldbase = ctxt->input->base; 7375 GROW; 7376 if (oldbase != ctxt->input->base) { 7377 long delta = ctxt->input->base - oldbase; 7378 start = start + delta; 7379 in = in + delta; 7380 } 7381 end = ctxt->input->end; 7382 } 7383 if (normalize) { 7384 /* 7385 * Skip any leading spaces 7386 */ 7387 while ((in < end) && (*in != limit) && 7388 ((*in == 0x20) || (*in == 0x9) || 7389 (*in == 0xA) || (*in == 0xD))) { 7390 in++; 7391 start = in; 7392 if (in >= end) { 7393 const xmlChar *oldbase = ctxt->input->base; 7394 GROW; 7395 if (oldbase != ctxt->input->base) { 7396 long delta = ctxt->input->base - oldbase; 7397 start = start + delta; 7398 in = in + delta; 7399 } 7400 end = ctxt->input->end; 7401 } 7402 } 7403 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7404 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7405 if ((*in++ == 0x20) && (*in == 0x20)) break; 7406 if (in >= end) { 7407 const xmlChar *oldbase = ctxt->input->base; 7408 GROW; 7409 if (oldbase != ctxt->input->base) { 7410 long delta = ctxt->input->base - oldbase; 7411 start = start + delta; 7412 in = in + delta; 7413 } 7414 end = ctxt->input->end; 7415 } 7416 } 7417 last = in; 7418 /* 7419 * skip the trailing blanks 7420 */ 7421 while ((last[-1] == 0x20) && (last > start)) 
last--; 7422 while ((in < end) && (*in != limit) && 7423 ((*in == 0x20) || (*in == 0x9) || 7424 (*in == 0xA) || (*in == 0xD))) { 7425 in++; 7426 if (in >= end) { 7427 const xmlChar *oldbase = ctxt->input->base; 7428 GROW; 7429 if (oldbase != ctxt->input->base) { 7430 long delta = ctxt->input->base - oldbase; 7431 start = start + delta; 7432 in = in + delta; 7433 last = last + delta; 7434 } 7435 end = ctxt->input->end; 7436 } 7437 } 7438 if (*in != limit) goto need_complex; 7439 } else { 7440 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7441 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7442 in++; 7443 if (in >= end) { 7444 const xmlChar *oldbase = ctxt->input->base; 7445 GROW; 7446 if (oldbase != ctxt->input->base) { 7447 long delta = ctxt->input->base - oldbase; 7448 start = start + delta; 7449 in = in + delta; 7450 } 7451 end = ctxt->input->end; 7452 } 7453 } 7454 last = in; 7455 if (*in != limit) goto need_complex; 7456 } 7457 in++; 7458 if (len != NULL) { 7459 *len = last - start; 7460 ret = (xmlChar *) start; 7461 } else { 7462 if (alloc) *alloc = 1; 7463 ret = xmlStrndup(start, last - start); 7464 } 7465 CUR_PTR = in; 7466 if (alloc) *alloc = 0; 7467 return ret; 7468need_complex: 7469 if (alloc) *alloc = 1; 7470 return xmlParseAttValueComplex(ctxt, len, normalize); 7471} 7472 7473/** 7474 * xmlParseAttribute2: 7475 * @ctxt: an XML parser context 7476 * @pref: the element prefix 7477 * @elem: the element name 7478 * @prefix: a xmlChar ** used to store the value of the attribute prefix 7479 * @value: a xmlChar ** used to store the value of the attribute 7480 * @len: an int * to save the length of the attribute 7481 * @alloc: an int * to indicate if the attribute was allocated 7482 * 7483 * parse an attribute in the new SAX2 framework. 7484 * 7485 * Returns the attribute name, and the value in *value, . 
7486 */ 7487 7488static const xmlChar * 7489xmlParseAttribute2(xmlParserCtxtPtr ctxt, 7490 const xmlChar *pref, const xmlChar *elem, 7491 const xmlChar **prefix, xmlChar **value, 7492 int *len, int *alloc) { 7493 const xmlChar *name; 7494 xmlChar *val; 7495 int normalize = 0; 7496 7497 *value = NULL; 7498 GROW; 7499 name = xmlParseQName(ctxt, prefix); 7500 if (name == NULL) { 7501 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7502 "error parsing attribute name\n"); 7503 return(NULL); 7504 } 7505 7506 /* 7507 * get the type if needed 7508 */ 7509 if (ctxt->attsSpecial != NULL) { 7510 int type; 7511 7512 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 7513 pref, elem, *prefix, name); 7514 if (type != 0) normalize = 1; 7515 } 7516 7517 /* 7518 * read the value 7519 */ 7520 SKIP_BLANKS; 7521 if (RAW == '=') { 7522 NEXT; 7523 SKIP_BLANKS; 7524 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 7525 ctxt->instate = XML_PARSER_CONTENT; 7526 } else { 7527 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7528 "Specification mandate value for attribute %s\n", name); 7529 return(NULL); 7530 } 7531 7532 /* 7533 * Check that xml:lang conforms to the specification 7534 * No more registered as an error, just generate a warning now 7535 * since this was deprecated in XML second edition 7536 */ 7537 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7538 if (!xmlCheckLanguageID(val)) { 7539 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7540 "Malformed value for xml:lang : %s\n", 7541 val, NULL); 7542 } 7543 } 7544 7545 /* 7546 * Check that xml:space conforms to the specification 7547 */ 7548 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7549 if (xmlStrEqual(val, BAD_CAST "default")) 7550 *(ctxt->space) = 0; 7551 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7552 *(ctxt->space) = 1; 7553 else { 7554 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7555"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7556 
val); 7557 } 7558 } 7559 7560 *value = val; 7561 return(name); 7562} 7563 7564/** 7565 * xmlParseStartTag2: 7566 * @ctxt: an XML parser context 7567 * 7568 * parse a start of tag either for rule element or 7569 * EmptyElement. In both case we don't parse the tag closing chars. 7570 * This routine is called when running SAX2 parsing 7571 * 7572 * [40] STag ::= '<' Name (S Attribute)* S? '>' 7573 * 7574 * [ WFC: Unique Att Spec ] 7575 * No attribute name may appear more than once in the same start-tag or 7576 * empty-element tag. 7577 * 7578 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7579 * 7580 * [ WFC: Unique Att Spec ] 7581 * No attribute name may appear more than once in the same start-tag or 7582 * empty-element tag. 7583 * 7584 * With namespace: 7585 * 7586 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7587 * 7588 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7589 * 7590 * Returns the element name parsed 7591 */ 7592 7593static const xmlChar * 7594xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 7595 const xmlChar **URI) { 7596 const xmlChar *localname; 7597 const xmlChar *prefix; 7598 const xmlChar *attname; 7599 const xmlChar *aprefix; 7600 const xmlChar *nsname; 7601 xmlChar *attvalue; 7602 const xmlChar **atts = ctxt->atts; 7603 int maxatts = ctxt->maxatts; 7604 int nratts, nbatts, nbdef; 7605 int i, j, nbNs, attval; 7606 const xmlChar *base; 7607 unsigned long cur; 7608 7609 if (RAW != '<') return(NULL); 7610 NEXT1; 7611 7612 /* 7613 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 7614 * point since the attribute values may be stored as pointers to 7615 * the buffer and calling SHRINK would destroy them ! 7616 * The Shrinking is only possible once the full set of attribute 7617 * callbacks have been done. 
7618 */ 7619reparse: 7620 SHRINK; 7621 base = ctxt->input->base; 7622 cur = ctxt->input->cur - ctxt->input->base; 7623 nbatts = 0; 7624 nratts = 0; 7625 nbdef = 0; 7626 nbNs = 0; 7627 attval = 0; 7628 7629 localname = xmlParseQName(ctxt, &prefix); 7630 if (localname == NULL) { 7631 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7632 "StartTag: invalid element name\n"); 7633 return(NULL); 7634 } 7635 7636 /* 7637 * Now parse the attributes, it ends up with the ending 7638 * 7639 * (S Attribute)* S? 7640 */ 7641 SKIP_BLANKS; 7642 GROW; 7643 if (ctxt->input->base != base) goto base_changed; 7644 7645 while ((RAW != '>') && 7646 ((RAW != '/') || (NXT(1) != '>')) && 7647 (IS_BYTE_CHAR(RAW))) { 7648 const xmlChar *q = CUR_PTR; 7649 unsigned int cons = ctxt->input->consumed; 7650 int len = -1, alloc = 0; 7651 7652 attname = xmlParseAttribute2(ctxt, prefix, localname, 7653 &aprefix, &attvalue, &len, &alloc); 7654 if ((attname != NULL) && (attvalue != NULL)) { 7655 if (len < 0) len = xmlStrlen(attvalue); 7656 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7657 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7658 xmlURIPtr uri; 7659 7660 if (*URL != 0) { 7661 uri = xmlParseURI((const char *) URL); 7662 if (uri == NULL) { 7663 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7664 "xmlns: %s not a valid URI\n", 7665 URL, NULL); 7666 } else { 7667 if (uri->scheme == NULL) { 7668 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7669 "xmlns: URI %s is not absolute\n", 7670 URL, NULL); 7671 } 7672 xmlFreeURI(uri); 7673 } 7674 } 7675 /* 7676 * check that it's not a defined namespace 7677 */ 7678 for (j = 1;j <= nbNs;j++) 7679 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7680 break; 7681 if (j <= nbNs) 7682 xmlErrAttributeDup(ctxt, NULL, attname); 7683 else 7684 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 7685 if (alloc != 0) xmlFree(attvalue); 7686 SKIP_BLANKS; 7687 continue; 7688 } 7689 if (aprefix == ctxt->str_xmlns) { 7690 const xmlChar *URL = xmlDictLookup(ctxt->dict, 
attvalue, len); 7691 xmlURIPtr uri; 7692 7693 if (attname == ctxt->str_xml) { 7694 if (URL != ctxt->str_xml_ns) { 7695 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 7696 "xml namespace prefix mapped to wrong URI\n", 7697 NULL, NULL, NULL); 7698 } 7699 /* 7700 * Do not keep a namespace definition node 7701 */ 7702 if (alloc != 0) xmlFree(attvalue); 7703 SKIP_BLANKS; 7704 continue; 7705 } 7706 uri = xmlParseURI((const char *) URL); 7707 if (uri == NULL) { 7708 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7709 "xmlns:%s: '%s' is not a valid URI\n", 7710 attname, URL); 7711 } else { 7712 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 7713 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7714 "xmlns:%s: URI %s is not absolute\n", 7715 attname, URL); 7716 } 7717 xmlFreeURI(uri); 7718 } 7719 7720 /* 7721 * check that it's not a defined namespace 7722 */ 7723 for (j = 1;j <= nbNs;j++) 7724 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7725 break; 7726 if (j <= nbNs) 7727 xmlErrAttributeDup(ctxt, aprefix, attname); 7728 else 7729 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 7730 if (alloc != 0) xmlFree(attvalue); 7731 SKIP_BLANKS; 7732 continue; 7733 } 7734 7735 /* 7736 * Add the pair to atts 7737 */ 7738 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7739 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7740 if (attvalue[len] == 0) 7741 xmlFree(attvalue); 7742 goto failed; 7743 } 7744 maxatts = ctxt->maxatts; 7745 atts = ctxt->atts; 7746 } 7747 ctxt->attallocs[nratts++] = alloc; 7748 atts[nbatts++] = attname; 7749 atts[nbatts++] = aprefix; 7750 atts[nbatts++] = NULL; /* the URI will be fetched later */ 7751 atts[nbatts++] = attvalue; 7752 attvalue += len; 7753 atts[nbatts++] = attvalue; 7754 /* 7755 * tag if some deallocation is needed 7756 */ 7757 if (alloc != 0) attval = 1; 7758 } else { 7759 if ((attvalue != NULL) && (attvalue[len] == 0)) 7760 xmlFree(attvalue); 7761 } 7762 7763failed: 7764 7765 GROW 7766 if (ctxt->input->base != base) goto base_changed; 7767 if ((RAW == '>') || (((RAW 
== '/') && (NXT(1) == '>')))) 7768 break; 7769 if (!IS_BLANK_CH(RAW)) { 7770 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7771 "attributes construct error\n"); 7772 } 7773 SKIP_BLANKS; 7774 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7775 (attname == NULL) && (attvalue == NULL)) { 7776 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7777 "xmlParseStartTag: problem parsing attributes\n"); 7778 break; 7779 } 7780 GROW; 7781 if (ctxt->input->base != base) goto base_changed; 7782 } 7783 7784 /* 7785 * The attributes checkings 7786 */ 7787 for (i = 0; i < nbatts;i += 5) { 7788 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 7789 if ((atts[i + 1] != NULL) && (nsname == NULL)) { 7790 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7791 "Namespace prefix %s for %s on %s is not defined\n", 7792 atts[i + 1], atts[i], localname); 7793 } 7794 atts[i + 2] = nsname; 7795 /* 7796 * [ WFC: Unique Att Spec ] 7797 * No attribute name may appear more than once in the same 7798 * start-tag or empty-element tag. 7799 * As extended by the Namespace in XML REC. 
7800 */ 7801 for (j = 0; j < i;j += 5) { 7802 if (atts[i] == atts[j]) { 7803 if (atts[i+1] == atts[j+1]) { 7804 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 7805 break; 7806 } 7807 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 7808 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 7809 "Namespaced Attribute %s in '%s' redefined\n", 7810 atts[i], nsname, NULL); 7811 break; 7812 } 7813 } 7814 } 7815 } 7816 7817 /* 7818 * The attributes defaulting 7819 */ 7820 if (ctxt->attsDefault != NULL) { 7821 xmlDefAttrsPtr defaults; 7822 7823 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 7824 if (defaults != NULL) { 7825 for (i = 0;i < defaults->nbAttrs;i++) { 7826 attname = defaults->values[4 * i]; 7827 aprefix = defaults->values[4 * i + 1]; 7828 7829 /* 7830 * special work for namespaces defaulted defs 7831 */ 7832 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7833 /* 7834 * check that it's not a defined namespace 7835 */ 7836 for (j = 1;j <= nbNs;j++) 7837 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7838 break; 7839 if (j <= nbNs) continue; 7840 7841 nsname = xmlGetNamespace(ctxt, NULL); 7842 if (nsname != defaults->values[4 * i + 2]) { 7843 if (nsPush(ctxt, NULL, 7844 defaults->values[4 * i + 2]) > 0) 7845 nbNs++; 7846 } 7847 } else if (aprefix == ctxt->str_xmlns) { 7848 /* 7849 * check that it's not a defined namespace 7850 */ 7851 for (j = 1;j <= nbNs;j++) 7852 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7853 break; 7854 if (j <= nbNs) continue; 7855 7856 nsname = xmlGetNamespace(ctxt, attname); 7857 if (nsname != defaults->values[2]) { 7858 if (nsPush(ctxt, attname, 7859 defaults->values[4 * i + 2]) > 0) 7860 nbNs++; 7861 } 7862 } else { 7863 /* 7864 * check that it's not a defined attribute 7865 */ 7866 for (j = 0;j < nbatts;j+=5) { 7867 if ((attname == atts[j]) && (aprefix == atts[j+1])) 7868 break; 7869 } 7870 if (j < nbatts) continue; 7871 7872 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7873 if (xmlCtxtGrowAttrs(ctxt, nbatts 
+ 5) < 0) { 7874 return(NULL); 7875 } 7876 maxatts = ctxt->maxatts; 7877 atts = ctxt->atts; 7878 } 7879 atts[nbatts++] = attname; 7880 atts[nbatts++] = aprefix; 7881 if (aprefix == NULL) 7882 atts[nbatts++] = NULL; 7883 else 7884 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 7885 atts[nbatts++] = defaults->values[4 * i + 2]; 7886 atts[nbatts++] = defaults->values[4 * i + 3]; 7887 nbdef++; 7888 } 7889 } 7890 } 7891 } 7892 7893 nsname = xmlGetNamespace(ctxt, prefix); 7894 if ((prefix != NULL) && (nsname == NULL)) { 7895 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7896 "Namespace prefix %s on %s is not defined\n", 7897 prefix, localname, NULL); 7898 } 7899 *pref = prefix; 7900 *URI = nsname; 7901 7902 /* 7903 * SAX: Start of Element ! 7904 */ 7905 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 7906 (!ctxt->disableSAX)) { 7907 if (nbNs > 0) 7908 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7909 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 7910 nbatts / 5, nbdef, atts); 7911 else 7912 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7913 nsname, 0, NULL, nbatts / 5, nbdef, atts); 7914 } 7915 7916 /* 7917 * Free up attribute allocated strings if needed 7918 */ 7919 if (attval != 0) { 7920 for (i = 3,j = 0; j < nratts;i += 5,j++) 7921 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7922 xmlFree((xmlChar *) atts[i]); 7923 } 7924 7925 return(localname); 7926 7927base_changed: 7928 /* 7929 * the attribute strings are valid iif the base didn't changed 7930 */ 7931 if (attval != 0) { 7932 for (i = 3,j = 0; j < nratts;i += 5,j++) 7933 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7934 xmlFree((xmlChar *) atts[i]); 7935 } 7936 ctxt->input->cur = ctxt->input->base + cur; 7937 if (ctxt->wellFormed == 1) { 7938 goto reparse; 7939 } 7940 return(NULL); 7941} 7942 7943/** 7944 * xmlParseEndTag2: 7945 * @ctxt: an XML parser context 7946 * @line: line of the start tag 7947 * @nsNr: number of namespaces on the start tag 
7948 * 7949 * parse an end of tag 7950 * 7951 * [42] ETag ::= '</' Name S? '>' 7952 * 7953 * With namespace 7954 * 7955 * [NS 9] ETag ::= '</' QName S? '>' 7956 */ 7957 7958static void 7959xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 7960 const xmlChar *URI, int line, int nsNr) { 7961 const xmlChar *name; 7962 7963 GROW; 7964 if ((RAW != '<') || (NXT(1) != '/')) { 7965 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 7966 return; 7967 } 7968 SKIP(2); 7969 7970 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 7971 7972 /* 7973 * We should definitely be at the ending "S? '>'" part 7974 */ 7975 GROW; 7976 SKIP_BLANKS; 7977 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7978 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7979 } else 7980 NEXT1; 7981 7982 /* 7983 * [ WFC: Element Type Match ] 7984 * The Name in an element's end-tag must match the element type in the 7985 * start-tag. 7986 * 7987 */ 7988 if (name != (xmlChar*)1) { 7989 if (name == NULL) name = BAD_CAST "unparseable"; 7990 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7991 "Opening and ending tag mismatch: %s line %d and %s\n", 7992 ctxt->name, line, name); 7993 } 7994 7995 /* 7996 * SAX: End of Tag 7997 */ 7998 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 7999 (!ctxt->disableSAX)) 8000 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 8001 8002 spacePop(ctxt); 8003 if (nsNr != 0) 8004 nsPop(ctxt, nsNr); 8005 return; 8006} 8007 8008/** 8009 * xmlParseCDSect: 8010 * @ctxt: an XML parser context 8011 * 8012 * Parse escaped pure raw content. 
8013 * 8014 * [18] CDSect ::= CDStart CData CDEnd 8015 * 8016 * [19] CDStart ::= '<![CDATA[' 8017 * 8018 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 8019 * 8020 * [21] CDEnd ::= ']]>' 8021 */ 8022void 8023xmlParseCDSect(xmlParserCtxtPtr ctxt) { 8024 xmlChar *buf = NULL; 8025 int len = 0; 8026 int size = XML_PARSER_BUFFER_SIZE; 8027 int r, rl; 8028 int s, sl; 8029 int cur, l; 8030 int count = 0; 8031 8032 /* Check 2.6.0 was NXT(0) not RAW */ 8033 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8034 SKIP(9); 8035 } else 8036 return; 8037 8038 ctxt->instate = XML_PARSER_CDATA_SECTION; 8039 r = CUR_CHAR(rl); 8040 if (!IS_CHAR(r)) { 8041 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8042 ctxt->instate = XML_PARSER_CONTENT; 8043 return; 8044 } 8045 NEXTL(rl); 8046 s = CUR_CHAR(sl); 8047 if (!IS_CHAR(s)) { 8048 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8049 ctxt->instate = XML_PARSER_CONTENT; 8050 return; 8051 } 8052 NEXTL(sl); 8053 cur = CUR_CHAR(l); 8054 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8055 if (buf == NULL) { 8056 xmlErrMemory(ctxt, NULL); 8057 return; 8058 } 8059 while (IS_CHAR(cur) && 8060 ((r != ']') || (s != ']') || (cur != '>'))) { 8061 if (len + 5 >= size) { 8062 size *= 2; 8063 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8064 if (buf == NULL) { 8065 xmlErrMemory(ctxt, NULL); 8066 return; 8067 } 8068 } 8069 COPY_BUF(rl,buf,len,r); 8070 r = s; 8071 rl = sl; 8072 s = cur; 8073 sl = l; 8074 count++; 8075 if (count > 50) { 8076 GROW; 8077 count = 0; 8078 } 8079 NEXTL(l); 8080 cur = CUR_CHAR(l); 8081 } 8082 buf[len] = 0; 8083 ctxt->instate = XML_PARSER_CONTENT; 8084 if (cur != '>') { 8085 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 8086 "CData section not finished\n%.50s\n", buf); 8087 xmlFree(buf); 8088 return; 8089 } 8090 NEXTL(l); 8091 8092 /* 8093 * OK the buffer is to be consumed as cdata. 
8094 */ 8095 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8096 if (ctxt->sax->cdataBlock != NULL) 8097 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 8098 else if (ctxt->sax->characters != NULL) 8099 ctxt->sax->characters(ctxt->userData, buf, len); 8100 } 8101 xmlFree(buf); 8102} 8103 8104/** 8105 * xmlParseContent: 8106 * @ctxt: an XML parser context 8107 * 8108 * Parse a content: 8109 * 8110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8111 */ 8112 8113void 8114xmlParseContent(xmlParserCtxtPtr ctxt) { 8115 GROW; 8116 while ((RAW != 0) && 8117 ((RAW != '<') || (NXT(1) != '/'))) { 8118 const xmlChar *test = CUR_PTR; 8119 unsigned int cons = ctxt->input->consumed; 8120 const xmlChar *cur = ctxt->input->cur; 8121 8122 /* 8123 * First case : a Processing Instruction. 8124 */ 8125 if ((*cur == '<') && (cur[1] == '?')) { 8126 xmlParsePI(ctxt); 8127 } 8128 8129 /* 8130 * Second case : a CDSection 8131 */ 8132 /* 2.6.0 test was *cur not RAW */ 8133 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8134 xmlParseCDSect(ctxt); 8135 } 8136 8137 /* 8138 * Third case : a comment 8139 */ 8140 else if ((*cur == '<') && (NXT(1) == '!') && 8141 (NXT(2) == '-') && (NXT(3) == '-')) { 8142 xmlParseComment(ctxt); 8143 ctxt->instate = XML_PARSER_CONTENT; 8144 } 8145 8146 /* 8147 * Fourth case : a sub-element. 8148 */ 8149 else if (*cur == '<') { 8150 xmlParseElement(ctxt); 8151 } 8152 8153 /* 8154 * Fifth case : a reference. If if has not been resolved, 8155 * parsing returns it's Name, create the node 8156 */ 8157 8158 else if (*cur == '&') { 8159 xmlParseReference(ctxt); 8160 } 8161 8162 /* 8163 * Last case, text. Note that References are handled directly. 8164 */ 8165 else { 8166 xmlParseCharData(ctxt, 0); 8167 } 8168 8169 GROW; 8170 /* 8171 * Pop-up of finished entities. 
8172 */ 8173 while ((RAW == 0) && (ctxt->inputNr > 1)) 8174 xmlPopInput(ctxt); 8175 SHRINK; 8176 8177 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8178 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8179 "detected an error in element content\n"); 8180 ctxt->instate = XML_PARSER_EOF; 8181 break; 8182 } 8183 } 8184} 8185 8186/** 8187 * xmlParseElement: 8188 * @ctxt: an XML parser context 8189 * 8190 * parse an XML element, this is highly recursive 8191 * 8192 * [39] element ::= EmptyElemTag | STag content ETag 8193 * 8194 * [ WFC: Element Type Match ] 8195 * The Name in an element's end-tag must match the element type in the 8196 * start-tag. 8197 * 8198 */ 8199 8200void 8201xmlParseElement(xmlParserCtxtPtr ctxt) { 8202 const xmlChar *name; 8203 const xmlChar *prefix; 8204 const xmlChar *URI; 8205 xmlParserNodeInfo node_info; 8206 int line; 8207 xmlNodePtr ret; 8208 int nsNr = ctxt->nsNr; 8209 8210 /* Capture start position */ 8211 if (ctxt->record_info) { 8212 node_info.begin_pos = ctxt->input->consumed + 8213 (CUR_PTR - ctxt->input->base); 8214 node_info.begin_line = ctxt->input->line; 8215 } 8216 8217 if (ctxt->spaceNr == 0) 8218 spacePush(ctxt, -1); 8219 else 8220 spacePush(ctxt, *ctxt->space); 8221 8222 line = ctxt->input->line; 8223#ifdef LIBXML_SAX1_ENABLED 8224 if (ctxt->sax2) 8225#endif /* LIBXML_SAX1_ENABLED */ 8226 name = xmlParseStartTag2(ctxt, &prefix, &URI); 8227#ifdef LIBXML_SAX1_ENABLED 8228 else 8229 name = xmlParseStartTag(ctxt); 8230#endif /* LIBXML_SAX1_ENABLED */ 8231 if (name == NULL) { 8232 spacePop(ctxt); 8233 return; 8234 } 8235 namePush(ctxt, name); 8236 ret = ctxt->node; 8237 8238#ifdef LIBXML_VALID_ENABLED 8239 /* 8240 * [ VC: Root Element Type ] 8241 * The Name in the document type declaration must match the element 8242 * type of the root element. 
8243 */ 8244 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8245 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8246 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8247#endif /* LIBXML_VALID_ENABLED */ 8248 8249 /* 8250 * Check for an Empty Element. 8251 */ 8252 if ((RAW == '/') && (NXT(1) == '>')) { 8253 SKIP(2); 8254 if (ctxt->sax2) { 8255 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8256 (!ctxt->disableSAX)) 8257 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8258#ifdef LIBXML_SAX1_ENABLED 8259 } else { 8260 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8261 (!ctxt->disableSAX)) 8262 ctxt->sax->endElement(ctxt->userData, name); 8263#endif /* LIBXML_SAX1_ENABLED */ 8264 } 8265 namePop(ctxt); 8266 spacePop(ctxt); 8267 if (nsNr != ctxt->nsNr) 8268 nsPop(ctxt, ctxt->nsNr - nsNr); 8269 if ( ret != NULL && ctxt->record_info ) { 8270 node_info.end_pos = ctxt->input->consumed + 8271 (CUR_PTR - ctxt->input->base); 8272 node_info.end_line = ctxt->input->line; 8273 node_info.node = ret; 8274 xmlParserAddNodeInfo(ctxt, &node_info); 8275 } 8276 return; 8277 } 8278 if (RAW == '>') { 8279 NEXT1; 8280 } else { 8281 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 8282 "Couldn't find end of Start Tag %s line %d\n", 8283 name, line, NULL); 8284 8285 /* 8286 * end of parsing of this node. 
8287 */ 8288 nodePop(ctxt); 8289 namePop(ctxt); 8290 spacePop(ctxt); 8291 if (nsNr != ctxt->nsNr) 8292 nsPop(ctxt, ctxt->nsNr - nsNr); 8293 8294 /* 8295 * Capture end position and add node 8296 */ 8297 if ( ret != NULL && ctxt->record_info ) { 8298 node_info.end_pos = ctxt->input->consumed + 8299 (CUR_PTR - ctxt->input->base); 8300 node_info.end_line = ctxt->input->line; 8301 node_info.node = ret; 8302 xmlParserAddNodeInfo(ctxt, &node_info); 8303 } 8304 return; 8305 } 8306 8307 /* 8308 * Parse the content of the element: 8309 */ 8310 xmlParseContent(ctxt); 8311 if (!IS_BYTE_CHAR(RAW)) { 8312 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 8313 "Premature end of data in tag %s line %d\n", 8314 name, line, NULL); 8315 8316 /* 8317 * end of parsing of this node. 8318 */ 8319 nodePop(ctxt); 8320 namePop(ctxt); 8321 spacePop(ctxt); 8322 if (nsNr != ctxt->nsNr) 8323 nsPop(ctxt, ctxt->nsNr - nsNr); 8324 return; 8325 } 8326 8327 /* 8328 * parse the end of tag: '</' should be here. 8329 */ 8330 if (ctxt->sax2) { 8331 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr); 8332 namePop(ctxt); 8333 } 8334#ifdef LIBXML_SAX1_ENABLED 8335 else 8336 xmlParseEndTag1(ctxt, line); 8337#endif /* LIBXML_SAX1_ENABLED */ 8338 8339 /* 8340 * Capture end position and add node 8341 */ 8342 if ( ret != NULL && ctxt->record_info ) { 8343 node_info.end_pos = ctxt->input->consumed + 8344 (CUR_PTR - ctxt->input->base); 8345 node_info.end_line = ctxt->input->line; 8346 node_info.node = ret; 8347 xmlParserAddNodeInfo(ctxt, &node_info); 8348 } 8349} 8350 8351/** 8352 * xmlParseVersionNum: 8353 * @ctxt: an XML parser context 8354 * 8355 * parse the XML version value. 
8356 * 8357 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 8358 * 8359 * Returns the string giving the XML version number, or NULL 8360 */ 8361xmlChar * 8362xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 8363 xmlChar *buf = NULL; 8364 int len = 0; 8365 int size = 10; 8366 xmlChar cur; 8367 8368 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8369 if (buf == NULL) { 8370 xmlErrMemory(ctxt, NULL); 8371 return(NULL); 8372 } 8373 cur = CUR; 8374 while (((cur >= 'a') && (cur <= 'z')) || 8375 ((cur >= 'A') && (cur <= 'Z')) || 8376 ((cur >= '0') && (cur <= '9')) || 8377 (cur == '_') || (cur == '.') || 8378 (cur == ':') || (cur == '-')) { 8379 if (len + 1 >= size) { 8380 size *= 2; 8381 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8382 if (buf == NULL) { 8383 xmlErrMemory(ctxt, NULL); 8384 return(NULL); 8385 } 8386 } 8387 buf[len++] = cur; 8388 NEXT; 8389 cur=CUR; 8390 } 8391 buf[len] = 0; 8392 return(buf); 8393} 8394 8395/** 8396 * xmlParseVersionInfo: 8397 * @ctxt: an XML parser context 8398 * 8399 * parse the XML version. 8400 * 8401 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 8402 * 8403 * [25] Eq ::= S? '=' S? 8404 * 8405 * Returns the version string, e.g. 
"1.0" 8406 */ 8407 8408xmlChar * 8409xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 8410 xmlChar *version = NULL; 8411 8412 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 8413 SKIP(7); 8414 SKIP_BLANKS; 8415 if (RAW != '=') { 8416 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8417 return(NULL); 8418 } 8419 NEXT; 8420 SKIP_BLANKS; 8421 if (RAW == '"') { 8422 NEXT; 8423 version = xmlParseVersionNum(ctxt); 8424 if (RAW != '"') { 8425 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8426 } else 8427 NEXT; 8428 } else if (RAW == '\''){ 8429 NEXT; 8430 version = xmlParseVersionNum(ctxt); 8431 if (RAW != '\'') { 8432 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8433 } else 8434 NEXT; 8435 } else { 8436 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8437 } 8438 } 8439 return(version); 8440} 8441 8442/** 8443 * xmlParseEncName: 8444 * @ctxt: an XML parser context 8445 * 8446 * parse the XML encoding name 8447 * 8448 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 8449 * 8450 * Returns the encoding name value or NULL 8451 */ 8452xmlChar * 8453xmlParseEncName(xmlParserCtxtPtr ctxt) { 8454 xmlChar *buf = NULL; 8455 int len = 0; 8456 int size = 10; 8457 xmlChar cur; 8458 8459 cur = CUR; 8460 if (((cur >= 'a') && (cur <= 'z')) || 8461 ((cur >= 'A') && (cur <= 'Z'))) { 8462 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8463 if (buf == NULL) { 8464 xmlErrMemory(ctxt, NULL); 8465 return(NULL); 8466 } 8467 8468 buf[len++] = cur; 8469 NEXT; 8470 cur = CUR; 8471 while (((cur >= 'a') && (cur <= 'z')) || 8472 ((cur >= 'A') && (cur <= 'Z')) || 8473 ((cur >= '0') && (cur <= '9')) || 8474 (cur == '.') || (cur == '_') || 8475 (cur == '-')) { 8476 if (len + 1 >= size) { 8477 size *= 2; 8478 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8479 if (buf == NULL) { 8480 xmlErrMemory(ctxt, NULL); 8481 return(NULL); 8482 } 8483 } 8484 buf[len++] = cur; 8485 NEXT; 8486 cur = CUR; 8487 if (cur == 0) { 8488 SHRINK; 8489 GROW; 8490 cur = CUR; 8491 } 
8492 } 8493 buf[len] = 0; 8494 } else { 8495 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 8496 } 8497 return(buf); 8498} 8499 8500/** 8501 * xmlParseEncodingDecl: 8502 * @ctxt: an XML parser context 8503 * 8504 * parse the XML encoding declaration 8505 * 8506 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 8507 * 8508 * this setups the conversion filters. 8509 * 8510 * Returns the encoding value or NULL 8511 */ 8512 8513const xmlChar * 8514xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 8515 xmlChar *encoding = NULL; 8516 8517 SKIP_BLANKS; 8518 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 8519 SKIP(8); 8520 SKIP_BLANKS; 8521 if (RAW != '=') { 8522 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8523 return(NULL); 8524 } 8525 NEXT; 8526 SKIP_BLANKS; 8527 if (RAW == '"') { 8528 NEXT; 8529 encoding = xmlParseEncName(ctxt); 8530 if (RAW != '"') { 8531 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8532 } else 8533 NEXT; 8534 } else if (RAW == '\''){ 8535 NEXT; 8536 encoding = xmlParseEncName(ctxt); 8537 if (RAW != '\'') { 8538 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8539 } else 8540 NEXT; 8541 } else { 8542 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8543 } 8544 /* 8545 * UTF-16 encoding stwich has already taken place at this stage, 8546 * more over the little-endian/big-endian selection is already done 8547 */ 8548 if ((encoding != NULL) && 8549 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 8550 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 8551 if (ctxt->encoding != NULL) 8552 xmlFree((xmlChar *) ctxt->encoding); 8553 ctxt->encoding = encoding; 8554 } 8555 /* 8556 * UTF-8 encoding is handled natively 8557 */ 8558 else if ((encoding != NULL) && 8559 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 8560 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 8561 if (ctxt->encoding != NULL) 8562 xmlFree((xmlChar *) ctxt->encoding); 8563 ctxt->encoding = encoding; 8564 } 8565 else if (encoding != 
NULL) { 8566 xmlCharEncodingHandlerPtr handler; 8567 8568 if (ctxt->input->encoding != NULL) 8569 xmlFree((xmlChar *) ctxt->input->encoding); 8570 ctxt->input->encoding = encoding; 8571 8572 handler = xmlFindCharEncodingHandler((const char *) encoding); 8573 if (handler != NULL) { 8574 xmlSwitchToEncoding(ctxt, handler); 8575 } else { 8576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 8577 "Unsupported encoding %s\n", encoding); 8578 return(NULL); 8579 } 8580 } 8581 } 8582 return(encoding); 8583} 8584 8585/** 8586 * xmlParseSDDecl: 8587 * @ctxt: an XML parser context 8588 * 8589 * parse the XML standalone declaration 8590 * 8591 * [32] SDDecl ::= S 'standalone' Eq 8592 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 8593 * 8594 * [ VC: Standalone Document Declaration ] 8595 * TODO The standalone document declaration must have the value "no" 8596 * if any external markup declarations contain declarations of: 8597 * - attributes with default values, if elements to which these 8598 * attributes apply appear in the document without specifications 8599 * of values for these attributes, or 8600 * - entities (other than amp, lt, gt, apos, quot), if references 8601 * to those entities appear in the document, or 8602 * - attributes with values subject to normalization, where the 8603 * attribute appears in the document with a value which will change 8604 * as a result of normalization, or 8605 * - element types with element content, if white space occurs directly 8606 * within any instance of those types. 
8607 * 8608 * Returns 1 if standalone, 0 otherwise 8609 */ 8610 8611int 8612xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 8613 int standalone = -1; 8614 8615 SKIP_BLANKS; 8616 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 8617 SKIP(10); 8618 SKIP_BLANKS; 8619 if (RAW != '=') { 8620 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8621 return(standalone); 8622 } 8623 NEXT; 8624 SKIP_BLANKS; 8625 if (RAW == '\''){ 8626 NEXT; 8627 if ((RAW == 'n') && (NXT(1) == 'o')) { 8628 standalone = 0; 8629 SKIP(2); 8630 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8631 (NXT(2) == 's')) { 8632 standalone = 1; 8633 SKIP(3); 8634 } else { 8635 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8636 } 8637 if (RAW != '\'') { 8638 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8639 } else 8640 NEXT; 8641 } else if (RAW == '"'){ 8642 NEXT; 8643 if ((RAW == 'n') && (NXT(1) == 'o')) { 8644 standalone = 0; 8645 SKIP(2); 8646 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8647 (NXT(2) == 's')) { 8648 standalone = 1; 8649 SKIP(3); 8650 } else { 8651 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8652 } 8653 if (RAW != '"') { 8654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8655 } else 8656 NEXT; 8657 } else { 8658 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8659 } 8660 } 8661 return(standalone); 8662} 8663 8664/** 8665 * xmlParseXMLDecl: 8666 * @ctxt: an XML parser context 8667 * 8668 * parse an XML declaration header 8669 * 8670 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 8671 */ 8672 8673void 8674xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 8675 xmlChar *version; 8676 8677 /* 8678 * We know that '<?xml' is here. 8679 */ 8680 SKIP(5); 8681 8682 if (!IS_BLANK_CH(RAW)) { 8683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8684 "Blank needed after '<?xml'\n"); 8685 } 8686 SKIP_BLANKS; 8687 8688 /* 8689 * We must have the VersionInfo here. 
8690 */ 8691 version = xmlParseVersionInfo(ctxt); 8692 if (version == NULL) { 8693 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 8694 } else { 8695 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 8696 /* 8697 * TODO: Blueberry should be detected here 8698 */ 8699 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 8700 "Unsupported version '%s'\n", 8701 version, NULL); 8702 } 8703 if (ctxt->version != NULL) 8704 xmlFree((void *) ctxt->version); 8705 ctxt->version = version; 8706 } 8707 8708 /* 8709 * We may have the encoding declaration 8710 */ 8711 if (!IS_BLANK_CH(RAW)) { 8712 if ((RAW == '?') && (NXT(1) == '>')) { 8713 SKIP(2); 8714 return; 8715 } 8716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8717 } 8718 xmlParseEncodingDecl(ctxt); 8719 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8720 /* 8721 * The XML REC instructs us to stop parsing right here 8722 */ 8723 return; 8724 } 8725 8726 /* 8727 * We may have the standalone status. 8728 */ 8729 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 8730 if ((RAW == '?') && (NXT(1) == '>')) { 8731 SKIP(2); 8732 return; 8733 } 8734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8735 } 8736 SKIP_BLANKS; 8737 ctxt->input->standalone = xmlParseSDDecl(ctxt); 8738 8739 SKIP_BLANKS; 8740 if ((RAW == '?') && (NXT(1) == '>')) { 8741 SKIP(2); 8742 } else if (RAW == '>') { 8743 /* Deprecated old WD ... */ 8744 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8745 NEXT; 8746 } else { 8747 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8748 MOVETO_ENDTAG(CUR_PTR); 8749 NEXT; 8750 } 8751} 8752 8753/** 8754 * xmlParseMisc: 8755 * @ctxt: an XML parser context 8756 * 8757 * parse an XML Misc* optional field. 
8758 * 8759 * [27] Misc ::= Comment | PI | S 8760 */ 8761 8762void 8763xmlParseMisc(xmlParserCtxtPtr ctxt) { 8764 while (((RAW == '<') && (NXT(1) == '?')) || 8765 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 8766 IS_BLANK_CH(CUR)) { 8767 if ((RAW == '<') && (NXT(1) == '?')) { 8768 xmlParsePI(ctxt); 8769 } else if (IS_BLANK_CH(CUR)) { 8770 NEXT; 8771 } else 8772 xmlParseComment(ctxt); 8773 } 8774} 8775 8776/** 8777 * xmlParseDocument: 8778 * @ctxt: an XML parser context 8779 * 8780 * parse an XML document (and build a tree if using the standard SAX 8781 * interface). 8782 * 8783 * [1] document ::= prolog element Misc* 8784 * 8785 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 8786 * 8787 * Returns 0, -1 in case of error. the parser context is augmented 8788 * as a result of the parsing. 8789 */ 8790 8791int 8792xmlParseDocument(xmlParserCtxtPtr ctxt) { 8793 xmlChar start[4]; 8794 xmlCharEncoding enc; 8795 8796 xmlInitParser(); 8797 8798 GROW; 8799 8800 /* 8801 * SAX: detecting the level. 8802 */ 8803 xmlDetectSAX2(ctxt); 8804 8805 /* 8806 * SAX: beginning of the document processing. 8807 */ 8808 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8809 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8810 8811 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 8812 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 8813 /* 8814 * Get the 4 first bytes and decode the charset 8815 * if enc != XML_CHAR_ENCODING_NONE 8816 * plug some encoding conversion routines. 8817 */ 8818 start[0] = RAW; 8819 start[1] = NXT(1); 8820 start[2] = NXT(2); 8821 start[3] = NXT(3); 8822 enc = xmlDetectCharEncoding(&start[0], 4); 8823 if (enc != XML_CHAR_ENCODING_NONE) { 8824 xmlSwitchEncoding(ctxt, enc); 8825 } 8826 } 8827 8828 8829 if (CUR == 0) { 8830 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8831 } 8832 8833 /* 8834 * Check for the XMLDecl in the Prolog. 
8835 */ 8836 GROW; 8837 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8838 8839 /* 8840 * Note that we will switch encoding on the fly. 8841 */ 8842 xmlParseXMLDecl(ctxt); 8843 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8844 /* 8845 * The XML REC instructs us to stop parsing right here 8846 */ 8847 return(-1); 8848 } 8849 ctxt->standalone = ctxt->input->standalone; 8850 SKIP_BLANKS; 8851 } else { 8852 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8853 } 8854 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8855 ctxt->sax->startDocument(ctxt->userData); 8856 8857 /* 8858 * The Misc part of the Prolog 8859 */ 8860 GROW; 8861 xmlParseMisc(ctxt); 8862 8863 /* 8864 * Then possibly doc type declaration(s) and more Misc 8865 * (doctypedecl Misc*)? 8866 */ 8867 GROW; 8868 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 8869 8870 ctxt->inSubset = 1; 8871 xmlParseDocTypeDecl(ctxt); 8872 if (RAW == '[') { 8873 ctxt->instate = XML_PARSER_DTD; 8874 xmlParseInternalSubset(ctxt); 8875 } 8876 8877 /* 8878 * Create and update the external subset. 
8879 */ 8880 ctxt->inSubset = 2; 8881 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 8882 (!ctxt->disableSAX)) 8883 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8884 ctxt->extSubSystem, ctxt->extSubURI); 8885 ctxt->inSubset = 0; 8886 8887 8888 ctxt->instate = XML_PARSER_PROLOG; 8889 xmlParseMisc(ctxt); 8890 } 8891 8892 /* 8893 * Time to start parsing the tree itself 8894 */ 8895 GROW; 8896 if (RAW != '<') { 8897 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 8898 "Start tag expected, '<' not found\n"); 8899 } else { 8900 ctxt->instate = XML_PARSER_CONTENT; 8901 xmlParseElement(ctxt); 8902 ctxt->instate = XML_PARSER_EPILOG; 8903 8904 8905 /* 8906 * The Misc part at the end 8907 */ 8908 xmlParseMisc(ctxt); 8909 8910 if (RAW != 0) { 8911 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 8912 } 8913 ctxt->instate = XML_PARSER_EOF; 8914 } 8915 8916 /* 8917 * SAX: end of the document processing. 8918 */ 8919 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8920 ctxt->sax->endDocument(ctxt->userData); 8921 8922 /* 8923 * Remove locally kept entity definitions if the tree was not built 8924 */ 8925 if ((ctxt->myDoc != NULL) && 8926 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 8927 xmlFreeDoc(ctxt->myDoc); 8928 ctxt->myDoc = NULL; 8929 } 8930 8931 if (! ctxt->wellFormed) { 8932 ctxt->valid = 0; 8933 return(-1); 8934 } 8935 return(0); 8936} 8937 8938/** 8939 * xmlParseExtParsedEnt: 8940 * @ctxt: an XML parser context 8941 * 8942 * parse a general parsed entity 8943 * An external general parsed entity is well-formed if it matches the 8944 * production labeled extParsedEnt. 8945 * 8946 * [78] extParsedEnt ::= TextDecl? content 8947 * 8948 * Returns 0, -1 in case of error. the parser context is augmented 8949 * as a result of the parsing. 
8950 */ 8951 8952int 8953xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 8954 xmlChar start[4]; 8955 xmlCharEncoding enc; 8956 8957 xmlDefaultSAXHandlerInit(); 8958 8959 xmlDetectSAX2(ctxt); 8960 8961 GROW; 8962 8963 /* 8964 * SAX: beginning of the document processing. 8965 */ 8966 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8967 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8968 8969 /* 8970 * Get the 4 first bytes and decode the charset 8971 * if enc != XML_CHAR_ENCODING_NONE 8972 * plug some encoding conversion routines. 8973 */ 8974 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 8975 start[0] = RAW; 8976 start[1] = NXT(1); 8977 start[2] = NXT(2); 8978 start[3] = NXT(3); 8979 enc = xmlDetectCharEncoding(start, 4); 8980 if (enc != XML_CHAR_ENCODING_NONE) { 8981 xmlSwitchEncoding(ctxt, enc); 8982 } 8983 } 8984 8985 8986 if (CUR == 0) { 8987 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8988 } 8989 8990 /* 8991 * Check for the XMLDecl in the Prolog. 8992 */ 8993 GROW; 8994 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8995 8996 /* 8997 * Note that we will switch encoding on the fly. 
8998 */ 8999 xmlParseXMLDecl(ctxt); 9000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9001 /* 9002 * The XML REC instructs us to stop parsing right here 9003 */ 9004 return(-1); 9005 } 9006 SKIP_BLANKS; 9007 } else { 9008 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9009 } 9010 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9011 ctxt->sax->startDocument(ctxt->userData); 9012 9013 /* 9014 * Doing validity checking on chunk doesn't make sense 9015 */ 9016 ctxt->instate = XML_PARSER_CONTENT; 9017 ctxt->validate = 0; 9018 ctxt->loadsubset = 0; 9019 ctxt->depth = 0; 9020 9021 xmlParseContent(ctxt); 9022 9023 if ((RAW == '<') && (NXT(1) == '/')) { 9024 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 9025 } else if (RAW != 0) { 9026 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 9027 } 9028 9029 /* 9030 * SAX: end of the document processing. 9031 */ 9032 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9033 ctxt->sax->endDocument(ctxt->userData); 9034 9035 if (! ctxt->wellFormed) return(-1); 9036 return(0); 9037} 9038 9039#ifdef LIBXML_PUSH_ENABLED 9040/************************************************************************ 9041 * * 9042 * Progressive parsing interfaces * 9043 * * 9044 ************************************************************************/ 9045 9046/** 9047 * xmlParseLookupSequence: 9048 * @ctxt: an XML parser context 9049 * @first: the first char to lookup 9050 * @next: the next char to lookup or zero 9051 * @third: the next char to lookup or zero 9052 * 9053 * Try to find if a sequence (first, next, third) or just (first next) or 9054 * (first) is available in the input stream. 9055 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 9056 * to avoid rescanning sequences of bytes, it DOES change the state of the 9057 * parser, do not use liberally. 9058 * 9059 * Returns the index to the current parsing point if the full sequence 9060 * is available, -1 otherwise. 
9061 */ 9062static int 9063xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 9064 xmlChar next, xmlChar third) { 9065 int base, len; 9066 xmlParserInputPtr in; 9067 const xmlChar *buf; 9068 9069 in = ctxt->input; 9070 if (in == NULL) return(-1); 9071 base = in->cur - in->base; 9072 if (base < 0) return(-1); 9073 if (ctxt->checkIndex > base) 9074 base = ctxt->checkIndex; 9075 if (in->buf == NULL) { 9076 buf = in->base; 9077 len = in->length; 9078 } else { 9079 buf = in->buf->buffer->content; 9080 len = in->buf->buffer->use; 9081 } 9082 /* take into account the sequence length */ 9083 if (third) len -= 2; 9084 else if (next) len --; 9085 for (;base < len;base++) { 9086 if (buf[base] == first) { 9087 if (third != 0) { 9088 if ((buf[base + 1] != next) || 9089 (buf[base + 2] != third)) continue; 9090 } else if (next != 0) { 9091 if (buf[base + 1] != next) continue; 9092 } 9093 ctxt->checkIndex = 0; 9094#ifdef DEBUG_PUSH 9095 if (next == 0) 9096 xmlGenericError(xmlGenericErrorContext, 9097 "PP: lookup '%c' found at %d\n", 9098 first, base); 9099 else if (third == 0) 9100 xmlGenericError(xmlGenericErrorContext, 9101 "PP: lookup '%c%c' found at %d\n", 9102 first, next, base); 9103 else 9104 xmlGenericError(xmlGenericErrorContext, 9105 "PP: lookup '%c%c%c' found at %d\n", 9106 first, next, third, base); 9107#endif 9108 return(base - (in->cur - in->base)); 9109 } 9110 } 9111 ctxt->checkIndex = base; 9112#ifdef DEBUG_PUSH 9113 if (next == 0) 9114 xmlGenericError(xmlGenericErrorContext, 9115 "PP: lookup '%c' failed\n", first); 9116 else if (third == 0) 9117 xmlGenericError(xmlGenericErrorContext, 9118 "PP: lookup '%c%c' failed\n", first, next); 9119 else 9120 xmlGenericError(xmlGenericErrorContext, 9121 "PP: lookup '%c%c%c' failed\n", first, next, third); 9122#endif 9123 return(-1); 9124} 9125 9126/** 9127 * xmlParseGetLasts: 9128 * @ctxt: an XML parser context 9129 * @lastlt: pointer to store the last '<' from the input 9130 * @lastgt: pointer to store the last '>' 
from the input 9131 * 9132 * Lookup the last < and > in the current chunk 9133 */ 9134static void 9135xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 9136 const xmlChar **lastgt) { 9137 const xmlChar *tmp; 9138 9139 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 9140 xmlGenericError(xmlGenericErrorContext, 9141 "Internal error: xmlParseGetLasts\n"); 9142 return; 9143 } 9144 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) { 9145 tmp = ctxt->input->end; 9146 tmp--; 9147 while ((tmp >= ctxt->input->base) && (*tmp != '<') && 9148 (*tmp != '>')) tmp--; 9149 if (tmp < ctxt->input->base) { 9150 *lastlt = NULL; 9151 *lastgt = NULL; 9152 } else if (*tmp == '<') { 9153 *lastlt = tmp; 9154 tmp--; 9155 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 9156 if (tmp < ctxt->input->base) 9157 *lastgt = NULL; 9158 else 9159 *lastgt = tmp; 9160 } else { 9161 *lastgt = tmp; 9162 tmp--; 9163 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 9164 if (tmp < ctxt->input->base) 9165 *lastlt = NULL; 9166 else 9167 *lastlt = tmp; 9168 } 9169 9170 } else { 9171 *lastlt = NULL; 9172 *lastgt = NULL; 9173 } 9174} 9175/** 9176 * xmlParseTryOrFinish: 9177 * @ctxt: an XML parser context 9178 * @terminate: last chunk indicator 9179 * 9180 * Try to progress on parsing 9181 * 9182 * Returns zero if no parsing was possible 9183 */ 9184static int 9185xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 9186 int ret = 0; 9187 int avail; 9188 xmlChar cur, next; 9189 const xmlChar *lastlt, *lastgt; 9190 9191#ifdef DEBUG_PUSH 9192 switch (ctxt->instate) { 9193 case XML_PARSER_EOF: 9194 xmlGenericError(xmlGenericErrorContext, 9195 "PP: try EOF\n"); break; 9196 case XML_PARSER_START: 9197 xmlGenericError(xmlGenericErrorContext, 9198 "PP: try START\n"); break; 9199 case XML_PARSER_MISC: 9200 xmlGenericError(xmlGenericErrorContext, 9201 "PP: try MISC\n");break; 9202 case XML_PARSER_COMMENT: 9203 xmlGenericError(xmlGenericErrorContext, 9204 "PP: try 
COMMENT\n");break; 9205 case XML_PARSER_PROLOG: 9206 xmlGenericError(xmlGenericErrorContext, 9207 "PP: try PROLOG\n");break; 9208 case XML_PARSER_START_TAG: 9209 xmlGenericError(xmlGenericErrorContext, 9210 "PP: try START_TAG\n");break; 9211 case XML_PARSER_CONTENT: 9212 xmlGenericError(xmlGenericErrorContext, 9213 "PP: try CONTENT\n");break; 9214 case XML_PARSER_CDATA_SECTION: 9215 xmlGenericError(xmlGenericErrorContext, 9216 "PP: try CDATA_SECTION\n");break; 9217 case XML_PARSER_END_TAG: 9218 xmlGenericError(xmlGenericErrorContext, 9219 "PP: try END_TAG\n");break; 9220 case XML_PARSER_ENTITY_DECL: 9221 xmlGenericError(xmlGenericErrorContext, 9222 "PP: try ENTITY_DECL\n");break; 9223 case XML_PARSER_ENTITY_VALUE: 9224 xmlGenericError(xmlGenericErrorContext, 9225 "PP: try ENTITY_VALUE\n");break; 9226 case XML_PARSER_ATTRIBUTE_VALUE: 9227 xmlGenericError(xmlGenericErrorContext, 9228 "PP: try ATTRIBUTE_VALUE\n");break; 9229 case XML_PARSER_DTD: 9230 xmlGenericError(xmlGenericErrorContext, 9231 "PP: try DTD\n");break; 9232 case XML_PARSER_EPILOG: 9233 xmlGenericError(xmlGenericErrorContext, 9234 "PP: try EPILOG\n");break; 9235 case XML_PARSER_PI: 9236 xmlGenericError(xmlGenericErrorContext, 9237 "PP: try PI\n");break; 9238 case XML_PARSER_IGNORE: 9239 xmlGenericError(xmlGenericErrorContext, 9240 "PP: try IGNORE\n");break; 9241 } 9242#endif 9243 9244 if (ctxt->input->cur - ctxt->input->base > 4096) { 9245 xmlSHRINK(ctxt); 9246 ctxt->checkIndex = 0; 9247 } 9248 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9249 9250 while (1) { 9251 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9252 return(0); 9253 9254 9255 /* 9256 * Pop-up of finished entities. 
9257 */ 9258 while ((RAW == 0) && (ctxt->inputNr > 1)) 9259 xmlPopInput(ctxt); 9260 9261 if (ctxt->input ==NULL) break; 9262 if (ctxt->input->buf == NULL) 9263 avail = ctxt->input->length - 9264 (ctxt->input->cur - ctxt->input->base); 9265 else { 9266 /* 9267 * If we are operating on converted input, try to flush 9268 * remainng chars to avoid them stalling in the non-converted 9269 * buffer. 9270 */ 9271 if ((ctxt->input->buf->raw != NULL) && 9272 (ctxt->input->buf->raw->use > 0)) { 9273 int base = ctxt->input->base - 9274 ctxt->input->buf->buffer->content; 9275 int current = ctxt->input->cur - ctxt->input->base; 9276 9277 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 9278 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9279 ctxt->input->cur = ctxt->input->base + current; 9280 ctxt->input->end = 9281 &ctxt->input->buf->buffer->content[ 9282 ctxt->input->buf->buffer->use]; 9283 } 9284 avail = ctxt->input->buf->buffer->use - 9285 (ctxt->input->cur - ctxt->input->base); 9286 } 9287 if (avail < 1) 9288 goto done; 9289 switch (ctxt->instate) { 9290 case XML_PARSER_EOF: 9291 /* 9292 * Document parsing is done ! 9293 */ 9294 goto done; 9295 case XML_PARSER_START: 9296 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 9297 xmlChar start[4]; 9298 xmlCharEncoding enc; 9299 9300 /* 9301 * Very first chars read from the document flow. 9302 */ 9303 if (avail < 4) 9304 goto done; 9305 9306 /* 9307 * Get the 4 first bytes and decode the charset 9308 * if enc != XML_CHAR_ENCODING_NONE 9309 * plug some encoding conversion routines. 
9310 */ 9311 start[0] = RAW; 9312 start[1] = NXT(1); 9313 start[2] = NXT(2); 9314 start[3] = NXT(3); 9315 enc = xmlDetectCharEncoding(start, 4); 9316 if (enc != XML_CHAR_ENCODING_NONE) { 9317 xmlSwitchEncoding(ctxt, enc); 9318 } 9319 break; 9320 } 9321 9322 if (avail < 2) 9323 goto done; 9324 cur = ctxt->input->cur[0]; 9325 next = ctxt->input->cur[1]; 9326 if (cur == 0) { 9327 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9328 ctxt->sax->setDocumentLocator(ctxt->userData, 9329 &xmlDefaultSAXLocator); 9330 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9331 ctxt->instate = XML_PARSER_EOF; 9332#ifdef DEBUG_PUSH 9333 xmlGenericError(xmlGenericErrorContext, 9334 "PP: entering EOF\n"); 9335#endif 9336 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9337 ctxt->sax->endDocument(ctxt->userData); 9338 goto done; 9339 } 9340 if ((cur == '<') && (next == '?')) { 9341 /* PI or XML decl */ 9342 if (avail < 5) return(ret); 9343 if ((!terminate) && 9344 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9345 return(ret); 9346 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9347 ctxt->sax->setDocumentLocator(ctxt->userData, 9348 &xmlDefaultSAXLocator); 9349 if ((ctxt->input->cur[2] == 'x') && 9350 (ctxt->input->cur[3] == 'm') && 9351 (ctxt->input->cur[4] == 'l') && 9352 (IS_BLANK_CH(ctxt->input->cur[5]))) { 9353 ret += 5; 9354#ifdef DEBUG_PUSH 9355 xmlGenericError(xmlGenericErrorContext, 9356 "PP: Parsing XML Decl\n"); 9357#endif 9358 xmlParseXMLDecl(ctxt); 9359 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9360 /* 9361 * The XML REC instructs us to stop parsing right 9362 * here 9363 */ 9364 ctxt->instate = XML_PARSER_EOF; 9365 return(0); 9366 } 9367 ctxt->standalone = ctxt->input->standalone; 9368 if ((ctxt->encoding == NULL) && 9369 (ctxt->input->encoding != NULL)) 9370 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 9371 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9372 (!ctxt->disableSAX)) 9373 ctxt->sax->startDocument(ctxt->userData); 9374 
ctxt->instate = XML_PARSER_MISC; 9375#ifdef DEBUG_PUSH 9376 xmlGenericError(xmlGenericErrorContext, 9377 "PP: entering MISC\n"); 9378#endif 9379 } else { 9380 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9381 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9382 (!ctxt->disableSAX)) 9383 ctxt->sax->startDocument(ctxt->userData); 9384 ctxt->instate = XML_PARSER_MISC; 9385#ifdef DEBUG_PUSH 9386 xmlGenericError(xmlGenericErrorContext, 9387 "PP: entering MISC\n"); 9388#endif 9389 } 9390 } else { 9391 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9392 ctxt->sax->setDocumentLocator(ctxt->userData, 9393 &xmlDefaultSAXLocator); 9394 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9395 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9396 (!ctxt->disableSAX)) 9397 ctxt->sax->startDocument(ctxt->userData); 9398 ctxt->instate = XML_PARSER_MISC; 9399#ifdef DEBUG_PUSH 9400 xmlGenericError(xmlGenericErrorContext, 9401 "PP: entering MISC\n"); 9402#endif 9403 } 9404 break; 9405 case XML_PARSER_START_TAG: { 9406 const xmlChar *name; 9407 const xmlChar *prefix; 9408 const xmlChar *URI; 9409 int nsNr = ctxt->nsNr; 9410 9411 if ((avail < 2) && (ctxt->inputNr == 1)) 9412 goto done; 9413 cur = ctxt->input->cur[0]; 9414 if (cur != '<') { 9415 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9416 ctxt->instate = XML_PARSER_EOF; 9417 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9418 ctxt->sax->endDocument(ctxt->userData); 9419 goto done; 9420 } 9421 if (!terminate) { 9422 if (ctxt->progressive) { 9423 if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) 9424 goto done; 9425 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9426 goto done; 9427 } 9428 } 9429 if (ctxt->spaceNr == 0) 9430 spacePush(ctxt, -1); 9431 else 9432 spacePush(ctxt, *ctxt->space); 9433#ifdef LIBXML_SAX1_ENABLED 9434 if (ctxt->sax2) 9435#endif /* LIBXML_SAX1_ENABLED */ 9436 name = xmlParseStartTag2(ctxt, &prefix, &URI); 9437#ifdef LIBXML_SAX1_ENABLED 9438 else 9439 name = 
xmlParseStartTag(ctxt); 9440#endif /* LIBXML_SAX1_ENABLED */ 9441 if (name == NULL) { 9442 spacePop(ctxt); 9443 ctxt->instate = XML_PARSER_EOF; 9444 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9445 ctxt->sax->endDocument(ctxt->userData); 9446 goto done; 9447 } 9448#ifdef LIBXML_VALID_ENABLED 9449 /* 9450 * [ VC: Root Element Type ] 9451 * The Name in the document type declaration must match 9452 * the element type of the root element. 9453 */ 9454 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9455 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9456 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9457#endif /* LIBXML_VALID_ENABLED */ 9458 9459 /* 9460 * Check for an Empty Element. 9461 */ 9462 if ((RAW == '/') && (NXT(1) == '>')) { 9463 SKIP(2); 9464 9465 if (ctxt->sax2) { 9466 if ((ctxt->sax != NULL) && 9467 (ctxt->sax->endElementNs != NULL) && 9468 (!ctxt->disableSAX)) 9469 ctxt->sax->endElementNs(ctxt->userData, name, 9470 prefix, URI); 9471#ifdef LIBXML_SAX1_ENABLED 9472 } else { 9473 if ((ctxt->sax != NULL) && 9474 (ctxt->sax->endElement != NULL) && 9475 (!ctxt->disableSAX)) 9476 ctxt->sax->endElement(ctxt->userData, name); 9477#endif /* LIBXML_SAX1_ENABLED */ 9478 } 9479 spacePop(ctxt); 9480 if (ctxt->nameNr == 0) { 9481 ctxt->instate = XML_PARSER_EPILOG; 9482 } else { 9483 ctxt->instate = XML_PARSER_CONTENT; 9484 } 9485 break; 9486 } 9487 if (RAW == '>') { 9488 NEXT; 9489 } else { 9490 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 9491 "Couldn't find end of Start Tag %s\n", 9492 name); 9493 nodePop(ctxt); 9494 spacePop(ctxt); 9495 } 9496 if (ctxt->sax2) 9497 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 9498#ifdef LIBXML_SAX1_ENABLED 9499 else 9500 namePush(ctxt, name); 9501#endif /* LIBXML_SAX1_ENABLED */ 9502 9503 ctxt->instate = XML_PARSER_CONTENT; 9504 break; 9505 } 9506 case XML_PARSER_CONTENT: { 9507 const xmlChar *test; 9508 unsigned int cons; 9509 if ((avail < 2) && (ctxt->inputNr == 1)) 9510 goto done; 
9511 cur = ctxt->input->cur[0]; 9512 next = ctxt->input->cur[1]; 9513 9514 test = CUR_PTR; 9515 cons = ctxt->input->consumed; 9516 if ((cur == '<') && (next == '/')) { 9517 ctxt->instate = XML_PARSER_END_TAG; 9518 break; 9519 } else if ((cur == '<') && (next == '?')) { 9520 if ((!terminate) && 9521 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9522 goto done; 9523 xmlParsePI(ctxt); 9524 } else if ((cur == '<') && (next != '!')) { 9525 ctxt->instate = XML_PARSER_START_TAG; 9526 break; 9527 } else if ((cur == '<') && (next == '!') && 9528 (ctxt->input->cur[2] == '-') && 9529 (ctxt->input->cur[3] == '-')) { 9530 if ((!terminate) && 9531 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9532 goto done; 9533 xmlParseComment(ctxt); 9534 ctxt->instate = XML_PARSER_CONTENT; 9535 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 9536 (ctxt->input->cur[2] == '[') && 9537 (ctxt->input->cur[3] == 'C') && 9538 (ctxt->input->cur[4] == 'D') && 9539 (ctxt->input->cur[5] == 'A') && 9540 (ctxt->input->cur[6] == 'T') && 9541 (ctxt->input->cur[7] == 'A') && 9542 (ctxt->input->cur[8] == '[')) { 9543 SKIP(9); 9544 ctxt->instate = XML_PARSER_CDATA_SECTION; 9545 break; 9546 } else if ((cur == '<') && (next == '!') && 9547 (avail < 9)) { 9548 goto done; 9549 } else if (cur == '&') { 9550 if ((!terminate) && 9551 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 9552 goto done; 9553 xmlParseReference(ctxt); 9554 } else { 9555 /* TODO Avoid the extra copy, handle directly !!! */ 9556 /* 9557 * Goal of the following test is: 9558 * - minimize calls to the SAX 'character' callback 9559 * when they are mergeable 9560 * - handle an problem for isBlank when we only parse 9561 * a sequence of blank chars and the next one is 9562 * not available to check against '<' presence. 9563 * - tries to homogenize the differences in SAX 9564 * callbacks between the push and pull versions 9565 * of the parser. 
9566 */ 9567 if ((ctxt->inputNr == 1) && 9568 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 9569 if (!terminate) { 9570 if (ctxt->progressive) { 9571 if ((lastlt == NULL) || 9572 (ctxt->input->cur > lastlt)) 9573 goto done; 9574 } else if (xmlParseLookupSequence(ctxt, 9575 '<', 0, 0) < 0) { 9576 goto done; 9577 } 9578 } 9579 } 9580 ctxt->checkIndex = 0; 9581 xmlParseCharData(ctxt, 0); 9582 } 9583 /* 9584 * Pop-up of finished entities. 9585 */ 9586 while ((RAW == 0) && (ctxt->inputNr > 1)) 9587 xmlPopInput(ctxt); 9588 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9589 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9590 "detected an error in element content\n"); 9591 ctxt->instate = XML_PARSER_EOF; 9592 break; 9593 } 9594 break; 9595 } 9596 case XML_PARSER_END_TAG: 9597 if (avail < 2) 9598 goto done; 9599 if (!terminate) { 9600 if (ctxt->progressive) { 9601 if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) 9602 goto done; 9603 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9604 goto done; 9605 } 9606 } 9607 if (ctxt->sax2) { 9608 xmlParseEndTag2(ctxt, 9609 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 9610 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 9611 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]); 9612 nameNsPop(ctxt); 9613 } 9614#ifdef LIBXML_SAX1_ENABLED 9615 else 9616 xmlParseEndTag1(ctxt, 0); 9617#endif /* LIBXML_SAX1_ENABLED */ 9618 if (ctxt->nameNr == 0) { 9619 ctxt->instate = XML_PARSER_EPILOG; 9620 } else { 9621 ctxt->instate = XML_PARSER_CONTENT; 9622 } 9623 break; 9624 case XML_PARSER_CDATA_SECTION: { 9625 /* 9626 * The Push mode need to have the SAX callback for 9627 * cdataBlock merge back contiguous callbacks. 
9628 */ 9629 int base; 9630 9631 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 9632 if (base < 0) { 9633 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 9634 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9635 if (ctxt->sax->cdataBlock != NULL) 9636 ctxt->sax->cdataBlock(ctxt->userData, 9637 ctxt->input->cur, 9638 XML_PARSER_BIG_BUFFER_SIZE); 9639 else if (ctxt->sax->characters != NULL) 9640 ctxt->sax->characters(ctxt->userData, 9641 ctxt->input->cur, 9642 XML_PARSER_BIG_BUFFER_SIZE); 9643 } 9644 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 9645 ctxt->checkIndex = 0; 9646 } 9647 goto done; 9648 } else { 9649 if ((ctxt->sax != NULL) && (base > 0) && 9650 (!ctxt->disableSAX)) { 9651 if (ctxt->sax->cdataBlock != NULL) 9652 ctxt->sax->cdataBlock(ctxt->userData, 9653 ctxt->input->cur, base); 9654 else if (ctxt->sax->characters != NULL) 9655 ctxt->sax->characters(ctxt->userData, 9656 ctxt->input->cur, base); 9657 } 9658 SKIP(base + 3); 9659 ctxt->checkIndex = 0; 9660 ctxt->instate = XML_PARSER_CONTENT; 9661#ifdef DEBUG_PUSH 9662 xmlGenericError(xmlGenericErrorContext, 9663 "PP: entering CONTENT\n"); 9664#endif 9665 } 9666 break; 9667 } 9668 case XML_PARSER_MISC: 9669 SKIP_BLANKS; 9670 if (ctxt->input->buf == NULL) 9671 avail = ctxt->input->length - 9672 (ctxt->input->cur - ctxt->input->base); 9673 else 9674 avail = ctxt->input->buf->buffer->use - 9675 (ctxt->input->cur - ctxt->input->base); 9676 if (avail < 2) 9677 goto done; 9678 cur = ctxt->input->cur[0]; 9679 next = ctxt->input->cur[1]; 9680 if ((cur == '<') && (next == '?')) { 9681 if ((!terminate) && 9682 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9683 goto done; 9684#ifdef DEBUG_PUSH 9685 xmlGenericError(xmlGenericErrorContext, 9686 "PP: Parsing PI\n"); 9687#endif 9688 xmlParsePI(ctxt); 9689 } else if ((cur == '<') && (next == '!') && 9690 (ctxt->input->cur[2] == '-') && 9691 (ctxt->input->cur[3] == '-')) { 9692 if ((!terminate) && 9693 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9694 goto done; 
9695#ifdef DEBUG_PUSH 9696 xmlGenericError(xmlGenericErrorContext, 9697 "PP: Parsing Comment\n"); 9698#endif 9699 xmlParseComment(ctxt); 9700 ctxt->instate = XML_PARSER_MISC; 9701 } else if ((cur == '<') && (next == '!') && 9702 (ctxt->input->cur[2] == 'D') && 9703 (ctxt->input->cur[3] == 'O') && 9704 (ctxt->input->cur[4] == 'C') && 9705 (ctxt->input->cur[5] == 'T') && 9706 (ctxt->input->cur[6] == 'Y') && 9707 (ctxt->input->cur[7] == 'P') && 9708 (ctxt->input->cur[8] == 'E')) { 9709 if ((!terminate) && 9710 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 9711 goto done; 9712#ifdef DEBUG_PUSH 9713 xmlGenericError(xmlGenericErrorContext, 9714 "PP: Parsing internal subset\n"); 9715#endif 9716 ctxt->inSubset = 1; 9717 xmlParseDocTypeDecl(ctxt); 9718 if (RAW == '[') { 9719 ctxt->instate = XML_PARSER_DTD; 9720#ifdef DEBUG_PUSH 9721 xmlGenericError(xmlGenericErrorContext, 9722 "PP: entering DTD\n"); 9723#endif 9724 } else { 9725 /* 9726 * Create and update the external subset. 9727 */ 9728 ctxt->inSubset = 2; 9729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9730 (ctxt->sax->externalSubset != NULL)) 9731 ctxt->sax->externalSubset(ctxt->userData, 9732 ctxt->intSubName, ctxt->extSubSystem, 9733 ctxt->extSubURI); 9734 ctxt->inSubset = 0; 9735 ctxt->instate = XML_PARSER_PROLOG; 9736#ifdef DEBUG_PUSH 9737 xmlGenericError(xmlGenericErrorContext, 9738 "PP: entering PROLOG\n"); 9739#endif 9740 } 9741 } else if ((cur == '<') && (next == '!') && 9742 (avail < 9)) { 9743 goto done; 9744 } else { 9745 ctxt->instate = XML_PARSER_START_TAG; 9746 ctxt->progressive = 1; 9747 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9748#ifdef DEBUG_PUSH 9749 xmlGenericError(xmlGenericErrorContext, 9750 "PP: entering START_TAG\n"); 9751#endif 9752 } 9753 break; 9754 case XML_PARSER_PROLOG: 9755 SKIP_BLANKS; 9756 if (ctxt->input->buf == NULL) 9757 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9758 else 9759 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - 
ctxt->input->base); 9760 if (avail < 2) 9761 goto done; 9762 cur = ctxt->input->cur[0]; 9763 next = ctxt->input->cur[1]; 9764 if ((cur == '<') && (next == '?')) { 9765 if ((!terminate) && 9766 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9767 goto done; 9768#ifdef DEBUG_PUSH 9769 xmlGenericError(xmlGenericErrorContext, 9770 "PP: Parsing PI\n"); 9771#endif 9772 xmlParsePI(ctxt); 9773 } else if ((cur == '<') && (next == '!') && 9774 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9775 if ((!terminate) && 9776 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9777 goto done; 9778#ifdef DEBUG_PUSH 9779 xmlGenericError(xmlGenericErrorContext, 9780 "PP: Parsing Comment\n"); 9781#endif 9782 xmlParseComment(ctxt); 9783 ctxt->instate = XML_PARSER_PROLOG; 9784 } else if ((cur == '<') && (next == '!') && 9785 (avail < 4)) { 9786 goto done; 9787 } else { 9788 ctxt->instate = XML_PARSER_START_TAG; 9789 ctxt->progressive = 1; 9790 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9791#ifdef DEBUG_PUSH 9792 xmlGenericError(xmlGenericErrorContext, 9793 "PP: entering START_TAG\n"); 9794#endif 9795 } 9796 break; 9797 case XML_PARSER_EPILOG: 9798 SKIP_BLANKS; 9799 if (ctxt->input->buf == NULL) 9800 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9801 else 9802 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 9803 if (avail < 2) 9804 goto done; 9805 cur = ctxt->input->cur[0]; 9806 next = ctxt->input->cur[1]; 9807 if ((cur == '<') && (next == '?')) { 9808 if ((!terminate) && 9809 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9810 goto done; 9811#ifdef DEBUG_PUSH 9812 xmlGenericError(xmlGenericErrorContext, 9813 "PP: Parsing PI\n"); 9814#endif 9815 xmlParsePI(ctxt); 9816 ctxt->instate = XML_PARSER_EPILOG; 9817 } else if ((cur == '<') && (next == '!') && 9818 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9819 if ((!terminate) && 9820 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9821 goto done; 
9822#ifdef DEBUG_PUSH 9823 xmlGenericError(xmlGenericErrorContext, 9824 "PP: Parsing Comment\n"); 9825#endif 9826 xmlParseComment(ctxt); 9827 ctxt->instate = XML_PARSER_EPILOG; 9828 } else if ((cur == '<') && (next == '!') && 9829 (avail < 4)) { 9830 goto done; 9831 } else { 9832 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9833 ctxt->instate = XML_PARSER_EOF; 9834#ifdef DEBUG_PUSH 9835 xmlGenericError(xmlGenericErrorContext, 9836 "PP: entering EOF\n"); 9837#endif 9838 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9839 ctxt->sax->endDocument(ctxt->userData); 9840 goto done; 9841 } 9842 break; 9843 case XML_PARSER_DTD: { 9844 /* 9845 * Sorry but progressive parsing of the internal subset 9846 * is not expected to be supported. We first check that 9847 * the full content of the internal subset is available and 9848 * the parsing is launched only at that point. 9849 * Internal subset ends up with "']' S? '>'" in an unescaped 9850 * section and not in a ']]>' sequence which are conditional 9851 * sections (whoever argued to keep that crap in XML deserve 9852 * a place in hell !). 9853 */ 9854 int base, i; 9855 xmlChar *buf; 9856 xmlChar quote = 0; 9857 9858 base = ctxt->input->cur - ctxt->input->base; 9859 if (base < 0) return(0); 9860 if (ctxt->checkIndex > base) 9861 base = ctxt->checkIndex; 9862 buf = ctxt->input->buf->buffer->content; 9863 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 9864 base++) { 9865 if (quote != 0) { 9866 if (buf[base] == quote) 9867 quote = 0; 9868 continue; 9869 } 9870 if (buf[base] == '"') { 9871 quote = '"'; 9872 continue; 9873 } 9874 if (buf[base] == '\'') { 9875 quote = '\''; 9876 continue; 9877 } 9878 if (buf[base] == ']') { 9879 if ((unsigned int) base +1 >= 9880 ctxt->input->buf->buffer->use) 9881 break; 9882 if (buf[base + 1] == ']') { 9883 /* conditional crap, skip both ']' ! 
*/ 9884 base++; 9885 continue; 9886 } 9887 for (i = 0; 9888 (unsigned int) base + i < ctxt->input->buf->buffer->use; 9889 i++) { 9890 if (buf[base + i] == '>') 9891 goto found_end_int_subset; 9892 } 9893 break; 9894 } 9895 } 9896 /* 9897 * We didn't found the end of the Internal subset 9898 */ 9899 if (quote == 0) 9900 ctxt->checkIndex = base; 9901#ifdef DEBUG_PUSH 9902 if (next == 0) 9903 xmlGenericError(xmlGenericErrorContext, 9904 "PP: lookup of int subset end filed\n"); 9905#endif 9906 goto done; 9907 9908found_end_int_subset: 9909 xmlParseInternalSubset(ctxt); 9910 ctxt->inSubset = 2; 9911 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9912 (ctxt->sax->externalSubset != NULL)) 9913 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9914 ctxt->extSubSystem, ctxt->extSubURI); 9915 ctxt->inSubset = 0; 9916 ctxt->instate = XML_PARSER_PROLOG; 9917 ctxt->checkIndex = 0; 9918#ifdef DEBUG_PUSH 9919 xmlGenericError(xmlGenericErrorContext, 9920 "PP: entering PROLOG\n"); 9921#endif 9922 break; 9923 } 9924 case XML_PARSER_COMMENT: 9925 xmlGenericError(xmlGenericErrorContext, 9926 "PP: internal error, state == COMMENT\n"); 9927 ctxt->instate = XML_PARSER_CONTENT; 9928#ifdef DEBUG_PUSH 9929 xmlGenericError(xmlGenericErrorContext, 9930 "PP: entering CONTENT\n"); 9931#endif 9932 break; 9933 case XML_PARSER_IGNORE: 9934 xmlGenericError(xmlGenericErrorContext, 9935 "PP: internal error, state == IGNORE"); 9936 ctxt->instate = XML_PARSER_DTD; 9937#ifdef DEBUG_PUSH 9938 xmlGenericError(xmlGenericErrorContext, 9939 "PP: entering DTD\n"); 9940#endif 9941 break; 9942 case XML_PARSER_PI: 9943 xmlGenericError(xmlGenericErrorContext, 9944 "PP: internal error, state == PI\n"); 9945 ctxt->instate = XML_PARSER_CONTENT; 9946#ifdef DEBUG_PUSH 9947 xmlGenericError(xmlGenericErrorContext, 9948 "PP: entering CONTENT\n"); 9949#endif 9950 break; 9951 case XML_PARSER_ENTITY_DECL: 9952 xmlGenericError(xmlGenericErrorContext, 9953 "PP: internal error, state == ENTITY_DECL\n"); 9954 
ctxt->instate = XML_PARSER_DTD; 9955#ifdef DEBUG_PUSH 9956 xmlGenericError(xmlGenericErrorContext, 9957 "PP: entering DTD\n"); 9958#endif 9959 break; 9960 case XML_PARSER_ENTITY_VALUE: 9961 xmlGenericError(xmlGenericErrorContext, 9962 "PP: internal error, state == ENTITY_VALUE\n"); 9963 ctxt->instate = XML_PARSER_CONTENT; 9964#ifdef DEBUG_PUSH 9965 xmlGenericError(xmlGenericErrorContext, 9966 "PP: entering DTD\n"); 9967#endif 9968 break; 9969 case XML_PARSER_ATTRIBUTE_VALUE: 9970 xmlGenericError(xmlGenericErrorContext, 9971 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 9972 ctxt->instate = XML_PARSER_START_TAG; 9973#ifdef DEBUG_PUSH 9974 xmlGenericError(xmlGenericErrorContext, 9975 "PP: entering START_TAG\n"); 9976#endif 9977 break; 9978 case XML_PARSER_SYSTEM_LITERAL: 9979 xmlGenericError(xmlGenericErrorContext, 9980 "PP: internal error, state == SYSTEM_LITERAL\n"); 9981 ctxt->instate = XML_PARSER_START_TAG; 9982#ifdef DEBUG_PUSH 9983 xmlGenericError(xmlGenericErrorContext, 9984 "PP: entering START_TAG\n"); 9985#endif 9986 break; 9987 case XML_PARSER_PUBLIC_LITERAL: 9988 xmlGenericError(xmlGenericErrorContext, 9989 "PP: internal error, state == PUBLIC_LITERAL\n"); 9990 ctxt->instate = XML_PARSER_START_TAG; 9991#ifdef DEBUG_PUSH 9992 xmlGenericError(xmlGenericErrorContext, 9993 "PP: entering START_TAG\n"); 9994#endif 9995 break; 9996 } 9997 } 9998done: 9999#ifdef DEBUG_PUSH 10000 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 10001#endif 10002 return(ret); 10003} 10004 10005/** 10006 * xmlParseChunk: 10007 * @ctxt: an XML parser context 10008 * @chunk: an char array 10009 * @size: the size in byte of the chunk 10010 * @terminate: last chunk indicator 10011 * 10012 * Parse a Chunk of memory 10013 * 10014 * Returns zero if no error, the xmlParserErrors otherwise. 
10015 */ 10016int 10017xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 10018 int terminate) { 10019 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10020 return(ctxt->errNo); 10021 if (ctxt->instate == XML_PARSER_START) 10022 xmlDetectSAX2(ctxt); 10023 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10024 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 10025 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10026 int cur = ctxt->input->cur - ctxt->input->base; 10027 10028 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10029 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10030 ctxt->input->cur = ctxt->input->base + cur; 10031 ctxt->input->end = 10032 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10033#ifdef DEBUG_PUSH 10034 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10035#endif 10036 10037 } else if (ctxt->instate != XML_PARSER_EOF) { 10038 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 10039 xmlParserInputBufferPtr in = ctxt->input->buf; 10040 if ((in->encoder != NULL) && (in->buffer != NULL) && 10041 (in->raw != NULL)) { 10042 int nbchars; 10043 10044 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 10045 if (nbchars < 0) { 10046 /* TODO 2.6.0 */ 10047 xmlGenericError(xmlGenericErrorContext, 10048 "xmlParseChunk: encoder error\n"); 10049 return(XML_ERR_INVALID_ENCODING); 10050 } 10051 } 10052 } 10053 } 10054 xmlParseTryOrFinish(ctxt, terminate); 10055 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10056 return(ctxt->errNo); 10057 if (terminate) { 10058 /* 10059 * Check for termination 10060 */ 10061 int avail = 0; 10062 if (ctxt->input->buf == NULL) 10063 avail = ctxt->input->length - 10064 (ctxt->input->cur - ctxt->input->base); 10065 else 10066 avail = ctxt->input->buf->buffer->use - 10067 (ctxt->input->cur - ctxt->input->base); 10068 10069 if ((ctxt->instate != XML_PARSER_EOF) && 
10070 (ctxt->instate != XML_PARSER_EPILOG)) { 10071 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10072 } 10073 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 10074 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10075 } 10076 if (ctxt->instate != XML_PARSER_EOF) { 10077 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10078 ctxt->sax->endDocument(ctxt->userData); 10079 } 10080 ctxt->instate = XML_PARSER_EOF; 10081 } 10082 return((xmlParserErrors) ctxt->errNo); 10083} 10084 10085/************************************************************************ 10086 * * 10087 * I/O front end functions to the parser * 10088 * * 10089 ************************************************************************/ 10090 10091/** 10092 * xmlStopParser: 10093 * @ctxt: an XML parser context 10094 * 10095 * Blocks further parser processing 10096 */ 10097void 10098xmlStopParser(xmlParserCtxtPtr ctxt) { 10099 ctxt->instate = XML_PARSER_EOF; 10100 if (ctxt->input != NULL) 10101 ctxt->input->cur = BAD_CAST""; 10102} 10103 10104/** 10105 * xmlCreatePushParserCtxt: 10106 * @sax: a SAX handler 10107 * @user_data: The user data returned on SAX callbacks 10108 * @chunk: a pointer to an array of chars 10109 * @size: number of chars in the array 10110 * @filename: an optional file name or URI 10111 * 10112 * Create a parser context for using the XML parser in push mode. 10113 * If @buffer and @size are non-NULL, the data is used to detect 10114 * the encoding. The remaining characters will be parsed so they 10115 * don't need to be fed in again through xmlParseChunk. 10116 * To allow content encoding detection, @size should be >= 4 10117 * The value of @filename is used for fetching external entities 10118 * and error/warning reports. 
10119 * 10120 * Returns the new parser context or NULL 10121 */ 10122 10123xmlParserCtxtPtr 10124xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10125 const char *chunk, int size, const char *filename) { 10126 xmlParserCtxtPtr ctxt; 10127 xmlParserInputPtr inputStream; 10128 xmlParserInputBufferPtr buf; 10129 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 10130 10131 /* 10132 * plug some encoding conversion routines 10133 */ 10134 if ((chunk != NULL) && (size >= 4)) 10135 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 10136 10137 buf = xmlAllocParserInputBuffer(enc); 10138 if (buf == NULL) return(NULL); 10139 10140 ctxt = xmlNewParserCtxt(); 10141 if (ctxt == NULL) { 10142 xmlErrMemory(NULL, "creating parser: out of memory\n"); 10143 xmlFreeParserInputBuffer(buf); 10144 return(NULL); 10145 } 10146 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 10147 if (ctxt->pushTab == NULL) { 10148 xmlErrMemory(ctxt, NULL); 10149 xmlFreeParserInputBuffer(buf); 10150 xmlFreeParserCtxt(ctxt); 10151 return(NULL); 10152 } 10153 if (sax != NULL) { 10154#ifdef LIBXML_SAX1_ENABLED 10155 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 10156#endif /* LIBXML_SAX1_ENABLED */ 10157 xmlFree(ctxt->sax); 10158 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10159 if (ctxt->sax == NULL) { 10160 xmlErrMemory(ctxt, NULL); 10161 xmlFreeParserInputBuffer(buf); 10162 xmlFreeParserCtxt(ctxt); 10163 return(NULL); 10164 } 10165 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10166 if (user_data != NULL) 10167 ctxt->userData = user_data; 10168 } 10169 if (filename == NULL) { 10170 ctxt->directory = NULL; 10171 } else { 10172 ctxt->directory = xmlParserGetDirectory(filename); 10173 } 10174 10175 inputStream = xmlNewInputStream(ctxt); 10176 if (inputStream == NULL) { 10177 xmlFreeParserCtxt(ctxt); 10178 xmlFreeParserInputBuffer(buf); 10179 return(NULL); 10180 } 10181 10182 if (filename == NULL) 10183 inputStream->filename = 
NULL; 10184 else 10185 inputStream->filename = (char *) 10186 xmlCanonicPath((const xmlChar *) filename); 10187 inputStream->buf = buf; 10188 inputStream->base = inputStream->buf->buffer->content; 10189 inputStream->cur = inputStream->buf->buffer->content; 10190 inputStream->end = 10191 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 10192 10193 inputPush(ctxt, inputStream); 10194 10195 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10196 (ctxt->input->buf != NULL)) { 10197 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10198 int cur = ctxt->input->cur - ctxt->input->base; 10199 10200 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10201 10202 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10203 ctxt->input->cur = ctxt->input->base + cur; 10204 ctxt->input->end = 10205 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10206#ifdef DEBUG_PUSH 10207 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10208#endif 10209 } 10210 10211 if (enc != XML_CHAR_ENCODING_NONE) { 10212 xmlSwitchEncoding(ctxt, enc); 10213 } 10214 10215 return(ctxt); 10216} 10217#endif /* LIBXML_PUSH_ENABLED */ 10218 10219/** 10220 * xmlCreateIOParserCtxt: 10221 * @sax: a SAX handler 10222 * @user_data: The user data returned on SAX callbacks 10223 * @ioread: an I/O read function 10224 * @ioclose: an I/O close function 10225 * @ioctx: an I/O handler 10226 * @enc: the charset encoding if known 10227 * 10228 * Create a parser context for using the XML parser with an existing 10229 * I/O stream 10230 * 10231 * Returns the new parser context or NULL 10232 */ 10233xmlParserCtxtPtr 10234xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10235 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 10236 void *ioctx, xmlCharEncoding enc) { 10237 xmlParserCtxtPtr ctxt; 10238 xmlParserInputPtr inputStream; 10239 xmlParserInputBufferPtr buf; 10240 10241 buf = 
xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 10242 if (buf == NULL) return(NULL); 10243 10244 ctxt = xmlNewParserCtxt(); 10245 if (ctxt == NULL) { 10246 xmlFree(buf); 10247 return(NULL); 10248 } 10249 if (sax != NULL) { 10250#ifdef LIBXML_SAX1_ENABLED 10251 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 10252#endif /* LIBXML_SAX1_ENABLED */ 10253 xmlFree(ctxt->sax); 10254 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10255 if (ctxt->sax == NULL) { 10256 xmlErrMemory(ctxt, NULL); 10257 xmlFree(ctxt); 10258 return(NULL); 10259 } 10260 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10261 if (user_data != NULL) 10262 ctxt->userData = user_data; 10263 } 10264 10265 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 10266 if (inputStream == NULL) { 10267 xmlFreeParserCtxt(ctxt); 10268 return(NULL); 10269 } 10270 inputPush(ctxt, inputStream); 10271 10272 return(ctxt); 10273} 10274 10275#ifdef LIBXML_VALID_ENABLED 10276/************************************************************************ 10277 * * 10278 * Front ends when parsing a DTD * 10279 * * 10280 ************************************************************************/ 10281 10282/** 10283 * xmlIOParseDTD: 10284 * @sax: the SAX handler block or NULL 10285 * @input: an Input Buffer 10286 * @enc: the charset encoding if known 10287 * 10288 * Load and parse a DTD 10289 * 10290 * Returns the resulting xmlDtdPtr or NULL in case of error. 10291 * @input will be freed at parsing end. 
10292 */ 10293 10294xmlDtdPtr 10295xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 10296 xmlCharEncoding enc) { 10297 xmlDtdPtr ret = NULL; 10298 xmlParserCtxtPtr ctxt; 10299 xmlParserInputPtr pinput = NULL; 10300 xmlChar start[4]; 10301 10302 if (input == NULL) 10303 return(NULL); 10304 10305 ctxt = xmlNewParserCtxt(); 10306 if (ctxt == NULL) { 10307 return(NULL); 10308 } 10309 10310 /* 10311 * Set-up the SAX context 10312 */ 10313 if (sax != NULL) { 10314 if (ctxt->sax != NULL) 10315 xmlFree(ctxt->sax); 10316 ctxt->sax = sax; 10317 ctxt->userData = NULL; 10318 } 10319 xmlDetectSAX2(ctxt); 10320 10321 /* 10322 * generate a parser input from the I/O handler 10323 */ 10324 10325 pinput = xmlNewIOInputStream(ctxt, input, enc); 10326 if (pinput == NULL) { 10327 if (sax != NULL) ctxt->sax = NULL; 10328 xmlFreeParserCtxt(ctxt); 10329 return(NULL); 10330 } 10331 10332 /* 10333 * plug some encoding conversion routines here. 10334 */ 10335 xmlPushInput(ctxt, pinput); 10336 10337 pinput->filename = NULL; 10338 pinput->line = 1; 10339 pinput->col = 1; 10340 pinput->base = ctxt->input->cur; 10341 pinput->cur = ctxt->input->cur; 10342 pinput->free = NULL; 10343 10344 /* 10345 * let's parse that entity knowing it's an external subset. 10346 */ 10347 ctxt->inSubset = 2; 10348 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10349 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10350 BAD_CAST "none", BAD_CAST "none"); 10351 10352 if ((enc == XML_CHAR_ENCODING_NONE) && 10353 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10354 /* 10355 * Get the 4 first bytes and decode the charset 10356 * if enc != XML_CHAR_ENCODING_NONE 10357 * plug some encoding conversion routines. 
10358 */ 10359 start[0] = RAW; 10360 start[1] = NXT(1); 10361 start[2] = NXT(2); 10362 start[3] = NXT(3); 10363 enc = xmlDetectCharEncoding(start, 4); 10364 if (enc != XML_CHAR_ENCODING_NONE) { 10365 xmlSwitchEncoding(ctxt, enc); 10366 } 10367 } 10368 10369 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 10370 10371 if (ctxt->myDoc != NULL) { 10372 if (ctxt->wellFormed) { 10373 ret = ctxt->myDoc->extSubset; 10374 ctxt->myDoc->extSubset = NULL; 10375 if (ret != NULL) { 10376 xmlNodePtr tmp; 10377 10378 ret->doc = NULL; 10379 tmp = ret->children; 10380 while (tmp != NULL) { 10381 tmp->doc = NULL; 10382 tmp = tmp->next; 10383 } 10384 } 10385 } else { 10386 ret = NULL; 10387 } 10388 xmlFreeDoc(ctxt->myDoc); 10389 ctxt->myDoc = NULL; 10390 } 10391 if (sax != NULL) ctxt->sax = NULL; 10392 xmlFreeParserCtxt(ctxt); 10393 10394 return(ret); 10395} 10396 10397/** 10398 * xmlSAXParseDTD: 10399 * @sax: the SAX handler block 10400 * @ExternalID: a NAME* containing the External ID of the DTD 10401 * @SystemID: a NAME* containing the URL to the DTD 10402 * 10403 * Load and parse an external subset. 10404 * 10405 * Returns the resulting xmlDtdPtr or NULL in case of error. 
10406 */ 10407 10408xmlDtdPtr 10409xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 10410 const xmlChar *SystemID) { 10411 xmlDtdPtr ret = NULL; 10412 xmlParserCtxtPtr ctxt; 10413 xmlParserInputPtr input = NULL; 10414 xmlCharEncoding enc; 10415 10416 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 10417 10418 ctxt = xmlNewParserCtxt(); 10419 if (ctxt == NULL) { 10420 return(NULL); 10421 } 10422 10423 /* 10424 * Set-up the SAX context 10425 */ 10426 if (sax != NULL) { 10427 if (ctxt->sax != NULL) 10428 xmlFree(ctxt->sax); 10429 ctxt->sax = sax; 10430 ctxt->userData = ctxt; 10431 } 10432 10433 /* 10434 * Ask the Entity resolver to load the damn thing 10435 */ 10436 10437 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 10438 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID); 10439 if (input == NULL) { 10440 if (sax != NULL) ctxt->sax = NULL; 10441 xmlFreeParserCtxt(ctxt); 10442 return(NULL); 10443 } 10444 10445 /* 10446 * plug some encoding conversion routines here. 10447 */ 10448 xmlPushInput(ctxt, input); 10449 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10450 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 10451 xmlSwitchEncoding(ctxt, enc); 10452 } 10453 10454 if (input->filename == NULL) 10455 input->filename = (char *) xmlCanonicPath(SystemID); 10456 input->line = 1; 10457 input->col = 1; 10458 input->base = ctxt->input->cur; 10459 input->cur = ctxt->input->cur; 10460 input->free = NULL; 10461 10462 /* 10463 * let's parse that entity knowing it's an external subset. 
10464 */ 10465 ctxt->inSubset = 2; 10466 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10467 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10468 ExternalID, SystemID); 10469 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 10470 10471 if (ctxt->myDoc != NULL) { 10472 if (ctxt->wellFormed) { 10473 ret = ctxt->myDoc->extSubset; 10474 ctxt->myDoc->extSubset = NULL; 10475 if (ret != NULL) { 10476 xmlNodePtr tmp; 10477 10478 ret->doc = NULL; 10479 tmp = ret->children; 10480 while (tmp != NULL) { 10481 tmp->doc = NULL; 10482 tmp = tmp->next; 10483 } 10484 } 10485 } else { 10486 ret = NULL; 10487 } 10488 xmlFreeDoc(ctxt->myDoc); 10489 ctxt->myDoc = NULL; 10490 } 10491 if (sax != NULL) ctxt->sax = NULL; 10492 xmlFreeParserCtxt(ctxt); 10493 10494 return(ret); 10495} 10496 10497 10498/** 10499 * xmlParseDTD: 10500 * @ExternalID: a NAME* containing the External ID of the DTD 10501 * @SystemID: a NAME* containing the URL to the DTD 10502 * 10503 * Load and parse an external subset. 10504 * 10505 * Returns the resulting xmlDtdPtr or NULL in case of error. 10506 */ 10507 10508xmlDtdPtr 10509xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 10510 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 10511} 10512#endif /* LIBXML_VALID_ENABLED */ 10513 10514/************************************************************************ 10515 * * 10516 * Front ends when parsing an Entity * 10517 * * 10518 ************************************************************************/ 10519 10520/** 10521 * xmlParseCtxtExternalEntity: 10522 * @ctx: the existing parsing context 10523 * @URL: the URL for the entity to load 10524 * @ID: the System ID for the entity to load 10525 * @lst: the return value for the set of parsed nodes 10526 * 10527 * Parse an external general entity within an existing parsing context 10528 * An external general parsed entity is well-formed if it matches the 10529 * production labeled extParsedEnt. 
10530 * 10531 * [78] extParsedEnt ::= TextDecl? content 10532 * 10533 * Returns 0 if the entity is well formed, -1 in case of args problem and 10534 * the parser error code otherwise 10535 */ 10536 10537int 10538xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 10539 const xmlChar *ID, xmlNodePtr *lst) { 10540 xmlParserCtxtPtr ctxt; 10541 xmlDocPtr newDoc; 10542 xmlSAXHandlerPtr oldsax = NULL; 10543 int ret = 0; 10544 xmlChar start[4]; 10545 xmlCharEncoding enc; 10546 10547 if (ctx->depth > 40) { 10548 return(XML_ERR_ENTITY_LOOP); 10549 } 10550 10551 if (lst != NULL) 10552 *lst = NULL; 10553 if ((URL == NULL) && (ID == NULL)) 10554 return(-1); 10555 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 10556 return(-1); 10557 10558 10559 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10560 if (ctxt == NULL) return(-1); 10561 ctxt->userData = ctxt; 10562 ctxt->_private = ctx->_private; 10563 oldsax = ctxt->sax; 10564 ctxt->sax = ctx->sax; 10565 xmlDetectSAX2(ctxt); 10566 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10567 if (newDoc == NULL) { 10568 xmlFreeParserCtxt(ctxt); 10569 return(-1); 10570 } 10571 if (ctx->myDoc != NULL) { 10572 newDoc->intSubset = ctx->myDoc->intSubset; 10573 newDoc->extSubset = ctx->myDoc->extSubset; 10574 } 10575 if (ctx->myDoc->URL != NULL) { 10576 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 10577 } 10578 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10579 if (newDoc->children == NULL) { 10580 ctxt->sax = oldsax; 10581 xmlFreeParserCtxt(ctxt); 10582 newDoc->intSubset = NULL; 10583 newDoc->extSubset = NULL; 10584 xmlFreeDoc(newDoc); 10585 return(-1); 10586 } 10587 nodePush(ctxt, newDoc->children); 10588 if (ctx->myDoc == NULL) { 10589 ctxt->myDoc = newDoc; 10590 } else { 10591 ctxt->myDoc = ctx->myDoc; 10592 newDoc->children->doc = ctx->myDoc; 10593 } 10594 10595 /* 10596 * Get the 4 first bytes and decode the charset 10597 * if enc != XML_CHAR_ENCODING_NONE 10598 * plug some encoding 
conversion routines. 10599 */ 10600 GROW 10601 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10602 start[0] = RAW; 10603 start[1] = NXT(1); 10604 start[2] = NXT(2); 10605 start[3] = NXT(3); 10606 enc = xmlDetectCharEncoding(start, 4); 10607 if (enc != XML_CHAR_ENCODING_NONE) { 10608 xmlSwitchEncoding(ctxt, enc); 10609 } 10610 } 10611 10612 /* 10613 * Parse a possible text declaration first 10614 */ 10615 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10616 xmlParseTextDecl(ctxt); 10617 } 10618 10619 /* 10620 * Doing validity checking on chunk doesn't make sense 10621 */ 10622 ctxt->instate = XML_PARSER_CONTENT; 10623 ctxt->validate = ctx->validate; 10624 ctxt->valid = ctx->valid; 10625 ctxt->loadsubset = ctx->loadsubset; 10626 ctxt->depth = ctx->depth + 1; 10627 ctxt->replaceEntities = ctx->replaceEntities; 10628 if (ctxt->validate) { 10629 ctxt->vctxt.error = ctx->vctxt.error; 10630 ctxt->vctxt.warning = ctx->vctxt.warning; 10631 } else { 10632 ctxt->vctxt.error = NULL; 10633 ctxt->vctxt.warning = NULL; 10634 } 10635 ctxt->vctxt.nodeTab = NULL; 10636 ctxt->vctxt.nodeNr = 0; 10637 ctxt->vctxt.nodeMax = 0; 10638 ctxt->vctxt.node = NULL; 10639 10640 xmlParseContent(ctxt); 10641 10642 ctx->validate = ctxt->validate; 10643 ctx->valid = ctxt->valid; 10644 if ((RAW == '<') && (NXT(1) == '/')) { 10645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10646 } else if (RAW != 0) { 10647 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10648 } 10649 if (ctxt->node != newDoc->children) { 10650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10651 } 10652 10653 if (!ctxt->wellFormed) { 10654 if (ctxt->errNo == 0) 10655 ret = 1; 10656 else 10657 ret = ctxt->errNo; 10658 } else { 10659 if (lst != NULL) { 10660 xmlNodePtr cur; 10661 10662 /* 10663 * Return the newly created nodeset after unlinking it from 10664 * they pseudo parent. 
10665 */ 10666 cur = newDoc->children->children; 10667 *lst = cur; 10668 while (cur != NULL) { 10669 cur->parent = NULL; 10670 cur = cur->next; 10671 } 10672 newDoc->children->children = NULL; 10673 } 10674 ret = 0; 10675 } 10676 ctxt->sax = oldsax; 10677 xmlFreeParserCtxt(ctxt); 10678 newDoc->intSubset = NULL; 10679 newDoc->extSubset = NULL; 10680 xmlFreeDoc(newDoc); 10681 10682 return(ret); 10683} 10684 10685/** 10686 * xmlParseExternalEntityPrivate: 10687 * @doc: the document the chunk pertains to 10688 * @oldctxt: the previous parser context if available 10689 * @sax: the SAX handler bloc (possibly NULL) 10690 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10691 * @depth: Used for loop detection, use 0 10692 * @URL: the URL for the entity to load 10693 * @ID: the System ID for the entity to load 10694 * @list: the return value for the set of parsed nodes 10695 * 10696 * Private version of xmlParseExternalEntity() 10697 * 10698 * Returns 0 if the entity is well formed, -1 in case of args problem and 10699 * the parser error code otherwise 10700 */ 10701 10702static xmlParserErrors 10703xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 10704 xmlSAXHandlerPtr sax, 10705 void *user_data, int depth, const xmlChar *URL, 10706 const xmlChar *ID, xmlNodePtr *list) { 10707 xmlParserCtxtPtr ctxt; 10708 xmlDocPtr newDoc; 10709 xmlSAXHandlerPtr oldsax = NULL; 10710 xmlParserErrors ret = XML_ERR_OK; 10711 xmlChar start[4]; 10712 xmlCharEncoding enc; 10713 10714 if (depth > 40) { 10715 return(XML_ERR_ENTITY_LOOP); 10716 } 10717 10718 10719 10720 if (list != NULL) 10721 *list = NULL; 10722 if ((URL == NULL) && (ID == NULL)) 10723 return(XML_ERR_INTERNAL_ERROR); 10724 if (doc == NULL) /* @@ relax but check for dereferences */ 10725 return(XML_ERR_INTERNAL_ERROR); 10726 10727 10728 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10729 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10730 ctxt->userData = ctxt; 10731 if (oldctxt 
!= NULL) { 10732 ctxt->_private = oldctxt->_private; 10733 ctxt->loadsubset = oldctxt->loadsubset; 10734 ctxt->validate = oldctxt->validate; 10735 ctxt->external = oldctxt->external; 10736 ctxt->record_info = oldctxt->record_info; 10737 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 10738 ctxt->node_seq.length = oldctxt->node_seq.length; 10739 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 10740 } else { 10741 /* 10742 * Doing validity checking on chunk without context 10743 * doesn't make sense 10744 */ 10745 ctxt->_private = NULL; 10746 ctxt->validate = 0; 10747 ctxt->external = 2; 10748 ctxt->loadsubset = 0; 10749 } 10750 if (sax != NULL) { 10751 oldsax = ctxt->sax; 10752 ctxt->sax = sax; 10753 if (user_data != NULL) 10754 ctxt->userData = user_data; 10755 } 10756 xmlDetectSAX2(ctxt); 10757 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10758 if (newDoc == NULL) { 10759 ctxt->node_seq.maximum = 0; 10760 ctxt->node_seq.length = 0; 10761 ctxt->node_seq.buffer = NULL; 10762 xmlFreeParserCtxt(ctxt); 10763 return(XML_ERR_INTERNAL_ERROR); 10764 } 10765 if (doc != NULL) { 10766 newDoc->intSubset = doc->intSubset; 10767 newDoc->extSubset = doc->extSubset; 10768 } 10769 if (doc->URL != NULL) { 10770 newDoc->URL = xmlStrdup(doc->URL); 10771 } 10772 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10773 if (newDoc->children == NULL) { 10774 if (sax != NULL) 10775 ctxt->sax = oldsax; 10776 ctxt->node_seq.maximum = 0; 10777 ctxt->node_seq.length = 0; 10778 ctxt->node_seq.buffer = NULL; 10779 xmlFreeParserCtxt(ctxt); 10780 newDoc->intSubset = NULL; 10781 newDoc->extSubset = NULL; 10782 xmlFreeDoc(newDoc); 10783 return(XML_ERR_INTERNAL_ERROR); 10784 } 10785 nodePush(ctxt, newDoc->children); 10786 if (doc == NULL) { 10787 ctxt->myDoc = newDoc; 10788 } else { 10789 ctxt->myDoc = doc; 10790 newDoc->children->doc = doc; 10791 } 10792 10793 /* 10794 * Get the 4 first bytes and decode the charset 10795 * if enc != XML_CHAR_ENCODING_NONE 10796 * plug some 
encoding conversion routines. 10797 */ 10798 GROW; 10799 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10800 start[0] = RAW; 10801 start[1] = NXT(1); 10802 start[2] = NXT(2); 10803 start[3] = NXT(3); 10804 enc = xmlDetectCharEncoding(start, 4); 10805 if (enc != XML_CHAR_ENCODING_NONE) { 10806 xmlSwitchEncoding(ctxt, enc); 10807 } 10808 } 10809 10810 /* 10811 * Parse a possible text declaration first 10812 */ 10813 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10814 xmlParseTextDecl(ctxt); 10815 } 10816 10817 ctxt->instate = XML_PARSER_CONTENT; 10818 ctxt->depth = depth; 10819 10820 xmlParseContent(ctxt); 10821 10822 if ((RAW == '<') && (NXT(1) == '/')) { 10823 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10824 } else if (RAW != 0) { 10825 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10826 } 10827 if (ctxt->node != newDoc->children) { 10828 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10829 } 10830 10831 if (!ctxt->wellFormed) { 10832 if (ctxt->errNo == 0) 10833 ret = XML_ERR_INTERNAL_ERROR; 10834 else 10835 ret = (xmlParserErrors)ctxt->errNo; 10836 } else { 10837 if (list != NULL) { 10838 xmlNodePtr cur; 10839 10840 /* 10841 * Return the newly created nodeset after unlinking it from 10842 * they pseudo parent. 
10843 */ 10844 cur = newDoc->children->children; 10845 *list = cur; 10846 while (cur != NULL) { 10847 cur->parent = NULL; 10848 cur = cur->next; 10849 } 10850 newDoc->children->children = NULL; 10851 } 10852 ret = XML_ERR_OK; 10853 } 10854 if (sax != NULL) 10855 ctxt->sax = oldsax; 10856 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 10857 oldctxt->node_seq.length = ctxt->node_seq.length; 10858 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 10859 ctxt->node_seq.maximum = 0; 10860 ctxt->node_seq.length = 0; 10861 ctxt->node_seq.buffer = NULL; 10862 xmlFreeParserCtxt(ctxt); 10863 newDoc->intSubset = NULL; 10864 newDoc->extSubset = NULL; 10865 xmlFreeDoc(newDoc); 10866 10867 return(ret); 10868} 10869 10870#ifdef LIBXML_SAX1_ENABLED 10871/** 10872 * xmlParseExternalEntity: 10873 * @doc: the document the chunk pertains to 10874 * @sax: the SAX handler bloc (possibly NULL) 10875 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10876 * @depth: Used for loop detection, use 0 10877 * @URL: the URL for the entity to load 10878 * @ID: the System ID for the entity to load 10879 * @lst: the return value for the set of parsed nodes 10880 * 10881 * Parse an external general entity 10882 * An external general parsed entity is well-formed if it matches the 10883 * production labeled extParsedEnt. 10884 * 10885 * [78] extParsedEnt ::= TextDecl? 
content 10886 * 10887 * Returns 0 if the entity is well formed, -1 in case of args problem and 10888 * the parser error code otherwise 10889 */ 10890 10891int 10892xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 10893 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 10894 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 10895 ID, lst)); 10896} 10897 10898/** 10899 * xmlParseBalancedChunkMemory: 10900 * @doc: the document the chunk pertains to 10901 * @sax: the SAX handler bloc (possibly NULL) 10902 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10903 * @depth: Used for loop detection, use 0 10904 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10905 * @lst: the return value for the set of parsed nodes 10906 * 10907 * Parse a well-balanced chunk of an XML document 10908 * called by the parser 10909 * The allowed sequence for the Well Balanced Chunk is the one defined by 10910 * the content production in the XML grammar: 10911 * 10912 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10913 * 10914 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10915 * the parser error code otherwise 10916 */ 10917 10918int 10919xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10920 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 10921 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 10922 depth, string, lst, 0 ); 10923} 10924#endif /* LIBXML_SAX1_ENABLED */ 10925 10926/** 10927 * xmlParseBalancedChunkMemoryInternal: 10928 * @oldctxt: the existing parsing context 10929 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10930 * @user_data: the user data field for the parser context 10931 * @lst: the return value for the set of parsed nodes 10932 * 10933 * 10934 * Parse a well-balanced chunk of an XML document 10935 * called by the parser 10936 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 10937 * the content production in the XML grammar: 10938 * 10939 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10940 * 10941 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 10942 * error code otherwise 10943 * 10944 * In case recover is set to 1, the nodelist will not be empty even if 10945 * the parsed chunk is not well balanced. 10946 */ 10947static xmlParserErrors 10948xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 10949 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 10950 xmlParserCtxtPtr ctxt; 10951 xmlDocPtr newDoc = NULL; 10952 xmlSAXHandlerPtr oldsax = NULL; 10953 xmlNodePtr content = NULL; 10954 int size; 10955 xmlParserErrors ret = XML_ERR_OK; 10956 10957 if (oldctxt->depth > 40) { 10958 return(XML_ERR_ENTITY_LOOP); 10959 } 10960 10961 10962 if (lst != NULL) 10963 *lst = NULL; 10964 if (string == NULL) 10965 return(XML_ERR_INTERNAL_ERROR); 10966 10967 size = xmlStrlen(string); 10968 10969 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10970 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10971 if (user_data != NULL) 10972 ctxt->userData = user_data; 10973 else 10974 ctxt->userData = ctxt; 10975 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10976 ctxt->dict = oldctxt->dict; 10977 10978 oldsax = ctxt->sax; 10979 ctxt->sax = oldctxt->sax; 10980 xmlDetectSAX2(ctxt); 10981 10982 ctxt->_private = oldctxt->_private; 10983 if (oldctxt->myDoc == NULL) { 10984 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10985 if (newDoc == NULL) { 10986 ctxt->sax = oldsax; 10987 ctxt->dict = NULL; 10988 xmlFreeParserCtxt(ctxt); 10989 return(XML_ERR_INTERNAL_ERROR); 10990 } 10991 ctxt->myDoc = newDoc; 10992 } else { 10993 ctxt->myDoc = oldctxt->myDoc; 10994 content = ctxt->myDoc->children; 10995 } 10996 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL, 10997 BAD_CAST "pseudoroot", NULL); 10998 if (ctxt->myDoc->children == 
NULL) { 10999 ctxt->sax = oldsax; 11000 ctxt->dict = NULL; 11001 xmlFreeParserCtxt(ctxt); 11002 if (newDoc != NULL) 11003 xmlFreeDoc(newDoc); 11004 return(XML_ERR_INTERNAL_ERROR); 11005 } 11006 nodePush(ctxt, ctxt->myDoc->children); 11007 ctxt->instate = XML_PARSER_CONTENT; 11008 ctxt->depth = oldctxt->depth + 1; 11009 11010 ctxt->validate = 0; 11011 ctxt->loadsubset = oldctxt->loadsubset; 11012 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 11013 /* 11014 * ID/IDREF registration will be done in xmlValidateElement below 11015 */ 11016 ctxt->loadsubset |= XML_SKIP_IDS; 11017 } 11018 ctxt->dictNames = oldctxt->dictNames; 11019 11020 xmlParseContent(ctxt); 11021 if ((RAW == '<') && (NXT(1) == '/')) { 11022 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11023 } else if (RAW != 0) { 11024 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11025 } 11026 if (ctxt->node != ctxt->myDoc->children) { 11027 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11028 } 11029 11030 if (!ctxt->wellFormed) { 11031 if (ctxt->errNo == 0) 11032 ret = XML_ERR_INTERNAL_ERROR; 11033 else 11034 ret = (xmlParserErrors)ctxt->errNo; 11035 } else { 11036 ret = XML_ERR_OK; 11037 } 11038 11039 if ((lst != NULL) && (ret == XML_ERR_OK)) { 11040 xmlNodePtr cur; 11041 11042 /* 11043 * Return the newly created nodeset after unlinking it from 11044 * they pseudo parent. 
11045 */ 11046 cur = ctxt->myDoc->children->children; 11047 *lst = cur; 11048 while (cur != NULL) { 11049#ifdef LIBXML_VALID_ENABLED 11050 if (oldctxt->validate && oldctxt->wellFormed && 11051 oldctxt->myDoc && oldctxt->myDoc->intSubset) { 11052 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 11053 oldctxt->myDoc, cur); 11054 } 11055#endif /* LIBXML_VALID_ENABLED */ 11056 cur->parent = NULL; 11057 cur = cur->next; 11058 } 11059 ctxt->myDoc->children->children = NULL; 11060 } 11061 if (ctxt->myDoc != NULL) { 11062 xmlFreeNode(ctxt->myDoc->children); 11063 ctxt->myDoc->children = content; 11064 } 11065 11066 ctxt->sax = oldsax; 11067 ctxt->dict = NULL; 11068 xmlFreeParserCtxt(ctxt); 11069 if (newDoc != NULL) 11070 xmlFreeDoc(newDoc); 11071 11072 return(ret); 11073} 11074 11075#ifdef LIBXML_SAX1_ENABLED 11076/** 11077 * xmlParseBalancedChunkMemoryRecover: 11078 * @doc: the document the chunk pertains to 11079 * @sax: the SAX handler bloc (possibly NULL) 11080 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11081 * @depth: Used for loop detection, use 0 11082 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11083 * @lst: the return value for the set of parsed nodes 11084 * @recover: return nodes even if the data is broken (use 0) 11085 * 11086 * 11087 * Parse a well-balanced chunk of an XML document 11088 * called by the parser 11089 * The allowed sequence for the Well Balanced Chunk is the one defined by 11090 * the content production in the XML grammar: 11091 * 11092 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11093 * 11094 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11095 * the parser error code otherwise 11096 * 11097 * In case recover is set to 1, the nodelist will not be empty even if 11098 * the parsed chunk is not well balanced. 
11099 */ 11100int 11101xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11102 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 11103 int recover) { 11104 xmlParserCtxtPtr ctxt; 11105 xmlDocPtr newDoc; 11106 xmlSAXHandlerPtr oldsax = NULL; 11107 xmlNodePtr content; 11108 int size; 11109 int ret = 0; 11110 11111 if (depth > 40) { 11112 return(XML_ERR_ENTITY_LOOP); 11113 } 11114 11115 11116 if (lst != NULL) 11117 *lst = NULL; 11118 if (string == NULL) 11119 return(-1); 11120 11121 size = xmlStrlen(string); 11122 11123 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11124 if (ctxt == NULL) return(-1); 11125 ctxt->userData = ctxt; 11126 if (sax != NULL) { 11127 oldsax = ctxt->sax; 11128 ctxt->sax = sax; 11129 if (user_data != NULL) 11130 ctxt->userData = user_data; 11131 } 11132 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11133 if (newDoc == NULL) { 11134 xmlFreeParserCtxt(ctxt); 11135 return(-1); 11136 } 11137 if (doc != NULL) { 11138 newDoc->intSubset = doc->intSubset; 11139 newDoc->extSubset = doc->extSubset; 11140 } 11141 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11142 if (newDoc->children == NULL) { 11143 if (sax != NULL) 11144 ctxt->sax = oldsax; 11145 xmlFreeParserCtxt(ctxt); 11146 newDoc->intSubset = NULL; 11147 newDoc->extSubset = NULL; 11148 xmlFreeDoc(newDoc); 11149 return(-1); 11150 } 11151 nodePush(ctxt, newDoc->children); 11152 if (doc == NULL) { 11153 ctxt->myDoc = newDoc; 11154 } else { 11155 ctxt->myDoc = newDoc; 11156 newDoc->children->doc = doc; 11157 } 11158 ctxt->instate = XML_PARSER_CONTENT; 11159 ctxt->depth = depth; 11160 11161 /* 11162 * Doing validity checking on chunk doesn't make sense 11163 */ 11164 ctxt->validate = 0; 11165 ctxt->loadsubset = 0; 11166 xmlDetectSAX2(ctxt); 11167 11168 if ( doc != NULL ){ 11169 content = doc->children; 11170 doc->children = NULL; 11171 xmlParseContent(ctxt); 11172 doc->children = content; 11173 } 11174 else { 11175 
xmlParseContent(ctxt); 11176 } 11177 if ((RAW == '<') && (NXT(1) == '/')) { 11178 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11179 } else if (RAW != 0) { 11180 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11181 } 11182 if (ctxt->node != newDoc->children) { 11183 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11184 } 11185 11186 if (!ctxt->wellFormed) { 11187 if (ctxt->errNo == 0) 11188 ret = 1; 11189 else 11190 ret = ctxt->errNo; 11191 } else { 11192 ret = 0; 11193 } 11194 11195 if (lst != NULL && (ret == 0 || recover == 1)) { 11196 xmlNodePtr cur; 11197 11198 /* 11199 * Return the newly created nodeset after unlinking it from 11200 * they pseudo parent. 11201 */ 11202 cur = newDoc->children->children; 11203 *lst = cur; 11204 while (cur != NULL) { 11205 cur->parent = NULL; 11206 cur = cur->next; 11207 } 11208 newDoc->children->children = NULL; 11209 } 11210 11211 if (sax != NULL) 11212 ctxt->sax = oldsax; 11213 xmlFreeParserCtxt(ctxt); 11214 newDoc->intSubset = NULL; 11215 newDoc->extSubset = NULL; 11216 xmlFreeDoc(newDoc); 11217 11218 return(ret); 11219} 11220 11221/** 11222 * xmlSAXParseEntity: 11223 * @sax: the SAX handler block 11224 * @filename: the filename 11225 * 11226 * parse an XML external entity out of context and build a tree. 11227 * It use the given SAX function block to handle the parsing callback. 11228 * If sax is NULL, fallback to the default DOM tree building routines. 11229 * 11230 * [78] extParsedEnt ::= TextDecl? 
content 11231 * 11232 * This correspond to a "Well Balanced" chunk 11233 * 11234 * Returns the resulting document tree 11235 */ 11236 11237xmlDocPtr 11238xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 11239 xmlDocPtr ret; 11240 xmlParserCtxtPtr ctxt; 11241 11242 ctxt = xmlCreateFileParserCtxt(filename); 11243 if (ctxt == NULL) { 11244 return(NULL); 11245 } 11246 if (sax != NULL) { 11247 if (ctxt->sax != NULL) 11248 xmlFree(ctxt->sax); 11249 ctxt->sax = sax; 11250 ctxt->userData = NULL; 11251 } 11252 11253 xmlParseExtParsedEnt(ctxt); 11254 11255 if (ctxt->wellFormed) 11256 ret = ctxt->myDoc; 11257 else { 11258 ret = NULL; 11259 xmlFreeDoc(ctxt->myDoc); 11260 ctxt->myDoc = NULL; 11261 } 11262 if (sax != NULL) 11263 ctxt->sax = NULL; 11264 xmlFreeParserCtxt(ctxt); 11265 11266 return(ret); 11267} 11268 11269/** 11270 * xmlParseEntity: 11271 * @filename: the filename 11272 * 11273 * parse an XML external entity out of context and build a tree. 11274 * 11275 * [78] extParsedEnt ::= TextDecl? content 11276 * 11277 * This correspond to a "Well Balanced" chunk 11278 * 11279 * Returns the resulting document tree 11280 */ 11281 11282xmlDocPtr 11283xmlParseEntity(const char *filename) { 11284 return(xmlSAXParseEntity(NULL, filename)); 11285} 11286#endif /* LIBXML_SAX1_ENABLED */ 11287 11288/** 11289 * xmlCreateEntityParserCtxt: 11290 * @URL: the entity URL 11291 * @ID: the entity PUBLIC ID 11292 * @base: a possible base for the target URI 11293 * 11294 * Create a parser context for an external entity 11295 * Automatic support for ZLIB/Compress compressed document is provided 11296 * by default if found at compile-time. 
11297 * 11298 * Returns the new parser context or NULL 11299 */ 11300xmlParserCtxtPtr 11301xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 11302 const xmlChar *base) { 11303 xmlParserCtxtPtr ctxt; 11304 xmlParserInputPtr inputStream; 11305 char *directory = NULL; 11306 xmlChar *uri; 11307 11308 ctxt = xmlNewParserCtxt(); 11309 if (ctxt == NULL) { 11310 return(NULL); 11311 } 11312 11313 uri = xmlBuildURI(URL, base); 11314 11315 if (uri == NULL) { 11316 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11317 if (inputStream == NULL) { 11318 xmlFreeParserCtxt(ctxt); 11319 return(NULL); 11320 } 11321 11322 inputPush(ctxt, inputStream); 11323 11324 if ((ctxt->directory == NULL) && (directory == NULL)) 11325 directory = xmlParserGetDirectory((char *)URL); 11326 if ((ctxt->directory == NULL) && (directory != NULL)) 11327 ctxt->directory = directory; 11328 } else { 11329 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 11330 if (inputStream == NULL) { 11331 xmlFree(uri); 11332 xmlFreeParserCtxt(ctxt); 11333 return(NULL); 11334 } 11335 11336 inputPush(ctxt, inputStream); 11337 11338 if ((ctxt->directory == NULL) && (directory == NULL)) 11339 directory = xmlParserGetDirectory((char *)uri); 11340 if ((ctxt->directory == NULL) && (directory != NULL)) 11341 ctxt->directory = directory; 11342 xmlFree(uri); 11343 } 11344 return(ctxt); 11345} 11346 11347/************************************************************************ 11348 * * 11349 * Front ends when parsing from a file * 11350 * * 11351 ************************************************************************/ 11352 11353/** 11354 * xmlCreateFileParserCtxt: 11355 * @filename: the filename 11356 * 11357 * Create a parser context for a file content. 11358 * Automatic support for ZLIB/Compress compressed document is provided 11359 * by default if found at compile-time. 
11360 * 11361 * Returns the new parser context or NULL 11362 */ 11363xmlParserCtxtPtr 11364xmlCreateFileParserCtxt(const char *filename) 11365{ 11366 xmlParserCtxtPtr ctxt; 11367 xmlParserInputPtr inputStream; 11368 char *directory = NULL; 11369 11370 ctxt = xmlNewParserCtxt(); 11371 if (ctxt == NULL) { 11372 xmlErrMemory(NULL, "cannot allocate parser context"); 11373 return(NULL); 11374 } 11375 11376 11377 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 11378 if (inputStream == NULL) { 11379 xmlFreeParserCtxt(ctxt); 11380 return(NULL); 11381 } 11382 11383 inputPush(ctxt, inputStream); 11384 if ((ctxt->directory == NULL) && (directory == NULL)) 11385 directory = xmlParserGetDirectory(filename); 11386 if ((ctxt->directory == NULL) && (directory != NULL)) 11387 ctxt->directory = directory; 11388 11389 return(ctxt); 11390} 11391 11392#ifdef LIBXML_SAX1_ENABLED 11393/** 11394 * xmlSAXParseFileWithData: 11395 * @sax: the SAX handler block 11396 * @filename: the filename 11397 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11398 * documents 11399 * @data: the userdata 11400 * 11401 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11402 * compressed document is provided by default if found at compile-time. 11403 * It use the given SAX function block to handle the parsing callback. 11404 * If sax is NULL, fallback to the default DOM tree building routines. 
11405 * 11406 * User data (void *) is stored within the parser context in the 11407 * context's _private member, so it is available nearly everywhere in libxml 11408 * 11409 * Returns the resulting document tree 11410 */ 11411 11412xmlDocPtr 11413xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 11414 int recovery, void *data) { 11415 xmlDocPtr ret; 11416 xmlParserCtxtPtr ctxt; 11417 char *directory = NULL; 11418 11419 xmlInitParser(); 11420 11421 ctxt = xmlCreateFileParserCtxt(filename); 11422 if (ctxt == NULL) { 11423 return(NULL); 11424 } 11425 if (sax != NULL) { 11426 if (ctxt->sax != NULL) 11427 xmlFree(ctxt->sax); 11428 ctxt->sax = sax; 11429 } 11430 xmlDetectSAX2(ctxt); 11431 if (data!=NULL) { 11432 ctxt->_private = data; 11433 } 11434 11435 if ((ctxt->directory == NULL) && (directory == NULL)) 11436 directory = xmlParserGetDirectory(filename); 11437 if ((ctxt->directory == NULL) && (directory != NULL)) 11438 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 11439 11440 ctxt->recovery = recovery; 11441 11442 xmlParseDocument(ctxt); 11443 11444 if ((ctxt->wellFormed) || recovery) { 11445 ret = ctxt->myDoc; 11446 if (ret != NULL) { 11447 if (ctxt->input->buf->compressed > 0) 11448 ret->compression = 9; 11449 else 11450 ret->compression = ctxt->input->buf->compressed; 11451 } 11452 } 11453 else { 11454 ret = NULL; 11455 xmlFreeDoc(ctxt->myDoc); 11456 ctxt->myDoc = NULL; 11457 } 11458 if (sax != NULL) 11459 ctxt->sax = NULL; 11460 xmlFreeParserCtxt(ctxt); 11461 11462 return(ret); 11463} 11464 11465/** 11466 * xmlSAXParseFile: 11467 * @sax: the SAX handler block 11468 * @filename: the filename 11469 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11470 * documents 11471 * 11472 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11473 * compressed document is provided by default if found at compile-time. 11474 * It use the given SAX function block to handle the parsing callback. 
11475 * If sax is NULL, fallback to the default DOM tree building routines. 11476 * 11477 * Returns the resulting document tree 11478 */ 11479 11480xmlDocPtr 11481xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 11482 int recovery) { 11483 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 11484} 11485 11486/** 11487 * xmlRecoverDoc: 11488 * @cur: a pointer to an array of xmlChar 11489 * 11490 * parse an XML in-memory document and build a tree. 11491 * In the case the document is not Well Formed, a tree is built anyway 11492 * 11493 * Returns the resulting document tree 11494 */ 11495 11496xmlDocPtr 11497xmlRecoverDoc(xmlChar *cur) { 11498 return(xmlSAXParseDoc(NULL, cur, 1)); 11499} 11500 11501/** 11502 * xmlParseFile: 11503 * @filename: the filename 11504 * 11505 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11506 * compressed document is provided by default if found at compile-time. 11507 * 11508 * Returns the resulting document tree if the file was wellformed, 11509 * NULL otherwise. 11510 */ 11511 11512xmlDocPtr 11513xmlParseFile(const char *filename) { 11514 return(xmlSAXParseFile(NULL, filename, 0)); 11515} 11516 11517/** 11518 * xmlRecoverFile: 11519 * @filename: the filename 11520 * 11521 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11522 * compressed document is provided by default if found at compile-time. 11523 * In the case the document is not Well Formed, a tree is built anyway 11524 * 11525 * Returns the resulting document tree 11526 */ 11527 11528xmlDocPtr 11529xmlRecoverFile(const char *filename) { 11530 return(xmlSAXParseFile(NULL, filename, 1)); 11531} 11532 11533 11534/** 11535 * xmlSetupParserForBuffer: 11536 * @ctxt: an XML parser context 11537 * @buffer: a xmlChar * buffer 11538 * @filename: a file name 11539 * 11540 * Setup the parser context to parse a new buffer; Clears any prior 11541 * contents from the parser context. 
The buffer parameter must not be 11542 * NULL, but the filename parameter can be 11543 */ 11544void 11545xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 11546 const char* filename) 11547{ 11548 xmlParserInputPtr input; 11549 11550 input = xmlNewInputStream(ctxt); 11551 if (input == NULL) { 11552 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 11553 xmlFree(ctxt); 11554 return; 11555 } 11556 11557 xmlClearParserCtxt(ctxt); 11558 if (filename != NULL) 11559 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 11560 input->base = buffer; 11561 input->cur = buffer; 11562 input->end = &buffer[xmlStrlen(buffer)]; 11563 inputPush(ctxt, input); 11564} 11565 11566/** 11567 * xmlSAXUserParseFile: 11568 * @sax: a SAX handler 11569 * @user_data: The user data returned on SAX callbacks 11570 * @filename: a file name 11571 * 11572 * parse an XML file and call the given SAX handler routines. 11573 * Automatic support for ZLIB/Compress compressed document is provided 11574 * 11575 * Returns 0 in case of success or a error number otherwise 11576 */ 11577int 11578xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 11579 const char *filename) { 11580 int ret = 0; 11581 xmlParserCtxtPtr ctxt; 11582 11583 ctxt = xmlCreateFileParserCtxt(filename); 11584 if (ctxt == NULL) return -1; 11585#ifdef LIBXML_SAX1_ENABLED 11586 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11587#endif /* LIBXML_SAX1_ENABLED */ 11588 xmlFree(ctxt->sax); 11589 ctxt->sax = sax; 11590 xmlDetectSAX2(ctxt); 11591 11592 if (user_data != NULL) 11593 ctxt->userData = user_data; 11594 11595 xmlParseDocument(ctxt); 11596 11597 if (ctxt->wellFormed) 11598 ret = 0; 11599 else { 11600 if (ctxt->errNo != 0) 11601 ret = ctxt->errNo; 11602 else 11603 ret = -1; 11604 } 11605 if (sax != NULL) 11606 ctxt->sax = NULL; 11607 xmlFreeParserCtxt(ctxt); 11608 11609 return ret; 11610} 11611#endif /* LIBXML_SAX1_ENABLED */ 11612 
11613/************************************************************************ 11614 * * 11615 * Front ends when parsing from memory * 11616 * * 11617 ************************************************************************/ 11618 11619/** 11620 * xmlCreateMemoryParserCtxt: 11621 * @buffer: a pointer to a char array 11622 * @size: the size of the array 11623 * 11624 * Create a parser context for an XML in-memory document. 11625 * 11626 * Returns the new parser context or NULL 11627 */ 11628xmlParserCtxtPtr 11629xmlCreateMemoryParserCtxt(const char *buffer, int size) { 11630 xmlParserCtxtPtr ctxt; 11631 xmlParserInputPtr input; 11632 xmlParserInputBufferPtr buf; 11633 11634 if (buffer == NULL) 11635 return(NULL); 11636 if (size <= 0) 11637 return(NULL); 11638 11639 ctxt = xmlNewParserCtxt(); 11640 if (ctxt == NULL) 11641 return(NULL); 11642 11643 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 11644 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 11645 if (buf == NULL) { 11646 xmlFreeParserCtxt(ctxt); 11647 return(NULL); 11648 } 11649 11650 input = xmlNewInputStream(ctxt); 11651 if (input == NULL) { 11652 xmlFreeParserInputBuffer(buf); 11653 xmlFreeParserCtxt(ctxt); 11654 return(NULL); 11655 } 11656 11657 input->filename = NULL; 11658 input->buf = buf; 11659 input->base = input->buf->buffer->content; 11660 input->cur = input->buf->buffer->content; 11661 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 11662 11663 inputPush(ctxt, input); 11664 return(ctxt); 11665} 11666 11667#ifdef LIBXML_SAX1_ENABLED 11668/** 11669 * xmlSAXParseMemoryWithData: 11670 * @sax: the SAX handler block 11671 * @buffer: an pointer to a char array 11672 * @size: the size of the array 11673 * @recovery: work in recovery mode, i.e. 
tries to read no Well Formed 11674 * documents 11675 * @data: the userdata 11676 * 11677 * parse an XML in-memory block and use the given SAX function block 11678 * to handle the parsing callback. If sax is NULL, fallback to the default 11679 * DOM tree building routines. 11680 * 11681 * User data (void *) is stored within the parser context in the 11682 * context's _private member, so it is available nearly everywhere in libxml 11683 * 11684 * Returns the resulting document tree 11685 */ 11686 11687xmlDocPtr 11688xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 11689 int size, int recovery, void *data) { 11690 xmlDocPtr ret; 11691 xmlParserCtxtPtr ctxt; 11692 11693 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11694 if (ctxt == NULL) return(NULL); 11695 if (sax != NULL) { 11696 if (ctxt->sax != NULL) 11697 xmlFree(ctxt->sax); 11698 ctxt->sax = sax; 11699 } 11700 xmlDetectSAX2(ctxt); 11701 if (data!=NULL) { 11702 ctxt->_private=data; 11703 } 11704 11705 ctxt->recovery = recovery; 11706 11707 xmlParseDocument(ctxt); 11708 11709 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11710 else { 11711 ret = NULL; 11712 xmlFreeDoc(ctxt->myDoc); 11713 ctxt->myDoc = NULL; 11714 } 11715 if (sax != NULL) 11716 ctxt->sax = NULL; 11717 xmlFreeParserCtxt(ctxt); 11718 11719 return(ret); 11720} 11721 11722/** 11723 * xmlSAXParseMemory: 11724 * @sax: the SAX handler block 11725 * @buffer: an pointer to a char array 11726 * @size: the size of the array 11727 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 11728 * documents 11729 * 11730 * parse an XML in-memory block and use the given SAX function block 11731 * to handle the parsing callback. If sax is NULL, fallback to the default 11732 * DOM tree building routines. 
11733 * 11734 * Returns the resulting document tree 11735 */ 11736xmlDocPtr 11737xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 11738 int size, int recovery) { 11739 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 11740} 11741 11742/** 11743 * xmlParseMemory: 11744 * @buffer: an pointer to a char array 11745 * @size: the size of the array 11746 * 11747 * parse an XML in-memory block and build a tree. 11748 * 11749 * Returns the resulting document tree 11750 */ 11751 11752xmlDocPtr xmlParseMemory(const char *buffer, int size) { 11753 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 11754} 11755 11756/** 11757 * xmlRecoverMemory: 11758 * @buffer: an pointer to a char array 11759 * @size: the size of the array 11760 * 11761 * parse an XML in-memory block and build a tree. 11762 * In the case the document is not Well Formed, a tree is built anyway 11763 * 11764 * Returns the resulting document tree 11765 */ 11766 11767xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 11768 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 11769} 11770 11771/** 11772 * xmlSAXUserParseMemory: 11773 * @sax: a SAX handler 11774 * @user_data: The user data returned on SAX callbacks 11775 * @buffer: an in-memory XML document input 11776 * @size: the length of the XML document in bytes 11777 * 11778 * A better SAX parsing routine. 11779 * parse an XML in-memory buffer and call the given SAX handler routines. 
11780 * 11781 * Returns 0 in case of success or a error number otherwise 11782 */ 11783int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 11784 const char *buffer, int size) { 11785 int ret = 0; 11786 xmlParserCtxtPtr ctxt; 11787 xmlSAXHandlerPtr oldsax = NULL; 11788 11789 if (sax == NULL) return -1; 11790 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11791 if (ctxt == NULL) return -1; 11792 oldsax = ctxt->sax; 11793 ctxt->sax = sax; 11794 xmlDetectSAX2(ctxt); 11795 if (user_data != NULL) 11796 ctxt->userData = user_data; 11797 11798 xmlParseDocument(ctxt); 11799 11800 if (ctxt->wellFormed) 11801 ret = 0; 11802 else { 11803 if (ctxt->errNo != 0) 11804 ret = ctxt->errNo; 11805 else 11806 ret = -1; 11807 } 11808 ctxt->sax = oldsax; 11809 xmlFreeParserCtxt(ctxt); 11810 11811 return ret; 11812} 11813#endif /* LIBXML_SAX1_ENABLED */ 11814 11815/** 11816 * xmlCreateDocParserCtxt: 11817 * @cur: a pointer to an array of xmlChar 11818 * 11819 * Creates a parser context for an XML in-memory document. 11820 * 11821 * Returns the new parser context or NULL 11822 */ 11823xmlParserCtxtPtr 11824xmlCreateDocParserCtxt(const xmlChar *cur) { 11825 int len; 11826 11827 if (cur == NULL) 11828 return(NULL); 11829 len = xmlStrlen(cur); 11830 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 11831} 11832 11833#ifdef LIBXML_SAX1_ENABLED 11834/** 11835 * xmlSAXParseDoc: 11836 * @sax: the SAX handler block 11837 * @cur: a pointer to an array of xmlChar 11838 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11839 * documents 11840 * 11841 * parse an XML in-memory document and build a tree. 11842 * It use the given SAX function block to handle the parsing callback. 11843 * If sax is NULL, fallback to the default DOM tree building routines. 
11844 * 11845 * Returns the resulting document tree 11846 */ 11847 11848xmlDocPtr 11849xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 11850 xmlDocPtr ret; 11851 xmlParserCtxtPtr ctxt; 11852 11853 if (cur == NULL) return(NULL); 11854 11855 11856 ctxt = xmlCreateDocParserCtxt(cur); 11857 if (ctxt == NULL) return(NULL); 11858 if (sax != NULL) { 11859 ctxt->sax = sax; 11860 ctxt->userData = NULL; 11861 } 11862 xmlDetectSAX2(ctxt); 11863 11864 xmlParseDocument(ctxt); 11865 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11866 else { 11867 ret = NULL; 11868 xmlFreeDoc(ctxt->myDoc); 11869 ctxt->myDoc = NULL; 11870 } 11871 if (sax != NULL) 11872 ctxt->sax = NULL; 11873 xmlFreeParserCtxt(ctxt); 11874 11875 return(ret); 11876} 11877 11878/** 11879 * xmlParseDoc: 11880 * @cur: a pointer to an array of xmlChar 11881 * 11882 * parse an XML in-memory document and build a tree. 11883 * 11884 * Returns the resulting document tree 11885 */ 11886 11887xmlDocPtr 11888xmlParseDoc(xmlChar *cur) { 11889 return(xmlSAXParseDoc(NULL, cur, 0)); 11890} 11891#endif /* LIBXML_SAX1_ENABLED */ 11892 11893#ifdef LIBXML_LEGACY_ENABLED 11894/************************************************************************ 11895 * * 11896 * Specific function to keep track of entities references * 11897 * and used by the XSLT debugger * 11898 * * 11899 ************************************************************************/ 11900 11901static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 11902 11903/** 11904 * xmlAddEntityReference: 11905 * @ent : A valid entity 11906 * @firstNode : A valid first node for children of entity 11907 * @lastNode : A valid last node of children entity 11908 * 11909 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 11910 */ 11911static void 11912xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 11913 xmlNodePtr lastNode) 11914{ 11915 if (xmlEntityRefFunc != NULL) { 11916 (*xmlEntityRefFunc) (ent, firstNode, 
lastNode); 11917 } 11918} 11919 11920 11921/** 11922 * xmlSetEntityReferenceFunc: 11923 * @func: A valid function 11924 * 11925 * Set the function to call call back when a xml reference has been made 11926 */ 11927void 11928xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 11929{ 11930 xmlEntityRefFunc = func; 11931} 11932#endif /* LIBXML_LEGACY_ENABLED */ 11933 11934/************************************************************************ 11935 * * 11936 * Miscellaneous * 11937 * * 11938 ************************************************************************/ 11939 11940#ifdef LIBXML_XPATH_ENABLED 11941#include <libxml/xpath.h> 11942#endif 11943 11944extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 11945static int xmlParserInitialized = 0; 11946 11947/** 11948 * xmlInitParser: 11949 * 11950 * Initialization function for the XML parser. 11951 * This is not reentrant. Call once before processing in case of 11952 * use in multithreaded programs. 11953 */ 11954 11955void 11956xmlInitParser(void) { 11957 if (xmlParserInitialized != 0) 11958 return; 11959 11960 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 11961 (xmlGenericError == NULL)) 11962 initGenericErrorDefaultFunc(NULL); 11963 xmlInitGlobals(); 11964 xmlInitThreads(); 11965 xmlInitMemory(); 11966 xmlInitCharEncodingHandlers(); 11967 xmlDefaultSAXHandlerInit(); 11968 xmlRegisterDefaultInputCallbacks(); 11969#ifdef LIBXML_OUTPUT_ENABLED 11970 xmlRegisterDefaultOutputCallbacks(); 11971#endif /* LIBXML_OUTPUT_ENABLED */ 11972#ifdef LIBXML_HTML_ENABLED 11973 htmlInitAutoClose(); 11974 htmlDefaultSAXHandlerInit(); 11975#endif 11976#ifdef LIBXML_XPATH_ENABLED 11977 xmlXPathInit(); 11978#endif 11979 xmlParserInitialized = 1; 11980} 11981 11982/** 11983 * xmlCleanupParser: 11984 * 11985 * Cleanup function for the XML parser. It tries to reclaim all 11986 * parsing related global memory allocated for the parser processing. 11987 * It doesn't deallocate any document related memory. 
Calling this 11988 * function should not prevent reusing the parser. 11989 * One should call xmlCleanupParser() only when the process has 11990 * finished using the library or XML document built with it. 11991 */ 11992 11993void 11994xmlCleanupParser(void) { 11995 if (!xmlParserInitialized) 11996 return; 11997 11998 xmlCleanupCharEncodingHandlers(); 11999#ifdef LIBXML_CATALOG_ENABLED 12000 xmlCatalogCleanup(); 12001#endif 12002 xmlCleanupInputCallbacks(); 12003#ifdef LIBXML_OUTPUT_ENABLED 12004 xmlCleanupOutputCallbacks(); 12005#endif 12006 xmlCleanupThreads(); 12007 xmlCleanupGlobals(); 12008 xmlResetLastError(); 12009 xmlParserInitialized = 0; 12010} 12011 12012/************************************************************************ 12013 * * 12014 * New set (2.6.0) of simpler and more flexible APIs * 12015 * * 12016 ************************************************************************/ 12017 12018/** 12019 * DICT_FREE: 12020 * @str: a string 12021 * 12022 * Free a string if it is not owned by the "dict" dictionnary in the 12023 * current scope 12024 */ 12025#define DICT_FREE(str) \ 12026 if ((str) && ((!dict) || \ 12027 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 12028 xmlFree((char *)(str)); 12029 12030/** 12031 * xmlCtxtReset: 12032 * @ctxt: an XML parser context 12033 * 12034 * Reset a parser context 12035 */ 12036void 12037xmlCtxtReset(xmlParserCtxtPtr ctxt) 12038{ 12039 xmlParserInputPtr input; 12040 xmlDictPtr dict = ctxt->dict; 12041 12042 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 12043 xmlFreeInputStream(input); 12044 } 12045 ctxt->inputNr = 0; 12046 ctxt->input = NULL; 12047 12048 ctxt->spaceNr = 0; 12049 ctxt->spaceTab[0] = -1; 12050 ctxt->space = &ctxt->spaceTab[0]; 12051 12052 12053 ctxt->nodeNr = 0; 12054 ctxt->node = NULL; 12055 12056 ctxt->nameNr = 0; 12057 ctxt->name = NULL; 12058 12059 DICT_FREE(ctxt->version); 12060 ctxt->version = NULL; 12061 DICT_FREE(ctxt->encoding); 12062 ctxt->encoding = NULL; 12063 
DICT_FREE(ctxt->directory); 12064 ctxt->directory = NULL; 12065 DICT_FREE(ctxt->extSubURI); 12066 ctxt->extSubURI = NULL; 12067 DICT_FREE(ctxt->extSubSystem); 12068 ctxt->extSubSystem = NULL; 12069 if (ctxt->myDoc != NULL) 12070 xmlFreeDoc(ctxt->myDoc); 12071 ctxt->myDoc = NULL; 12072 12073 ctxt->standalone = -1; 12074 ctxt->hasExternalSubset = 0; 12075 ctxt->hasPErefs = 0; 12076 ctxt->html = 0; 12077 ctxt->external = 0; 12078 ctxt->instate = XML_PARSER_START; 12079 ctxt->token = 0; 12080 12081 ctxt->wellFormed = 1; 12082 ctxt->nsWellFormed = 1; 12083 ctxt->valid = 1; 12084 ctxt->vctxt.userData = ctxt; 12085 ctxt->vctxt.error = xmlParserValidityError; 12086 ctxt->vctxt.warning = xmlParserValidityWarning; 12087 ctxt->record_info = 0; 12088 ctxt->nbChars = 0; 12089 ctxt->checkIndex = 0; 12090 ctxt->inSubset = 0; 12091 ctxt->errNo = XML_ERR_OK; 12092 ctxt->depth = 0; 12093 ctxt->charset = XML_CHAR_ENCODING_UTF8; 12094 ctxt->catalogs = NULL; 12095 xmlInitNodeInfoSeq(&ctxt->node_seq); 12096 12097 if (ctxt->attsDefault != NULL) { 12098 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 12099 ctxt->attsDefault = NULL; 12100 } 12101 if (ctxt->attsSpecial != NULL) { 12102 xmlHashFree(ctxt->attsSpecial, NULL); 12103 ctxt->attsSpecial = NULL; 12104 } 12105 12106#ifdef LIBXML_CATALOG_ENABLED 12107 if (ctxt->catalogs != NULL) 12108 xmlCatalogFreeLocal(ctxt->catalogs); 12109#endif 12110} 12111 12112/** 12113 * xmlCtxtUseOptions: 12114 * @ctxt: an XML parser context 12115 * @options: a combination of xmlParserOption(s) 12116 * 12117 * Applies the options to the parser context 12118 * 12119 * Returns 0 in case of success, the set of unknown or unimplemented options 12120 * in case of error. 
12121 */ 12122int 12123xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 12124{ 12125 if (options & XML_PARSE_RECOVER) { 12126 ctxt->recovery = 1; 12127 options -= XML_PARSE_RECOVER; 12128 } else 12129 ctxt->recovery = 0; 12130 if (options & XML_PARSE_DTDLOAD) { 12131 ctxt->loadsubset = XML_DETECT_IDS; 12132 options -= XML_PARSE_DTDLOAD; 12133 } else 12134 ctxt->loadsubset = 0; 12135 if (options & XML_PARSE_DTDATTR) { 12136 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 12137 options -= XML_PARSE_DTDATTR; 12138 } 12139 if (options & XML_PARSE_NOENT) { 12140 ctxt->replaceEntities = 1; 12141 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 12142 options -= XML_PARSE_NOENT; 12143 } else 12144 ctxt->replaceEntities = 0; 12145 if (options & XML_PARSE_NOWARNING) { 12146 ctxt->sax->warning = NULL; 12147 options -= XML_PARSE_NOWARNING; 12148 } 12149 if (options & XML_PARSE_NOERROR) { 12150 ctxt->sax->error = NULL; 12151 ctxt->sax->fatalError = NULL; 12152 options -= XML_PARSE_NOERROR; 12153 } 12154 if (options & XML_PARSE_PEDANTIC) { 12155 ctxt->pedantic = 1; 12156 options -= XML_PARSE_PEDANTIC; 12157 } else 12158 ctxt->pedantic = 0; 12159 if (options & XML_PARSE_NOBLANKS) { 12160 ctxt->keepBlanks = 0; 12161 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 12162 options -= XML_PARSE_NOBLANKS; 12163 } else 12164 ctxt->keepBlanks = 1; 12165 if (options & XML_PARSE_DTDVALID) { 12166 ctxt->validate = 1; 12167 if (options & XML_PARSE_NOWARNING) 12168 ctxt->vctxt.warning = NULL; 12169 if (options & XML_PARSE_NOERROR) 12170 ctxt->vctxt.error = NULL; 12171 options -= XML_PARSE_DTDVALID; 12172 } else 12173 ctxt->validate = 0; 12174#ifdef LIBXML_SAX1_ENABLED 12175 if (options & XML_PARSE_SAX1) { 12176 ctxt->sax->startElement = xmlSAX2StartElement; 12177 ctxt->sax->endElement = xmlSAX2EndElement; 12178 ctxt->sax->startElementNs = NULL; 12179 ctxt->sax->endElementNs = NULL; 12180 ctxt->sax->initialized = 1; 12181 options -= XML_PARSE_SAX1; 12182 } 12183#endif /* LIBXML_SAX1_ENABLED 
*/ 12184 if (options & XML_PARSE_NODICT) { 12185 ctxt->dictNames = 0; 12186 options -= XML_PARSE_NODICT; 12187 } else { 12188 ctxt->dictNames = 1; 12189 } 12190 if (options & XML_PARSE_NOCDATA) { 12191 ctxt->sax->cdataBlock = NULL; 12192 options -= XML_PARSE_NOCDATA; 12193 } 12194 if (options & XML_PARSE_NSCLEAN) { 12195 ctxt->options |= XML_PARSE_NSCLEAN; 12196 options -= XML_PARSE_NSCLEAN; 12197 } 12198 return (options); 12199} 12200 12201/** 12202 * xmlDoRead: 12203 * @ctxt: an XML parser context 12204 * @URL: the base URL to use for the document 12205 * @encoding: the document encoding, or NULL 12206 * @options: a combination of xmlParserOption(s) 12207 * @reuse: keep the context for reuse 12208 * 12209 * Common front-end for the xmlRead functions 12210 * 12211 * Returns the resulting document tree or NULL 12212 */ 12213static xmlDocPtr 12214xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 12215 int options, int reuse) 12216{ 12217 xmlDocPtr ret; 12218 12219 xmlCtxtUseOptions(ctxt, options); 12220 if (encoding != NULL) { 12221 xmlCharEncodingHandlerPtr hdlr; 12222 12223 hdlr = xmlFindCharEncodingHandler(encoding); 12224 if (hdlr != NULL) 12225 xmlSwitchToEncoding(ctxt, hdlr); 12226 } 12227 if ((URL != NULL) && (ctxt->input != NULL) && 12228 (ctxt->input->filename == NULL)) 12229 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 12230 xmlParseDocument(ctxt); 12231 if ((ctxt->wellFormed) || ctxt->recovery) 12232 ret = ctxt->myDoc; 12233 else { 12234 ret = NULL; 12235 if (ctxt->myDoc != NULL) { 12236 if ((ctxt->dictNames) && 12237 (ctxt->myDoc->dict == ctxt->dict)) 12238 xmlDictReference(ctxt->dict); 12239 xmlFreeDoc(ctxt->myDoc); 12240 } 12241 } 12242 ctxt->myDoc = NULL; 12243 if (!reuse) { 12244 if ((ctxt->dictNames) && 12245 (ret != NULL) && 12246 (ret->dict == ctxt->dict)) 12247 ctxt->dict = NULL; 12248 xmlFreeParserCtxt(ctxt); 12249 } else { 12250 /* Must duplicate the reference to the dictionary */ 12251 if 
((ctxt->dictNames) && 12252 (ret != NULL) && 12253 (ret->dict == ctxt->dict)) 12254 xmlDictReference(ctxt->dict); 12255 } 12256 12257 return (ret); 12258} 12259 12260/** 12261 * xmlReadDoc: 12262 * @cur: a pointer to a zero terminated string 12263 * @URL: the base URL to use for the document 12264 * @encoding: the document encoding, or NULL 12265 * @options: a combination of xmlParserOption(s) 12266 * 12267 * parse an XML in-memory document and build a tree. 12268 * 12269 * Returns the resulting document tree 12270 */ 12271xmlDocPtr 12272xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 12273{ 12274 xmlParserCtxtPtr ctxt; 12275 12276 if (cur == NULL) 12277 return (NULL); 12278 12279 ctxt = xmlCreateDocParserCtxt(cur); 12280 if (ctxt == NULL) 12281 return (NULL); 12282 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12283} 12284 12285/** 12286 * xmlReadFile: 12287 * @filename: a file or URL 12288 * @encoding: the document encoding, or NULL 12289 * @options: a combination of xmlParserOption(s) 12290 * 12291 * parse an XML file from the filesystem or the network. 12292 * 12293 * Returns the resulting document tree 12294 */ 12295xmlDocPtr 12296xmlReadFile(const char *filename, const char *encoding, int options) 12297{ 12298 xmlParserCtxtPtr ctxt; 12299 12300 ctxt = xmlCreateFileParserCtxt(filename); 12301 if (ctxt == NULL) 12302 return (NULL); 12303 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 12304} 12305 12306/** 12307 * xmlReadMemory: 12308 * @buffer: a pointer to a char array 12309 * @size: the size of the array 12310 * @URL: the base URL to use for the document 12311 * @encoding: the document encoding, or NULL 12312 * @options: a combination of xmlParserOption(s) 12313 * 12314 * parse an XML in-memory document and build a tree. 
12315 * 12316 * Returns the resulting document tree 12317 */ 12318xmlDocPtr 12319xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 12320{ 12321 xmlParserCtxtPtr ctxt; 12322 12323 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12324 if (ctxt == NULL) 12325 return (NULL); 12326 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12327} 12328 12329/** 12330 * xmlReadFd: 12331 * @fd: an open file descriptor 12332 * @URL: the base URL to use for the document 12333 * @encoding: the document encoding, or NULL 12334 * @options: a combination of xmlParserOption(s) 12335 * 12336 * parse an XML from a file descriptor and build a tree. 12337 * 12338 * Returns the resulting document tree 12339 */ 12340xmlDocPtr 12341xmlReadFd(int fd, const char *URL, const char *encoding, int options) 12342{ 12343 xmlParserCtxtPtr ctxt; 12344 xmlParserInputBufferPtr input; 12345 xmlParserInputPtr stream; 12346 12347 if (fd < 0) 12348 return (NULL); 12349 12350 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12351 if (input == NULL) 12352 return (NULL); 12353 ctxt = xmlNewParserCtxt(); 12354 if (ctxt == NULL) { 12355 xmlFreeParserInputBuffer(input); 12356 return (NULL); 12357 } 12358 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12359 if (stream == NULL) { 12360 xmlFreeParserInputBuffer(input); 12361 xmlFreeParserCtxt(ctxt); 12362 return (NULL); 12363 } 12364 inputPush(ctxt, stream); 12365 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12366} 12367 12368/** 12369 * xmlReadIO: 12370 * @ioread: an I/O read function 12371 * @ioclose: an I/O close function 12372 * @ioctx: an I/O handler 12373 * @URL: the base URL to use for the document 12374 * @encoding: the document encoding, or NULL 12375 * @options: a combination of xmlParserOption(s) 12376 * 12377 * parse an XML document from I/O functions and source and build a tree. 
12378 * 12379 * Returns the resulting document tree 12380 */ 12381xmlDocPtr 12382xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12383 void *ioctx, const char *URL, const char *encoding, int options) 12384{ 12385 xmlParserCtxtPtr ctxt; 12386 xmlParserInputBufferPtr input; 12387 xmlParserInputPtr stream; 12388 12389 if (ioread == NULL) 12390 return (NULL); 12391 12392 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 12393 XML_CHAR_ENCODING_NONE); 12394 if (input == NULL) 12395 return (NULL); 12396 ctxt = xmlNewParserCtxt(); 12397 if (ctxt == NULL) { 12398 xmlFreeParserInputBuffer(input); 12399 return (NULL); 12400 } 12401 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12402 if (stream == NULL) { 12403 xmlFreeParserInputBuffer(input); 12404 xmlFreeParserCtxt(ctxt); 12405 return (NULL); 12406 } 12407 inputPush(ctxt, stream); 12408 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12409} 12410 12411/** 12412 * xmlCtxtReadDoc: 12413 * @ctxt: an XML parser context 12414 * @cur: a pointer to a zero terminated string 12415 * @URL: the base URL to use for the document 12416 * @encoding: the document encoding, or NULL 12417 * @options: a combination of xmlParserOption(s) 12418 * 12419 * parse an XML in-memory document and build a tree. 
12420 * This reuses the existing @ctxt parser context 12421 * 12422 * Returns the resulting document tree 12423 */ 12424xmlDocPtr 12425xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 12426 const char *URL, const char *encoding, int options) 12427{ 12428 xmlParserInputPtr stream; 12429 12430 if (cur == NULL) 12431 return (NULL); 12432 if (ctxt == NULL) 12433 return (NULL); 12434 12435 xmlCtxtReset(ctxt); 12436 12437 stream = xmlNewStringInputStream(ctxt, cur); 12438 if (stream == NULL) { 12439 return (NULL); 12440 } 12441 inputPush(ctxt, stream); 12442 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12443} 12444 12445/** 12446 * xmlCtxtReadFile: 12447 * @ctxt: an XML parser context 12448 * @filename: a file or URL 12449 * @encoding: the document encoding, or NULL 12450 * @options: a combination of xmlParserOption(s) 12451 * 12452 * parse an XML file from the filesystem or the network. 12453 * This reuses the existing @ctxt parser context 12454 * 12455 * Returns the resulting document tree 12456 */ 12457xmlDocPtr 12458xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 12459 const char *encoding, int options) 12460{ 12461 xmlParserInputPtr stream; 12462 12463 if (filename == NULL) 12464 return (NULL); 12465 if (ctxt == NULL) 12466 return (NULL); 12467 12468 xmlCtxtReset(ctxt); 12469 12470 stream = xmlNewInputFromFile(ctxt, filename); 12471 if (stream == NULL) { 12472 return (NULL); 12473 } 12474 inputPush(ctxt, stream); 12475 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 12476} 12477 12478/** 12479 * xmlCtxtReadMemory: 12480 * @ctxt: an XML parser context 12481 * @buffer: a pointer to a char array 12482 * @size: the size of the array 12483 * @URL: the base URL to use for the document 12484 * @encoding: the document encoding, or NULL 12485 * @options: a combination of xmlParserOption(s) 12486 * 12487 * parse an XML in-memory document and build a tree. 
12488 * This reuses the existing @ctxt parser context 12489 * 12490 * Returns the resulting document tree 12491 */ 12492xmlDocPtr 12493xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 12494 const char *URL, const char *encoding, int options) 12495{ 12496 xmlParserInputBufferPtr input; 12497 xmlParserInputPtr stream; 12498 12499 if (ctxt == NULL) 12500 return (NULL); 12501 if (buffer == NULL) 12502 return (NULL); 12503 12504 xmlCtxtReset(ctxt); 12505 12506 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12507 if (input == NULL) { 12508 return(NULL); 12509 } 12510 12511 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12512 if (stream == NULL) { 12513 xmlFreeParserInputBuffer(input); 12514 return(NULL); 12515 } 12516 12517 inputPush(ctxt, stream); 12518 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12519} 12520 12521/** 12522 * xmlCtxtReadFd: 12523 * @ctxt: an XML parser context 12524 * @fd: an open file descriptor 12525 * @URL: the base URL to use for the document 12526 * @encoding: the document encoding, or NULL 12527 * @options: a combination of xmlParserOption(s) 12528 * 12529 * parse an XML from a file descriptor and build a tree. 
12530 * This reuses the existing @ctxt parser context 12531 * 12532 * Returns the resulting document tree 12533 */ 12534xmlDocPtr 12535xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 12536 const char *URL, const char *encoding, int options) 12537{ 12538 xmlParserInputBufferPtr input; 12539 xmlParserInputPtr stream; 12540 12541 if (fd < 0) 12542 return (NULL); 12543 if (ctxt == NULL) 12544 return (NULL); 12545 12546 xmlCtxtReset(ctxt); 12547 12548 12549 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12550 if (input == NULL) 12551 return (NULL); 12552 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12553 if (stream == NULL) { 12554 xmlFreeParserInputBuffer(input); 12555 return (NULL); 12556 } 12557 inputPush(ctxt, stream); 12558 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12559} 12560 12561/** 12562 * xmlCtxtReadIO: 12563 * @ctxt: an XML parser context 12564 * @ioread: an I/O read function 12565 * @ioclose: an I/O close function 12566 * @ioctx: an I/O handler 12567 * @URL: the base URL to use for the document 12568 * @encoding: the document encoding, or NULL 12569 * @options: a combination of xmlParserOption(s) 12570 * 12571 * parse an XML document from I/O functions and source and build a tree. 
12572 * This reuses the existing @ctxt parser context 12573 * 12574 * Returns the resulting document tree 12575 */ 12576xmlDocPtr 12577xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 12578 xmlInputCloseCallback ioclose, void *ioctx, 12579 const char *URL, 12580 const char *encoding, int options) 12581{ 12582 xmlParserInputBufferPtr input; 12583 xmlParserInputPtr stream; 12584 12585 if (ioread == NULL) 12586 return (NULL); 12587 if (ctxt == NULL) 12588 return (NULL); 12589 12590 xmlCtxtReset(ctxt); 12591 12592 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 12593 XML_CHAR_ENCODING_NONE); 12594 if (input == NULL) 12595 return (NULL); 12596 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12597 if (stream == NULL) { 12598 xmlFreeParserInputBuffer(input); 12599 return (NULL); 12600 } 12601 inputPush(ctxt, stream); 12602 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12603} 12604