parser.c revision a82b182655ccee95e3b7210066206ddb3918823f
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60 61#ifdef HAVE_CTYPE_H 62#include <ctype.h> 63#endif 64#ifdef HAVE_STDLIB_H 65#include <stdlib.h> 66#endif 67#ifdef HAVE_SYS_STAT_H 68#include <sys/stat.h> 69#endif 70#ifdef HAVE_FCNTL_H 71#include <fcntl.h> 72#endif 73#ifdef HAVE_UNISTD_H 74#include <unistd.h> 75#endif 76#ifdef HAVE_ZLIB_H 77#include <zlib.h> 78#endif 79 80/** 81 * xmlParserMaxDepth: 82 * 83 * arbitrary depth limit for the XML documents that we allow to 84 * process. This is not a limitation of the parser but a safety 85 * boundary feature. 
86 */ 87unsigned int xmlParserMaxDepth = 1024; 88 89#define SAX2 1 90 91#define XML_PARSER_BIG_BUFFER_SIZE 300 92#define XML_PARSER_BUFFER_SIZE 100 93 94#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 95 96/* 97 * List of XML prefixed PI allowed by W3C specs 98 */ 99 100static const char *xmlW3CPIs[] = { 101 "xml-stylesheet", 102 NULL 103}; 104 105 106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 108 const xmlChar **str); 109 110static xmlParserErrors 111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 112 xmlSAXHandlerPtr sax, 113 void *user_data, int depth, const xmlChar *URL, 114 const xmlChar *ID, xmlNodePtr *list); 115 116#ifdef LIBXML_LEGACY_ENABLED 117static void 118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 119 xmlNodePtr lastNode); 120#endif /* LIBXML_LEGACY_ENABLED */ 121 122static xmlParserErrors 123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 124 const xmlChar *string, void *user_data, xmlNodePtr *lst); 125 126/************************************************************************ 127 * * 128 * Some factorized error routines * 129 * * 130 ************************************************************************/ 131 132/** 133 * xmlErrAttributeDup: 134 * @ctxt: an XML parser context 135 * @prefix: the attribute prefix 136 * @localname: the attribute localname 137 * 138 * Handle a redefinition of attribute error 139 */ 140static void 141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 142 const xmlChar * localname) 143{ 144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145 (ctxt->instate == XML_PARSER_EOF)) 146 return; 147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 148 if (prefix == NULL) 149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 150 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 151 (const char *) localname, NULL, NULL, 0, 0, 152 "Attribute %s redefined\n", 
localname); 153 else 154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 155 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 156 (const char *) prefix, (const char *) localname, 157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 158 localname); 159 ctxt->wellFormed = 0; 160 if (ctxt->recovery == 0) 161 ctxt->disableSAX = 1; 162} 163 164/** 165 * xmlFatalErr: 166 * @ctxt: an XML parser context 167 * @error: the error number 168 * @extra: extra information string 169 * 170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 171 */ 172static void 173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 174{ 175 const char *errmsg; 176 177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 178 (ctxt->instate == XML_PARSER_EOF)) 179 return; 180 switch (error) { 181 case XML_ERR_INVALID_HEX_CHARREF: 182 errmsg = "CharRef: invalid hexadecimal value\n"; 183 break; 184 case XML_ERR_INVALID_DEC_CHARREF: 185 errmsg = "CharRef: invalid decimal value\n"; 186 break; 187 case XML_ERR_INVALID_CHARREF: 188 errmsg = "CharRef: invalid value\n"; 189 break; 190 case XML_ERR_INTERNAL_ERROR: 191 errmsg = "internal error"; 192 break; 193 case XML_ERR_PEREF_AT_EOF: 194 errmsg = "PEReference at end of document\n"; 195 break; 196 case XML_ERR_PEREF_IN_PROLOG: 197 errmsg = "PEReference in prolog\n"; 198 break; 199 case XML_ERR_PEREF_IN_EPILOG: 200 errmsg = "PEReference in epilog\n"; 201 break; 202 case XML_ERR_PEREF_NO_NAME: 203 errmsg = "PEReference: no name\n"; 204 break; 205 case XML_ERR_PEREF_SEMICOL_MISSING: 206 errmsg = "PEReference: expecting ';'\n"; 207 break; 208 case XML_ERR_ENTITY_LOOP: 209 errmsg = "Detected an entity reference loop\n"; 210 break; 211 case XML_ERR_ENTITY_NOT_STARTED: 212 errmsg = "EntityValue: \" or ' expected\n"; 213 break; 214 case XML_ERR_ENTITY_PE_INTERNAL: 215 errmsg = "PEReferences forbidden in internal subset\n"; 216 break; 217 case XML_ERR_ENTITY_NOT_FINISHED: 218 errmsg = "EntityValue: \" or ' expected\n"; 
219 break; 220 case XML_ERR_ATTRIBUTE_NOT_STARTED: 221 errmsg = "AttValue: \" or ' expected\n"; 222 break; 223 case XML_ERR_LT_IN_ATTRIBUTE: 224 errmsg = "Unescaped '<' not allowed in attributes values\n"; 225 break; 226 case XML_ERR_LITERAL_NOT_STARTED: 227 errmsg = "SystemLiteral \" or ' expected\n"; 228 break; 229 case XML_ERR_LITERAL_NOT_FINISHED: 230 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 231 break; 232 case XML_ERR_MISPLACED_CDATA_END: 233 errmsg = "Sequence ']]>' not allowed in content\n"; 234 break; 235 case XML_ERR_URI_REQUIRED: 236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 237 break; 238 case XML_ERR_PUBID_REQUIRED: 239 errmsg = "PUBLIC, the Public Identifier is missing\n"; 240 break; 241 case XML_ERR_HYPHEN_IN_COMMENT: 242 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 243 break; 244 case XML_ERR_PI_NOT_STARTED: 245 errmsg = "xmlParsePI : no target name\n"; 246 break; 247 case XML_ERR_RESERVED_XML_NAME: 248 errmsg = "Invalid PI name\n"; 249 break; 250 case XML_ERR_NOTATION_NOT_STARTED: 251 errmsg = "NOTATION: Name expected here\n"; 252 break; 253 case XML_ERR_NOTATION_NOT_FINISHED: 254 errmsg = "'>' required to close NOTATION declaration\n"; 255 break; 256 case XML_ERR_VALUE_REQUIRED: 257 errmsg = "Entity value required\n"; 258 break; 259 case XML_ERR_URI_FRAGMENT: 260 errmsg = "Fragment not allowed"; 261 break; 262 case XML_ERR_ATTLIST_NOT_STARTED: 263 errmsg = "'(' required to start ATTLIST enumeration\n"; 264 break; 265 case XML_ERR_NMTOKEN_REQUIRED: 266 errmsg = "NmToken expected in ATTLIST enumeration\n"; 267 break; 268 case XML_ERR_ATTLIST_NOT_FINISHED: 269 errmsg = "')' required to finish ATTLIST enumeration\n"; 270 break; 271 case XML_ERR_MIXED_NOT_STARTED: 272 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 273 break; 274 case XML_ERR_PCDATA_REQUIRED: 275 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 276 break; 277 case XML_ERR_ELEMCONTENT_NOT_STARTED: 278 errmsg = "ContentDecl : Name or 
'(' expected\n"; 279 break; 280 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 281 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 282 break; 283 case XML_ERR_PEREF_IN_INT_SUBSET: 284 errmsg = 285 "PEReference: forbidden within markup decl in internal subset\n"; 286 break; 287 case XML_ERR_GT_REQUIRED: 288 errmsg = "expected '>'\n"; 289 break; 290 case XML_ERR_CONDSEC_INVALID: 291 errmsg = "XML conditional section '[' expected\n"; 292 break; 293 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 294 errmsg = "Content error in the external subset\n"; 295 break; 296 case XML_ERR_CONDSEC_INVALID_KEYWORD: 297 errmsg = 298 "conditional section INCLUDE or IGNORE keyword expected\n"; 299 break; 300 case XML_ERR_CONDSEC_NOT_FINISHED: 301 errmsg = "XML conditional section not closed\n"; 302 break; 303 case XML_ERR_XMLDECL_NOT_STARTED: 304 errmsg = "Text declaration '<?xml' required\n"; 305 break; 306 case XML_ERR_XMLDECL_NOT_FINISHED: 307 errmsg = "parsing XML declaration: '?>' expected\n"; 308 break; 309 case XML_ERR_EXT_ENTITY_STANDALONE: 310 errmsg = "external parsed entities cannot be standalone\n"; 311 break; 312 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 313 errmsg = "EntityRef: expecting ';'\n"; 314 break; 315 case XML_ERR_DOCTYPE_NOT_FINISHED: 316 errmsg = "DOCTYPE improperly terminated\n"; 317 break; 318 case XML_ERR_LTSLASH_REQUIRED: 319 errmsg = "EndTag: '</' not found\n"; 320 break; 321 case XML_ERR_EQUAL_REQUIRED: 322 errmsg = "expected '='\n"; 323 break; 324 case XML_ERR_STRING_NOT_CLOSED: 325 errmsg = "String not closed expecting \" or '\n"; 326 break; 327 case XML_ERR_STRING_NOT_STARTED: 328 errmsg = "String not started expecting ' or \"\n"; 329 break; 330 case XML_ERR_ENCODING_NAME: 331 errmsg = "Invalid XML encoding name\n"; 332 break; 333 case XML_ERR_STANDALONE_VALUE: 334 errmsg = "standalone accepts only 'yes' or 'no'\n"; 335 break; 336 case XML_ERR_DOCUMENT_EMPTY: 337 errmsg = "Document is empty\n"; 338 break; 339 case XML_ERR_DOCUMENT_END: 340 errmsg = "Extra content at 
the end of the document\n"; 341 break; 342 case XML_ERR_NOT_WELL_BALANCED: 343 errmsg = "chunk is not well balanced\n"; 344 break; 345 case XML_ERR_EXTRA_CONTENT: 346 errmsg = "extra content at the end of well balanced chunk\n"; 347 break; 348 case XML_ERR_VERSION_MISSING: 349 errmsg = "Malformed declaration expecting version\n"; 350 break; 351#if 0 352 case: 353 errmsg = "\n"; 354 break; 355#endif 356 default: 357 errmsg = "Unregistered error message\n"; 358 } 359 ctxt->errNo = error; 360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 362 info); 363 ctxt->wellFormed = 0; 364 if (ctxt->recovery == 0) 365 ctxt->disableSAX = 1; 366} 367 368/** 369 * xmlFatalErrMsg: 370 * @ctxt: an XML parser context 371 * @error: the error number 372 * @msg: the error message 373 * 374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 375 */ 376static void 377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 378 const char *msg) 379{ 380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 381 (ctxt->instate == XML_PARSER_EOF)) 382 return; 383 ctxt->errNo = error; 384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 386 ctxt->wellFormed = 0; 387 if (ctxt->recovery == 0) 388 ctxt->disableSAX = 1; 389} 390 391/** 392 * xmlWarningMsg: 393 * @ctxt: an XML parser context 394 * @error: the error number 395 * @msg: the error message 396 * @str1: extra data 397 * @str2: extra data 398 * 399 * Handle a warning. 
400 */ 401static void 402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 403 const char *msg, const xmlChar *str1, const xmlChar *str2) 404{ 405 xmlStructuredErrorFunc schannel = NULL; 406 407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 408 (ctxt->instate == XML_PARSER_EOF)) 409 return; 410 ctxt->errNo = error; 411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 412 schannel = ctxt->sax->serror; 413 __xmlRaiseError(schannel, 414 (ctxt->sax) ? ctxt->sax->warning : NULL, 415 ctxt->userData, 416 ctxt, NULL, XML_FROM_PARSER, error, 417 XML_ERR_WARNING, NULL, 0, 418 (const char *) str1, (const char *) str2, NULL, 0, 0, 419 msg, (const char *) str1, (const char *) str2); 420} 421 422/** 423 * xmlValidityError: 424 * @ctxt: an XML parser context 425 * @error: the error number 426 * @msg: the error message 427 * @str1: extra data 428 * 429 * Handle a validity error. 430 */ 431static void 432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 433 const char *msg, const xmlChar *str1) 434{ 435 xmlStructuredErrorFunc schannel = NULL; 436 437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 438 (ctxt->instate == XML_PARSER_EOF)) 439 return; 440 ctxt->errNo = error; 441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 442 schannel = ctxt->sax->serror; 443 __xmlRaiseError(schannel, 444 ctxt->vctxt.error, ctxt->vctxt.userData, 445 ctxt, NULL, XML_FROM_DTD, error, 446 XML_ERR_ERROR, NULL, 0, (const char *) str1, 447 NULL, NULL, 0, 0, 448 msg, (const char *) str1); 449 ctxt->valid = 0; 450} 451 452/** 453 * xmlFatalErrMsgInt: 454 * @ctxt: an XML parser context 455 * @error: the error number 456 * @msg: the error message 457 * @val: an integer value 458 * 459 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 460 */ 461static void 462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 463 const char *msg, int val) 464{ 465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 466 (ctxt->instate == XML_PARSER_EOF)) 467 return; 468 ctxt->errNo = error; 469 __xmlRaiseError(NULL, NULL, NULL, 470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 472 ctxt->wellFormed = 0; 473 if (ctxt->recovery == 0) 474 ctxt->disableSAX = 1; 475} 476 477/** 478 * xmlFatalErrMsgStrIntStr: 479 * @ctxt: an XML parser context 480 * @error: the error number 481 * @msg: the error message 482 * @str1: an string info 483 * @val: an integer value 484 * @str2: an string info 485 * 486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 487 */ 488static void 489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 490 const char *msg, const xmlChar *str1, int val, 491 const xmlChar *str2) 492{ 493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 494 (ctxt->instate == XML_PARSER_EOF)) 495 return; 496 ctxt->errNo = error; 497 __xmlRaiseError(NULL, NULL, NULL, 498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 499 NULL, 0, (const char *) str1, (const char *) str2, 500 NULL, val, 0, msg, str1, val, str2); 501 ctxt->wellFormed = 0; 502 if (ctxt->recovery == 0) 503 ctxt->disableSAX = 1; 504} 505 506/** 507 * xmlFatalErrMsgStr: 508 * @ctxt: an XML parser context 509 * @error: the error number 510 * @msg: the error message 511 * @val: a string value 512 * 513 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 514 */ 515static void 516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 517 const char *msg, const xmlChar * val) 518{ 519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 520 (ctxt->instate == XML_PARSER_EOF)) 521 return; 522 ctxt->errNo = error; 523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 524 XML_FROM_PARSER, error, XML_ERR_FATAL, 525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 526 val); 527 ctxt->wellFormed = 0; 528 if (ctxt->recovery == 0) 529 ctxt->disableSAX = 1; 530} 531 532/** 533 * xmlErrMsgStr: 534 * @ctxt: an XML parser context 535 * @error: the error number 536 * @msg: the error message 537 * @val: a string value 538 * 539 * Handle a non fatal parser error 540 */ 541static void 542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 543 const char *msg, const xmlChar * val) 544{ 545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 546 (ctxt->instate == XML_PARSER_EOF)) 547 return; 548 ctxt->errNo = error; 549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 550 XML_FROM_PARSER, error, XML_ERR_ERROR, 551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 552 val); 553} 554 555/** 556 * xmlNsErr: 557 * @ctxt: an XML parser context 558 * @error: the error number 559 * @msg: the message 560 * @info1: extra information string 561 * @info2: extra information string 562 * 563 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 564 */ 565static void 566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 567 const char *msg, 568 const xmlChar * info1, const xmlChar * info2, 569 const xmlChar * info3) 570{ 571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 572 (ctxt->instate == XML_PARSER_EOF)) 573 return; 574 ctxt->errNo = error; 575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 576 XML_ERR_ERROR, NULL, 0, (const char *) info1, 577 (const char *) info2, (const char *) info3, 0, 0, msg, 578 info1, info2, info3); 579 ctxt->nsWellFormed = 0; 580} 581 582/************************************************************************ 583 * * 584 * SAX2 defaulted attributes handling * 585 * * 586 ************************************************************************/ 587 588/** 589 * xmlDetectSAX2: 590 * @ctxt: an XML parser context 591 * 592 * Do the SAX2 detection and specific intialization 593 */ 594static void 595xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 596 if (ctxt == NULL) return; 597#ifdef LIBXML_SAX1_ENABLED 598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 599 ((ctxt->sax->startElementNs != NULL) || 600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 601#else 602 ctxt->sax2 = 1; 603#endif /* LIBXML_SAX1_ENABLED */ 604 605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 608 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 609 (ctxt->str_xml_ns == NULL)) { 610 xmlErrMemory(ctxt, NULL); 611 } 612} 613 614typedef struct _xmlDefAttrs xmlDefAttrs; 615typedef xmlDefAttrs *xmlDefAttrsPtr; 616struct _xmlDefAttrs { 617 int nbAttrs; /* number of defaulted attributes on that element */ 618 int maxAttrs; /* the size of the array */ 619 const xmlChar *values[4]; /* array of localname/prefix/values */ 620}; 621 622/** 623 * xmlAddDefAttrs: 624 * @ctxt: an 
XML parser context 625 * @fullname: the element fullname 626 * @fullattr: the attribute fullname 627 * @value: the attribute value 628 * 629 * Add a defaulted attribute for an element 630 */ 631static void 632xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 633 const xmlChar *fullname, 634 const xmlChar *fullattr, 635 const xmlChar *value) { 636 xmlDefAttrsPtr defaults; 637 int len; 638 const xmlChar *name; 639 const xmlChar *prefix; 640 641 if (ctxt->attsDefault == NULL) { 642 ctxt->attsDefault = xmlHashCreate(10); 643 if (ctxt->attsDefault == NULL) 644 goto mem_error; 645 } 646 647 /* 648 * split the element name into prefix:localname , the string found 649 * are within the DTD and then not associated to namespace names. 650 */ 651 name = xmlSplitQName3(fullname, &len); 652 if (name == NULL) { 653 name = xmlDictLookup(ctxt->dict, fullname, -1); 654 prefix = NULL; 655 } else { 656 name = xmlDictLookup(ctxt->dict, name, -1); 657 prefix = xmlDictLookup(ctxt->dict, fullname, len); 658 } 659 660 /* 661 * make sure there is some storage 662 */ 663 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 664 if (defaults == NULL) { 665 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 666 (4 * 4) * sizeof(const xmlChar *)); 667 if (defaults == NULL) 668 goto mem_error; 669 defaults->nbAttrs = 0; 670 defaults->maxAttrs = 4; 671 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 672 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 673 xmlDefAttrsPtr temp; 674 675 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 676 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 677 if (temp == NULL) 678 goto mem_error; 679 defaults = temp; 680 defaults->maxAttrs *= 2; 681 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 682 } 683 684 /* 685 * plit the element name into prefix:localname , the string found 686 * are within the DTD and hen not associated to namespace names. 
687 */ 688 name = xmlSplitQName3(fullattr, &len); 689 if (name == NULL) { 690 name = xmlDictLookup(ctxt->dict, fullattr, -1); 691 prefix = NULL; 692 } else { 693 name = xmlDictLookup(ctxt->dict, name, -1); 694 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 695 } 696 697 defaults->values[4 * defaults->nbAttrs] = name; 698 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 699 /* intern the string and precompute the end */ 700 len = xmlStrlen(value); 701 value = xmlDictLookup(ctxt->dict, value, len); 702 defaults->values[4 * defaults->nbAttrs + 2] = value; 703 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 704 defaults->nbAttrs++; 705 706 return; 707 708mem_error: 709 xmlErrMemory(ctxt, NULL); 710 return; 711} 712 713/** 714 * xmlAddSpecialAttr: 715 * @ctxt: an XML parser context 716 * @fullname: the element fullname 717 * @fullattr: the attribute fullname 718 * @type: the attribute type 719 * 720 * Register that this attribute is not CDATA 721 */ 722static void 723xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 724 const xmlChar *fullname, 725 const xmlChar *fullattr, 726 int type) 727{ 728 if (ctxt->attsSpecial == NULL) { 729 ctxt->attsSpecial = xmlHashCreate(10); 730 if (ctxt->attsSpecial == NULL) 731 goto mem_error; 732 } 733 734 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 735 (void *) (long) type); 736 return; 737 738mem_error: 739 xmlErrMemory(ctxt, NULL); 740 return; 741} 742 743/** 744 * xmlCheckLanguageID: 745 * @lang: pointer to the string value 746 * 747 * Checks that the value conforms to the LanguageID production: 748 * 749 * NOTE: this is somewhat deprecated, those productions were removed from 750 * the XML Second edition. 
751 * 752 * [33] LanguageID ::= Langcode ('-' Subcode)* 753 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 754 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 755 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 756 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 757 * [38] Subcode ::= ([a-z] | [A-Z])+ 758 * 759 * Returns 1 if correct 0 otherwise 760 **/ 761int 762xmlCheckLanguageID(const xmlChar * lang) 763{ 764 const xmlChar *cur = lang; 765 766 if (cur == NULL) 767 return (0); 768 if (((cur[0] == 'i') && (cur[1] == '-')) || 769 ((cur[0] == 'I') && (cur[1] == '-'))) { 770 /* 771 * IANA code 772 */ 773 cur += 2; 774 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 775 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 776 cur++; 777 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 778 ((cur[0] == 'X') && (cur[1] == '-'))) { 779 /* 780 * User code 781 */ 782 cur += 2; 783 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 784 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 785 cur++; 786 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 787 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 788 /* 789 * ISO639 790 */ 791 cur++; 792 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 793 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 794 cur++; 795 else 796 return (0); 797 } else 798 return (0); 799 while (cur[0] != 0) { /* non input consuming */ 800 if (cur[0] != '-') 801 return (0); 802 cur++; 803 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 804 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 805 cur++; 806 else 807 return (0); 808 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 809 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 810 cur++; 811 } 812 return (1); 813} 814 815/************************************************************************ 816 * * 817 * Parser stacks related functions and macros * 818 * * 819 ************************************************************************/ 820 821xmlEntityPtr 
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 822 const xmlChar ** str); 823 824#ifdef SAX2 825/** 826 * nsPush: 827 * @ctxt: an XML parser context 828 * @prefix: the namespace prefix or NULL 829 * @URL: the namespace name 830 * 831 * Pushes a new parser namespace on top of the ns stack 832 * 833 * Returns -1 in case of error, -2 if the namespace should be discarded 834 * and the index in the stack otherwise. 835 */ 836static int 837nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 838{ 839 if (ctxt->options & XML_PARSE_NSCLEAN) { 840 int i; 841 for (i = 0;i < ctxt->nsNr;i += 2) { 842 if (ctxt->nsTab[i] == prefix) { 843 /* in scope */ 844 if (ctxt->nsTab[i + 1] == URL) 845 return(-2); 846 /* out of scope keep it */ 847 break; 848 } 849 } 850 } 851 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 852 ctxt->nsMax = 10; 853 ctxt->nsNr = 0; 854 ctxt->nsTab = (const xmlChar **) 855 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 856 if (ctxt->nsTab == NULL) { 857 xmlErrMemory(ctxt, NULL); 858 ctxt->nsMax = 0; 859 return (-1); 860 } 861 } else if (ctxt->nsNr >= ctxt->nsMax) { 862 ctxt->nsMax *= 2; 863 ctxt->nsTab = (const xmlChar **) 864 xmlRealloc((char *) ctxt->nsTab, 865 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 866 if (ctxt->nsTab == NULL) { 867 xmlErrMemory(ctxt, NULL); 868 ctxt->nsMax /= 2; 869 return (-1); 870 } 871 } 872 ctxt->nsTab[ctxt->nsNr++] = prefix; 873 ctxt->nsTab[ctxt->nsNr++] = URL; 874 return (ctxt->nsNr); 875} 876/** 877 * nsPop: 878 * @ctxt: an XML parser context 879 * @nr: the number to pop 880 * 881 * Pops the top @nr parser prefix/namespace from the ns stack 882 * 883 * Returns the number of namespaces removed 884 */ 885static int 886nsPop(xmlParserCtxtPtr ctxt, int nr) 887{ 888 int i; 889 890 if (ctxt->nsTab == NULL) return(0); 891 if (ctxt->nsNr < nr) { 892 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 893 nr = ctxt->nsNr; 894 } 895 if (ctxt->nsNr <= 0) 896 return (0); 897 898 for (i = 0;i < nr;i++) { 899 
ctxt->nsNr--; 900 ctxt->nsTab[ctxt->nsNr] = NULL; 901 } 902 return(nr); 903} 904#endif 905 906static int 907xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 908 const xmlChar **atts; 909 int *attallocs; 910 int maxatts; 911 912 if (ctxt->atts == NULL) { 913 maxatts = 55; /* allow for 10 attrs by default */ 914 atts = (const xmlChar **) 915 xmlMalloc(maxatts * sizeof(xmlChar *)); 916 if (atts == NULL) goto mem_error; 917 ctxt->atts = atts; 918 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 919 if (attallocs == NULL) goto mem_error; 920 ctxt->attallocs = attallocs; 921 ctxt->maxatts = maxatts; 922 } else if (nr + 5 > ctxt->maxatts) { 923 maxatts = (nr + 5) * 2; 924 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 925 maxatts * sizeof(const xmlChar *)); 926 if (atts == NULL) goto mem_error; 927 ctxt->atts = atts; 928 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 929 (maxatts / 5) * sizeof(int)); 930 if (attallocs == NULL) goto mem_error; 931 ctxt->attallocs = attallocs; 932 ctxt->maxatts = maxatts; 933 } 934 return(ctxt->maxatts); 935mem_error: 936 xmlErrMemory(ctxt, NULL); 937 return(-1); 938} 939 940/** 941 * inputPush: 942 * @ctxt: an XML parser context 943 * @value: the parser input 944 * 945 * Pushes a new parser input on top of the input stack 946 * 947 * Returns 0 in case of error, the index in the stack otherwise 948 */ 949int 950inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 951{ 952 if ((ctxt == NULL) || (value == NULL)) 953 return(0); 954 if (ctxt->inputNr >= ctxt->inputMax) { 955 ctxt->inputMax *= 2; 956 ctxt->inputTab = 957 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 958 ctxt->inputMax * 959 sizeof(ctxt->inputTab[0])); 960 if (ctxt->inputTab == NULL) { 961 xmlErrMemory(ctxt, NULL); 962 return (0); 963 } 964 } 965 ctxt->inputTab[ctxt->inputNr] = value; 966 ctxt->input = value; 967 return (ctxt->inputNr++); 968} 969/** 970 * inputPop: 971 * @ctxt: an XML parser context 972 * 973 * Pops the top parser input 
from the input stack 974 * 975 * Returns the input just removed 976 */ 977xmlParserInputPtr 978inputPop(xmlParserCtxtPtr ctxt) 979{ 980 xmlParserInputPtr ret; 981 982 if (ctxt == NULL) 983 return(NULL); 984 if (ctxt->inputNr <= 0) 985 return (0); 986 ctxt->inputNr--; 987 if (ctxt->inputNr > 0) 988 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 989 else 990 ctxt->input = NULL; 991 ret = ctxt->inputTab[ctxt->inputNr]; 992 ctxt->inputTab[ctxt->inputNr] = 0; 993 return (ret); 994} 995/** 996 * nodePush: 997 * @ctxt: an XML parser context 998 * @value: the element node 999 * 1000 * Pushes a new element node on top of the node stack 1001 * 1002 * Returns 0 in case of error, the index in the stack otherwise 1003 */ 1004int 1005nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1006{ 1007 if (ctxt == NULL) return(0); 1008 if (ctxt->nodeNr >= ctxt->nodeMax) { 1009 ctxt->nodeMax *= 2; 1010 ctxt->nodeTab = 1011 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1012 ctxt->nodeMax * 1013 sizeof(ctxt->nodeTab[0])); 1014 if (ctxt->nodeTab == NULL) { 1015 xmlErrMemory(ctxt, NULL); 1016 return (0); 1017 } 1018 } 1019 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 1020 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1021 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 1022 xmlParserMaxDepth); 1023 ctxt->instate = XML_PARSER_EOF; 1024 return(0); 1025 } 1026 ctxt->nodeTab[ctxt->nodeNr] = value; 1027 ctxt->node = value; 1028 return (ctxt->nodeNr++); 1029} 1030/** 1031 * nodePop: 1032 * @ctxt: an XML parser context 1033 * 1034 * Pops the top element node from the node stack 1035 * 1036 * Returns the node just removed 1037 */ 1038xmlNodePtr 1039nodePop(xmlParserCtxtPtr ctxt) 1040{ 1041 xmlNodePtr ret; 1042 1043 if (ctxt == NULL) return(NULL); 1044 if (ctxt->nodeNr <= 0) 1045 return (NULL); 1046 ctxt->nodeNr--; 1047 if (ctxt->nodeNr > 0) 1048 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1049 else 1050 ctxt->node = NULL; 1051 ret = ctxt->nodeTab[ctxt->nodeNr]; 1052 
ctxt->nodeTab[ctxt->nodeNr] = 0; 1053 return (ret); 1054} 1055 1056#ifdef LIBXML_PUSH_ENABLED 1057/** 1058 * nameNsPush: 1059 * @ctxt: an XML parser context 1060 * @value: the element name 1061 * @prefix: the element prefix 1062 * @URI: the element namespace name 1063 * 1064 * Pushes a new element name/prefix/URL on top of the name stack 1065 * 1066 * Returns -1 in case of error, the index in the stack otherwise 1067 */ 1068static int 1069nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1070 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1071{ 1072 if (ctxt->nameNr >= ctxt->nameMax) { 1073 const xmlChar * *tmp; 1074 void **tmp2; 1075 ctxt->nameMax *= 2; 1076 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1077 ctxt->nameMax * 1078 sizeof(ctxt->nameTab[0])); 1079 if (tmp == NULL) { 1080 ctxt->nameMax /= 2; 1081 goto mem_error; 1082 } 1083 ctxt->nameTab = tmp; 1084 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1085 ctxt->nameMax * 3 * 1086 sizeof(ctxt->pushTab[0])); 1087 if (tmp2 == NULL) { 1088 ctxt->nameMax /= 2; 1089 goto mem_error; 1090 } 1091 ctxt->pushTab = tmp2; 1092 } 1093 ctxt->nameTab[ctxt->nameNr] = value; 1094 ctxt->name = value; 1095 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1096 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1097 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1098 return (ctxt->nameNr++); 1099mem_error: 1100 xmlErrMemory(ctxt, NULL); 1101 return (-1); 1102} 1103/** 1104 * nameNsPop: 1105 * @ctxt: an XML parser context 1106 * 1107 * Pops the top element/prefix/URI name from the name stack 1108 * 1109 * Returns the name just removed 1110 */ 1111static const xmlChar * 1112nameNsPop(xmlParserCtxtPtr ctxt) 1113{ 1114 const xmlChar *ret; 1115 1116 if (ctxt->nameNr <= 0) 1117 return (0); 1118 ctxt->nameNr--; 1119 if (ctxt->nameNr > 0) 1120 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1121 else 1122 ctxt->name = NULL; 1123 ret = ctxt->nameTab[ctxt->nameNr]; 1124 
ctxt->nameTab[ctxt->nameNr] = NULL; 1125 return (ret); 1126} 1127#endif /* LIBXML_PUSH_ENABLED */ 1128 1129/** 1130 * namePush: 1131 * @ctxt: an XML parser context 1132 * @value: the element name 1133 * 1134 * Pushes a new element name on top of the name stack 1135 * 1136 * Returns -1 in case of error, the index in the stack otherwise 1137 */ 1138int 1139namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1140{ 1141 if (ctxt == NULL) return (-1); 1142 1143 if (ctxt->nameNr >= ctxt->nameMax) { 1144 const xmlChar * *tmp; 1145 ctxt->nameMax *= 2; 1146 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1147 ctxt->nameMax * 1148 sizeof(ctxt->nameTab[0])); 1149 if (tmp == NULL) { 1150 ctxt->nameMax /= 2; 1151 goto mem_error; 1152 } 1153 ctxt->nameTab = tmp; 1154 } 1155 ctxt->nameTab[ctxt->nameNr] = value; 1156 ctxt->name = value; 1157 return (ctxt->nameNr++); 1158mem_error: 1159 xmlErrMemory(ctxt, NULL); 1160 return (-1); 1161} 1162/** 1163 * namePop: 1164 * @ctxt: an XML parser context 1165 * 1166 * Pops the top element name from the name stack 1167 * 1168 * Returns the name just removed 1169 */ 1170const xmlChar * 1171namePop(xmlParserCtxtPtr ctxt) 1172{ 1173 const xmlChar *ret; 1174 1175 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1176 return (NULL); 1177 ctxt->nameNr--; 1178 if (ctxt->nameNr > 0) 1179 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1180 else 1181 ctxt->name = NULL; 1182 ret = ctxt->nameTab[ctxt->nameNr]; 1183 ctxt->nameTab[ctxt->nameNr] = 0; 1184 return (ret); 1185} 1186 1187static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1188 if (ctxt->spaceNr >= ctxt->spaceMax) { 1189 ctxt->spaceMax *= 2; 1190 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1191 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1192 if (ctxt->spaceTab == NULL) { 1193 xmlErrMemory(ctxt, NULL); 1194 return(0); 1195 } 1196 } 1197 ctxt->spaceTab[ctxt->spaceNr] = val; 1198 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1199 return(ctxt->spaceNr++); 1200} 1201 
1202static int spacePop(xmlParserCtxtPtr ctxt) { 1203 int ret; 1204 if (ctxt->spaceNr <= 0) return(0); 1205 ctxt->spaceNr--; 1206 if (ctxt->spaceNr > 0) 1207 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1208 else 1209 ctxt->space = NULL; 1210 ret = ctxt->spaceTab[ctxt->spaceNr]; 1211 ctxt->spaceTab[ctxt->spaceNr] = -1; 1212 return(ret); 1213} 1214 1215/* 1216 * Macros for accessing the content. Those should be used only by the parser, 1217 * and not exported. 1218 * 1219 * Dirty macros, i.e. one often need to make assumption on the context to 1220 * use them 1221 * 1222 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1223 * To be used with extreme caution since operations consuming 1224 * characters may move the input buffer to a different location ! 1225 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1226 * This should be used internally by the parser 1227 * only to compare to ASCII values otherwise it would break when 1228 * running with UTF-8 encoding. 1229 * RAW same as CUR but in the input buffer, bypass any token 1230 * extraction that may have been done 1231 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1232 * to compare on ASCII based substring. 1233 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1234 * strings without newlines within the parser. 1235 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1236 * defined char within the parser. 1237 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1238 * 1239 * NEXT Skip to the next character, this does the proper decoding 1240 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1241 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1242 * CUR_CHAR(l) returns the current unicode character (int), set l 1243 * to the number of xmlChars used for the encoding [0-5]. 
1244 * CUR_SCHAR same but operate on a string instead of the context 1245 * COPY_BUF copy the current unicode char to the target buffer, increment 1246 * the index 1247 * GROW, SHRINK handling of input buffers 1248 */ 1249 1250#define RAW (*ctxt->input->cur) 1251#define CUR (*ctxt->input->cur) 1252#define NXT(val) ctxt->input->cur[(val)] 1253#define CUR_PTR ctxt->input->cur 1254 1255#define CMP4( s, c1, c2, c3, c4 ) \ 1256 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1257 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1258#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1259 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1260#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1261 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1262#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1263 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1264#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1265 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1266#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1267 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1268 ((unsigned char *) s)[ 8 ] == c9 ) 1269#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1270 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1271 ((unsigned char *) s)[ 9 ] == c10 ) 1272 1273#define SKIP(val) do { \ 1274 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1275 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1276 if ((*ctxt->input->cur == 0) && \ 1277 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1278 xmlPopInput(ctxt); \ 1279 } while (0) 1280 1281#define SKIPL(val) do { \ 1282 int skipl; \ 1283 for(skipl=0; skipl<val; skipl++) { \ 1284 if (*(ctxt->input->cur) == '\n') { \ 1285 ctxt->input->line++; ctxt->input->col = 1; \ 1286 } else ctxt->input->col++; \ 1287 ctxt->nbChars++; \ 1288 
ctxt->input->cur++; \ 1289 } \ 1290 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1291 if ((*ctxt->input->cur == 0) && \ 1292 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1293 xmlPopInput(ctxt); \ 1294 } while (0) 1295 1296#define SHRINK if ((ctxt->progressive == 0) && \ 1297 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1298 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1299 xmlSHRINK (ctxt); 1300 1301static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1302 xmlParserInputShrink(ctxt->input); 1303 if ((*ctxt->input->cur == 0) && 1304 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1305 xmlPopInput(ctxt); 1306 } 1307 1308#define GROW if ((ctxt->progressive == 0) && \ 1309 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1310 xmlGROW (ctxt); 1311 1312static void xmlGROW (xmlParserCtxtPtr ctxt) { 1313 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1314 if ((*ctxt->input->cur == 0) && 1315 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1316 xmlPopInput(ctxt); 1317} 1318 1319#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1320 1321#define NEXT xmlNextChar(ctxt) 1322 1323#define NEXT1 { \ 1324 ctxt->input->col++; \ 1325 ctxt->input->cur++; \ 1326 ctxt->nbChars++; \ 1327 if (*ctxt->input->cur == 0) \ 1328 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1329 } 1330 1331#define NEXTL(l) do { \ 1332 if (*(ctxt->input->cur) == '\n') { \ 1333 ctxt->input->line++; ctxt->input->col = 1; \ 1334 } else ctxt->input->col++; \ 1335 ctxt->input->cur += l; \ 1336 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1337 } while (0) 1338 1339#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1340#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1341 1342#define COPY_BUF(l,b,i,v) \ 1343 if (l == 1) b[i++] = (xmlChar) v; \ 1344 else i += xmlCopyCharMultiByte(&b[i],v) 1345 1346/** 1347 * xmlSkipBlankChars: 1348 * @ctxt: the XML parser context 1349 * 1350 * skip all blanks character found at that point in the 
input streams. 1351 * It pops up finished entities in the process if allowable at that point. 1352 * 1353 * Returns the number of space chars skipped 1354 */ 1355 1356int 1357xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1358 int res = 0; 1359 1360 /* 1361 * It's Okay to use CUR/NEXT here since all the blanks are on 1362 * the ASCII range. 1363 */ 1364 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1365 const xmlChar *cur; 1366 /* 1367 * if we are in the document content, go really fast 1368 */ 1369 cur = ctxt->input->cur; 1370 while (IS_BLANK_CH(*cur)) { 1371 if (*cur == '\n') { 1372 ctxt->input->line++; ctxt->input->col = 1; 1373 } 1374 cur++; 1375 res++; 1376 if (*cur == 0) { 1377 ctxt->input->cur = cur; 1378 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1379 cur = ctxt->input->cur; 1380 } 1381 } 1382 ctxt->input->cur = cur; 1383 } else { 1384 int cur; 1385 do { 1386 cur = CUR; 1387 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ 1388 NEXT; 1389 cur = CUR; 1390 res++; 1391 } 1392 while ((cur == 0) && (ctxt->inputNr > 1) && 1393 (ctxt->instate != XML_PARSER_COMMENT)) { 1394 xmlPopInput(ctxt); 1395 cur = CUR; 1396 } 1397 /* 1398 * Need to handle support of entities branching here 1399 */ 1400 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1401 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1402 } 1403 return(res); 1404} 1405 1406/************************************************************************ 1407 * * 1408 * Commodity functions to handle entities * 1409 * * 1410 ************************************************************************/ 1411 1412/** 1413 * xmlPopInput: 1414 * @ctxt: an XML parser context 1415 * 1416 * xmlPopInput: the current input pointed by ctxt->input came to an end 1417 * pop it and return the next char. 
1418 * 1419 * Returns the current xmlChar in the parser context 1420 */ 1421xmlChar 1422xmlPopInput(xmlParserCtxtPtr ctxt) { 1423 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1424 if (xmlParserDebugEntities) 1425 xmlGenericError(xmlGenericErrorContext, 1426 "Popping input %d\n", ctxt->inputNr); 1427 xmlFreeInputStream(inputPop(ctxt)); 1428 if ((*ctxt->input->cur == 0) && 1429 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1430 return(xmlPopInput(ctxt)); 1431 return(CUR); 1432} 1433 1434/** 1435 * xmlPushInput: 1436 * @ctxt: an XML parser context 1437 * @input: an XML parser input fragment (entity, XML fragment ...). 1438 * 1439 * xmlPushInput: switch to a new input stream which is stacked on top 1440 * of the previous one(s). 1441 */ 1442void 1443xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1444 if (input == NULL) return; 1445 1446 if (xmlParserDebugEntities) { 1447 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1448 xmlGenericError(xmlGenericErrorContext, 1449 "%s(%d): ", ctxt->input->filename, 1450 ctxt->input->line); 1451 xmlGenericError(xmlGenericErrorContext, 1452 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1453 } 1454 inputPush(ctxt, input); 1455 GROW; 1456} 1457 1458/** 1459 * xmlParseCharRef: 1460 * @ctxt: an XML parser context 1461 * 1462 * parse Reference declarations 1463 * 1464 * [66] CharRef ::= '&#' [0-9]+ ';' | 1465 * '&#x' [0-9a-fA-F]+ ';' 1466 * 1467 * [ WFC: Legal Character ] 1468 * Characters referred to using character references must match the 1469 * production for Char. 
1470 * 1471 * Returns the value parsed (as an int), 0 in case of error 1472 */ 1473int 1474xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1475 unsigned int val = 0; 1476 int count = 0; 1477 unsigned int outofrange = 0; 1478 1479 /* 1480 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1481 */ 1482 if ((RAW == '&') && (NXT(1) == '#') && 1483 (NXT(2) == 'x')) { 1484 SKIP(3); 1485 GROW; 1486 while (RAW != ';') { /* loop blocked by count */ 1487 if (count++ > 20) { 1488 count = 0; 1489 GROW; 1490 } 1491 if ((RAW >= '0') && (RAW <= '9')) 1492 val = val * 16 + (CUR - '0'); 1493 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1494 val = val * 16 + (CUR - 'a') + 10; 1495 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1496 val = val * 16 + (CUR - 'A') + 10; 1497 else { 1498 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1499 val = 0; 1500 break; 1501 } 1502 if (val > 0x10FFFF) 1503 outofrange = val; 1504 1505 NEXT; 1506 count++; 1507 } 1508 if (RAW == ';') { 1509 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1510 ctxt->input->col++; 1511 ctxt->nbChars ++; 1512 ctxt->input->cur++; 1513 } 1514 } else if ((RAW == '&') && (NXT(1) == '#')) { 1515 SKIP(2); 1516 GROW; 1517 while (RAW != ';') { /* loop blocked by count */ 1518 if (count++ > 20) { 1519 count = 0; 1520 GROW; 1521 } 1522 if ((RAW >= '0') && (RAW <= '9')) 1523 val = val * 10 + (CUR - '0'); 1524 else { 1525 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1526 val = 0; 1527 break; 1528 } 1529 if (val > 0x10FFFF) 1530 outofrange = val; 1531 1532 NEXT; 1533 count++; 1534 } 1535 if (RAW == ';') { 1536 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1537 ctxt->input->col++; 1538 ctxt->nbChars ++; 1539 ctxt->input->cur++; 1540 } 1541 } else { 1542 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1543 } 1544 1545 /* 1546 * [ WFC: Legal Character ] 1547 * Characters referred to using character references must match the 1548 * production for Char. 
1549 */ 1550 if ((IS_CHAR(val) && (outofrange == 0))) { 1551 return(val); 1552 } else { 1553 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1554 "xmlParseCharRef: invalid xmlChar value %d\n", 1555 val); 1556 } 1557 return(0); 1558} 1559 1560/** 1561 * xmlParseStringCharRef: 1562 * @ctxt: an XML parser context 1563 * @str: a pointer to an index in the string 1564 * 1565 * parse Reference declarations, variant parsing from a string rather 1566 * than an an input flow. 1567 * 1568 * [66] CharRef ::= '&#' [0-9]+ ';' | 1569 * '&#x' [0-9a-fA-F]+ ';' 1570 * 1571 * [ WFC: Legal Character ] 1572 * Characters referred to using character references must match the 1573 * production for Char. 1574 * 1575 * Returns the value parsed (as an int), 0 in case of error, str will be 1576 * updated to the current value of the index 1577 */ 1578static int 1579xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1580 const xmlChar *ptr; 1581 xmlChar cur; 1582 unsigned int val = 0; 1583 unsigned int outofrange = 0; 1584 1585 if ((str == NULL) || (*str == NULL)) return(0); 1586 ptr = *str; 1587 cur = *ptr; 1588 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1589 ptr += 3; 1590 cur = *ptr; 1591 while (cur != ';') { /* Non input consuming loop */ 1592 if ((cur >= '0') && (cur <= '9')) 1593 val = val * 16 + (cur - '0'); 1594 else if ((cur >= 'a') && (cur <= 'f')) 1595 val = val * 16 + (cur - 'a') + 10; 1596 else if ((cur >= 'A') && (cur <= 'F')) 1597 val = val * 16 + (cur - 'A') + 10; 1598 else { 1599 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1600 val = 0; 1601 break; 1602 } 1603 if (val > 0x10FFFF) 1604 outofrange = val; 1605 1606 ptr++; 1607 cur = *ptr; 1608 } 1609 if (cur == ';') 1610 ptr++; 1611 } else if ((cur == '&') && (ptr[1] == '#')){ 1612 ptr += 2; 1613 cur = *ptr; 1614 while (cur != ';') { /* Non input consuming loops */ 1615 if ((cur >= '0') && (cur <= '9')) 1616 val = val * 10 + (cur - '0'); 1617 else { 1618 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 1619 val = 0; 1620 break; 1621 } 1622 if (val > 0x10FFFF) 1623 outofrange = val; 1624 1625 ptr++; 1626 cur = *ptr; 1627 } 1628 if (cur == ';') 1629 ptr++; 1630 } else { 1631 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1632 return(0); 1633 } 1634 *str = ptr; 1635 1636 /* 1637 * [ WFC: Legal Character ] 1638 * Characters referred to using character references must match the 1639 * production for Char. 1640 */ 1641 if ((IS_CHAR(val) && (outofrange == 0))) { 1642 return(val); 1643 } else { 1644 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1645 "xmlParseStringCharRef: invalid xmlChar value %d\n", 1646 val); 1647 } 1648 return(0); 1649} 1650 1651/** 1652 * xmlNewBlanksWrapperInputStream: 1653 * @ctxt: an XML parser context 1654 * @entity: an Entity pointer 1655 * 1656 * Create a new input stream for wrapping 1657 * blanks around a PEReference 1658 * 1659 * Returns the new input stream or NULL 1660 */ 1661 1662static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 1663 1664static xmlParserInputPtr 1665xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1666 xmlParserInputPtr input; 1667 xmlChar *buffer; 1668 size_t length; 1669 if (entity == NULL) { 1670 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 1671 "xmlNewBlanksWrapperInputStream entity\n"); 1672 return(NULL); 1673 } 1674 if (xmlParserDebugEntities) 1675 xmlGenericError(xmlGenericErrorContext, 1676 "new blanks wrapper for entity: %s\n", entity->name); 1677 input = xmlNewInputStream(ctxt); 1678 if (input == NULL) { 1679 return(NULL); 1680 } 1681 length = xmlStrlen(entity->name) + 5; 1682 buffer = xmlMallocAtomic(length); 1683 if (buffer == NULL) { 1684 xmlErrMemory(ctxt, NULL); 1685 return(NULL); 1686 } 1687 buffer [0] = ' '; 1688 buffer [1] = '%'; 1689 buffer [length-3] = ';'; 1690 buffer [length-2] = ' '; 1691 buffer [length-1] = 0; 1692 memcpy(buffer + 2, entity->name, length - 5); 1693 input->free = deallocblankswrapper; 1694 input->base = 
buffer; 1695 input->cur = buffer; 1696 input->length = length; 1697 input->end = &buffer[length]; 1698 return(input); 1699} 1700 1701/** 1702 * xmlParserHandlePEReference: 1703 * @ctxt: the parser context 1704 * 1705 * [69] PEReference ::= '%' Name ';' 1706 * 1707 * [ WFC: No Recursion ] 1708 * A parsed entity must not contain a recursive 1709 * reference to itself, either directly or indirectly. 1710 * 1711 * [ WFC: Entity Declared ] 1712 * In a document without any DTD, a document with only an internal DTD 1713 * subset which contains no parameter entity references, or a document 1714 * with "standalone='yes'", ... ... The declaration of a parameter 1715 * entity must precede any reference to it... 1716 * 1717 * [ VC: Entity Declared ] 1718 * In a document with an external subset or external parameter entities 1719 * with "standalone='no'", ... ... The declaration of a parameter entity 1720 * must precede any reference to it... 1721 * 1722 * [ WFC: In DTD ] 1723 * Parameter-entity references may only appear in the DTD. 1724 * NOTE: misleading but this is handled. 1725 * 1726 * A PEReference may have been detected in the current input stream 1727 * the handling is done accordingly to 1728 * http://www.w3.org/TR/REC-xml#entproc 1729 * i.e. 
1730 * - Included in literal in entity values 1731 * - Included as Parameter Entity reference within DTDs 1732 */ 1733void 1734xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 1735 const xmlChar *name; 1736 xmlEntityPtr entity = NULL; 1737 xmlParserInputPtr input; 1738 1739 if (RAW != '%') return; 1740 switch(ctxt->instate) { 1741 case XML_PARSER_CDATA_SECTION: 1742 return; 1743 case XML_PARSER_COMMENT: 1744 return; 1745 case XML_PARSER_START_TAG: 1746 return; 1747 case XML_PARSER_END_TAG: 1748 return; 1749 case XML_PARSER_EOF: 1750 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 1751 return; 1752 case XML_PARSER_PROLOG: 1753 case XML_PARSER_START: 1754 case XML_PARSER_MISC: 1755 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 1756 return; 1757 case XML_PARSER_ENTITY_DECL: 1758 case XML_PARSER_CONTENT: 1759 case XML_PARSER_ATTRIBUTE_VALUE: 1760 case XML_PARSER_PI: 1761 case XML_PARSER_SYSTEM_LITERAL: 1762 case XML_PARSER_PUBLIC_LITERAL: 1763 /* we just ignore it there */ 1764 return; 1765 case XML_PARSER_EPILOG: 1766 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 1767 return; 1768 case XML_PARSER_ENTITY_VALUE: 1769 /* 1770 * NOTE: in the case of entity values, we don't do the 1771 * substitution here since we need the literal 1772 * entity value to be able to save the internal 1773 * subset of the document. 1774 * This will be handled by xmlStringDecodeEntities 1775 */ 1776 return; 1777 case XML_PARSER_DTD: 1778 /* 1779 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 1780 * In the internal DTD subset, parameter-entity references 1781 * can occur only where markup declarations can occur, not 1782 * within markup declarations. 
1783 * In that case this is handled in xmlParseMarkupDecl 1784 */ 1785 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 1786 return; 1787 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 1788 return; 1789 break; 1790 case XML_PARSER_IGNORE: 1791 return; 1792 } 1793 1794 NEXT; 1795 name = xmlParseName(ctxt); 1796 if (xmlParserDebugEntities) 1797 xmlGenericError(xmlGenericErrorContext, 1798 "PEReference: %s\n", name); 1799 if (name == NULL) { 1800 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 1801 } else { 1802 if (RAW == ';') { 1803 NEXT; 1804 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 1805 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 1806 if (entity == NULL) { 1807 1808 /* 1809 * [ WFC: Entity Declared ] 1810 * In a document without any DTD, a document with only an 1811 * internal DTD subset which contains no parameter entity 1812 * references, or a document with "standalone='yes'", ... 1813 * ... The declaration of a parameter entity must precede 1814 * any reference to it... 1815 */ 1816 if ((ctxt->standalone == 1) || 1817 ((ctxt->hasExternalSubset == 0) && 1818 (ctxt->hasPErefs == 0))) { 1819 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 1820 "PEReference: %%%s; not found\n", name); 1821 } else { 1822 /* 1823 * [ VC: Entity Declared ] 1824 * In a document with an external subset or external 1825 * parameter entities with "standalone='no'", ... 1826 * ... The declaration of a parameter entity must precede 1827 * any reference to it... 
1828 */ 1829 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 1830 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 1831 "PEReference: %%%s; not found\n", 1832 name); 1833 } else 1834 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 1835 "PEReference: %%%s; not found\n", 1836 name, NULL); 1837 ctxt->valid = 0; 1838 } 1839 } else if (ctxt->input->free != deallocblankswrapper) { 1840 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 1841 xmlPushInput(ctxt, input); 1842 } else { 1843 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 1844 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 1845 xmlChar start[4]; 1846 xmlCharEncoding enc; 1847 1848 /* 1849 * handle the extra spaces added before and after 1850 * c.f. http://www.w3.org/TR/REC-xml#as-PE 1851 * this is done independently. 1852 */ 1853 input = xmlNewEntityInputStream(ctxt, entity); 1854 xmlPushInput(ctxt, input); 1855 1856 /* 1857 * Get the 4 first bytes and decode the charset 1858 * if enc != XML_CHAR_ENCODING_NONE 1859 * plug some encoding conversion routines. 1860 * Note that, since we may have some non-UTF8 1861 * encoding (like UTF16, bug 135229), the 'length' 1862 * is not known, but we can calculate based upon 1863 * the amount of data in the buffer. 
1864 */ 1865 GROW 1866 if ((ctxt->input->end - ctxt->input->cur)>=4) { 1867 start[0] = RAW; 1868 start[1] = NXT(1); 1869 start[2] = NXT(2); 1870 start[3] = NXT(3); 1871 enc = xmlDetectCharEncoding(start, 4); 1872 if (enc != XML_CHAR_ENCODING_NONE) { 1873 xmlSwitchEncoding(ctxt, enc); 1874 } 1875 } 1876 1877 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 1878 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 1879 (IS_BLANK_CH(NXT(5)))) { 1880 xmlParseTextDecl(ctxt); 1881 } 1882 } else { 1883 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 1884 "PEReference: %s is not a parameter entity\n", 1885 name); 1886 } 1887 } 1888 } else { 1889 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 1890 } 1891 } 1892} 1893 1894/* 1895 * Macro used to grow the current buffer. 1896 */ 1897#define growBuffer(buffer) { \ 1898 xmlChar *tmp; \ 1899 buffer##_size *= 2; \ 1900 tmp = (xmlChar *) \ 1901 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1902 if (tmp == NULL) goto mem_error; \ 1903 buffer = tmp; \ 1904} 1905 1906/** 1907 * xmlStringLenDecodeEntities: 1908 * @ctxt: the parser context 1909 * @str: the input string 1910 * @len: the string length 1911 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1912 * @end: an end marker xmlChar, 0 if none 1913 * @end2: an end marker xmlChar, 0 if none 1914 * @end3: an end marker xmlChar, 0 if none 1915 * 1916 * Takes a entity string content and process to do the adequate substitutions. 1917 * 1918 * [67] Reference ::= EntityRef | CharRef 1919 * 1920 * [69] PEReference ::= '%' Name ';' 1921 * 1922 * Returns A newly allocated string with the substitution done. The caller 1923 * must deallocate it ! 
1924 */ 1925xmlChar * 1926xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 1927 int what, xmlChar end, xmlChar end2, xmlChar end3) { 1928 xmlChar *buffer = NULL; 1929 int buffer_size = 0; 1930 1931 xmlChar *current = NULL; 1932 const xmlChar *last; 1933 xmlEntityPtr ent; 1934 int c,l; 1935 int nbchars = 0; 1936 1937 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 1938 return(NULL); 1939 last = str + len; 1940 1941 if (ctxt->depth > 40) { 1942 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 1943 return(NULL); 1944 } 1945 1946 /* 1947 * allocate a translation buffer. 1948 */ 1949 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1950 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 1951 if (buffer == NULL) goto mem_error; 1952 1953 /* 1954 * OK loop until we reach one of the ending char or a size limit. 1955 * we are operating on already parsed values. 1956 */ 1957 if (str < last) 1958 c = CUR_SCHAR(str, l); 1959 else 1960 c = 0; 1961 while ((c != 0) && (c != end) && /* non input consuming loop */ 1962 (c != end2) && (c != end3)) { 1963 1964 if (c == 0) break; 1965 if ((c == '&') && (str[1] == '#')) { 1966 int val = xmlParseStringCharRef(ctxt, &str); 1967 if (val != 0) { 1968 COPY_BUF(0,buffer,nbchars,val); 1969 } 1970 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 1971 if (xmlParserDebugEntities) 1972 xmlGenericError(xmlGenericErrorContext, 1973 "String decoding Entity Reference: %.30s\n", 1974 str); 1975 ent = xmlParseStringEntityRef(ctxt, &str); 1976 if ((ent != NULL) && 1977 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 1978 if (ent->content != NULL) { 1979 COPY_BUF(0,buffer,nbchars,ent->content[0]); 1980 } else { 1981 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 1982 "predefined entity has no content\n"); 1983 } 1984 } else if ((ent != NULL) && (ent->content != NULL)) { 1985 xmlChar *rep; 1986 1987 ctxt->depth++; 1988 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 1989 0, 0, 0); 1990 ctxt->depth--; 
1991 if (rep != NULL) { 1992 current = rep; 1993 while (*current != 0) { /* non input consuming loop */ 1994 buffer[nbchars++] = *current++; 1995 if (nbchars > 1996 buffer_size - XML_PARSER_BUFFER_SIZE) { 1997 growBuffer(buffer); 1998 } 1999 } 2000 xmlFree(rep); 2001 } 2002 } else if (ent != NULL) { 2003 int i = xmlStrlen(ent->name); 2004 const xmlChar *cur = ent->name; 2005 2006 buffer[nbchars++] = '&'; 2007 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2008 growBuffer(buffer); 2009 } 2010 for (;i > 0;i--) 2011 buffer[nbchars++] = *cur++; 2012 buffer[nbchars++] = ';'; 2013 } 2014 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2015 if (xmlParserDebugEntities) 2016 xmlGenericError(xmlGenericErrorContext, 2017 "String decoding PE Reference: %.30s\n", str); 2018 ent = xmlParseStringPEReference(ctxt, &str); 2019 if (ent != NULL) { 2020 xmlChar *rep; 2021 2022 ctxt->depth++; 2023 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2024 0, 0, 0); 2025 ctxt->depth--; 2026 if (rep != NULL) { 2027 current = rep; 2028 while (*current != 0) { /* non input consuming loop */ 2029 buffer[nbchars++] = *current++; 2030 if (nbchars > 2031 buffer_size - XML_PARSER_BUFFER_SIZE) { 2032 growBuffer(buffer); 2033 } 2034 } 2035 xmlFree(rep); 2036 } 2037 } 2038 } else { 2039 COPY_BUF(l,buffer,nbchars,c); 2040 str += l; 2041 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2042 growBuffer(buffer); 2043 } 2044 } 2045 if (str < last) 2046 c = CUR_SCHAR(str, l); 2047 else 2048 c = 0; 2049 } 2050 buffer[nbchars++] = 0; 2051 return(buffer); 2052 2053mem_error: 2054 xmlErrMemory(ctxt, NULL); 2055 return(NULL); 2056} 2057 2058/** 2059 * xmlStringDecodeEntities: 2060 * @ctxt: the parser context 2061 * @str: the input string 2062 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2063 * @end: an end marker xmlChar, 0 if none 2064 * @end2: an end marker xmlChar, 0 if none 2065 * @end3: an end marker xmlChar, 0 if none 2066 * 2067 * Takes a entity string 
content and process to do the adequate substitutions. 2068 * 2069 * [67] Reference ::= EntityRef | CharRef 2070 * 2071 * [69] PEReference ::= '%' Name ';' 2072 * 2073 * Returns A newly allocated string with the substitution done. The caller 2074 * must deallocate it ! 2075 */ 2076xmlChar * 2077xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2078 xmlChar end, xmlChar end2, xmlChar end3) { 2079 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2080 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2081 end, end2, end3)); 2082} 2083 2084/************************************************************************ 2085 * * 2086 * Commodity functions, cleanup needed ? * 2087 * * 2088 ************************************************************************/ 2089 2090/** 2091 * areBlanks: 2092 * @ctxt: an XML parser context 2093 * @str: a xmlChar * 2094 * @len: the size of @str 2095 * @blank_chars: we know the chars are blanks 2096 * 2097 * Is this a sequence of blank chars that one can ignore ? 2098 * 2099 * Returns 1 if ignorable 0 otherwise. 2100 */ 2101 2102static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2103 int blank_chars) { 2104 int i, ret; 2105 xmlNodePtr lastChild; 2106 2107 /* 2108 * Don't spend time trying to differentiate them, the same callback is 2109 * used ! 2110 */ 2111 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2112 return(0); 2113 2114 /* 2115 * Check for xml:space value. 
2116 */ 2117 if (*(ctxt->space) == 1) 2118 return(0); 2119 2120 /* 2121 * Check that the string is made of blanks 2122 */ 2123 if (blank_chars == 0) { 2124 for (i = 0;i < len;i++) 2125 if (!(IS_BLANK_CH(str[i]))) return(0); 2126 } 2127 2128 /* 2129 * Look if the element is mixed content in the DTD if available 2130 */ 2131 if (ctxt->node == NULL) return(0); 2132 if (ctxt->myDoc != NULL) { 2133 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2134 if (ret == 0) return(1); 2135 if (ret == 1) return(0); 2136 } 2137 2138 /* 2139 * Otherwise, heuristic :-\ 2140 */ 2141 if (RAW != '<') return(0); 2142 if ((ctxt->node->children == NULL) && 2143 (RAW == '<') && (NXT(1) == '/')) return(0); 2144 2145 lastChild = xmlGetLastChild(ctxt->node); 2146 if (lastChild == NULL) { 2147 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2148 (ctxt->node->content != NULL)) return(0); 2149 } else if (xmlNodeIsText(lastChild)) 2150 return(0); 2151 else if ((ctxt->node->children != NULL) && 2152 (xmlNodeIsText(ctxt->node->children))) 2153 return(0); 2154 return(1); 2155} 2156 2157/************************************************************************ 2158 * * 2159 * Extra stuff for namespace support * 2160 * Relates to http://www.w3.org/TR/WD-xml-names * 2161 * * 2162 ************************************************************************/ 2163 2164/** 2165 * xmlSplitQName: 2166 * @ctxt: an XML parser context 2167 * @name: an XML parser context 2168 * @prefix: a xmlChar ** 2169 * 2170 * parse an UTF8 encoded XML qualified name string 2171 * 2172 * [NS 5] QName ::= (Prefix ':')? LocalPart 2173 * 2174 * [NS 6] Prefix ::= NCName 2175 * 2176 * [NS 7] LocalPart ::= NCName 2177 * 2178 * Returns the local part, and prefix is updated 2179 * to get the Prefix if any. 
2180 */ 2181 2182xmlChar * 2183xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2184 xmlChar buf[XML_MAX_NAMELEN + 5]; 2185 xmlChar *buffer = NULL; 2186 int len = 0; 2187 int max = XML_MAX_NAMELEN; 2188 xmlChar *ret = NULL; 2189 const xmlChar *cur = name; 2190 int c; 2191 2192 if (prefix == NULL) return(NULL); 2193 *prefix = NULL; 2194 2195 if (cur == NULL) return(NULL); 2196 2197#ifndef XML_XML_NAMESPACE 2198 /* xml: prefix is not really a namespace */ 2199 if ((cur[0] == 'x') && (cur[1] == 'm') && 2200 (cur[2] == 'l') && (cur[3] == ':')) 2201 return(xmlStrdup(name)); 2202#endif 2203 2204 /* nasty but well=formed */ 2205 if (cur[0] == ':') 2206 return(xmlStrdup(name)); 2207 2208 c = *cur++; 2209 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2210 buf[len++] = c; 2211 c = *cur++; 2212 } 2213 if (len >= max) { 2214 /* 2215 * Okay someone managed to make a huge name, so he's ready to pay 2216 * for the processing speed. 2217 */ 2218 max = len * 2; 2219 2220 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2221 if (buffer == NULL) { 2222 xmlErrMemory(ctxt, NULL); 2223 return(NULL); 2224 } 2225 memcpy(buffer, buf, len); 2226 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2227 if (len + 10 > max) { 2228 xmlChar *tmp; 2229 2230 max *= 2; 2231 tmp = (xmlChar *) xmlRealloc(buffer, 2232 max * sizeof(xmlChar)); 2233 if (tmp == NULL) { 2234 xmlFree(tmp); 2235 xmlErrMemory(ctxt, NULL); 2236 return(NULL); 2237 } 2238 buffer = tmp; 2239 } 2240 buffer[len++] = c; 2241 c = *cur++; 2242 } 2243 buffer[len] = 0; 2244 } 2245 2246 /* nasty but well=formed 2247 if ((c == ':') && (*cur == 0)) { 2248 return(xmlStrdup(name)); 2249 } */ 2250 2251 if (buffer == NULL) 2252 ret = xmlStrndup(buf, len); 2253 else { 2254 ret = buffer; 2255 buffer = NULL; 2256 max = XML_MAX_NAMELEN; 2257 } 2258 2259 2260 if (c == ':') { 2261 c = *cur; 2262 *prefix = ret; 2263 if (c == 0) { 2264 return(xmlStrndup(BAD_CAST "", 0)); 2265 } 
2266 len = 0; 2267 2268 /* 2269 * Check that the first character is proper to start 2270 * a new name 2271 */ 2272 if (!(((c >= 0x61) && (c <= 0x7A)) || 2273 ((c >= 0x41) && (c <= 0x5A)) || 2274 (c == '_') || (c == ':'))) { 2275 int l; 2276 int first = CUR_SCHAR(cur, l); 2277 2278 if (!IS_LETTER(first) && (first != '_')) { 2279 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2280 "Name %s is not XML Namespace compliant\n", 2281 name); 2282 } 2283 } 2284 cur++; 2285 2286 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2287 buf[len++] = c; 2288 c = *cur++; 2289 } 2290 if (len >= max) { 2291 /* 2292 * Okay someone managed to make a huge name, so he's ready to pay 2293 * for the processing speed. 2294 */ 2295 max = len * 2; 2296 2297 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2298 if (buffer == NULL) { 2299 xmlErrMemory(ctxt, NULL); 2300 return(NULL); 2301 } 2302 memcpy(buffer, buf, len); 2303 while (c != 0) { /* tested bigname2.xml */ 2304 if (len + 10 > max) { 2305 xmlChar *tmp; 2306 2307 max *= 2; 2308 tmp = (xmlChar *) xmlRealloc(buffer, 2309 max * sizeof(xmlChar)); 2310 if (tmp == NULL) { 2311 xmlErrMemory(ctxt, NULL); 2312 xmlFree(buffer); 2313 return(NULL); 2314 } 2315 buffer = tmp; 2316 } 2317 buffer[len++] = c; 2318 c = *cur++; 2319 } 2320 buffer[len] = 0; 2321 } 2322 2323 if (buffer == NULL) 2324 ret = xmlStrndup(buf, len); 2325 else { 2326 ret = buffer; 2327 } 2328 } 2329 2330 return(ret); 2331} 2332 2333/************************************************************************ 2334 * * 2335 * The parser itself * 2336 * Relates to http://www.w3.org/TR/REC-xml * 2337 * * 2338 ************************************************************************/ 2339 2340static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2341static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2342 int *len, int *alloc, int normalize); 2343 2344/** 2345 * xmlParseName: 2346 * @ctxt: an XML parser context 2347 * 2348 * parse an XML name. 
2349 * 2350 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2351 * CombiningChar | Extender 2352 * 2353 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2354 * 2355 * [6] Names ::= Name (#x20 Name)* 2356 * 2357 * Returns the Name parsed or NULL 2358 */ 2359 2360const xmlChar * 2361xmlParseName(xmlParserCtxtPtr ctxt) { 2362 const xmlChar *in; 2363 const xmlChar *ret; 2364 int count = 0; 2365 2366 GROW; 2367 2368 /* 2369 * Accelerator for simple ASCII names 2370 */ 2371 in = ctxt->input->cur; 2372 if (((*in >= 0x61) && (*in <= 0x7A)) || 2373 ((*in >= 0x41) && (*in <= 0x5A)) || 2374 (*in == '_') || (*in == ':')) { 2375 in++; 2376 while (((*in >= 0x61) && (*in <= 0x7A)) || 2377 ((*in >= 0x41) && (*in <= 0x5A)) || 2378 ((*in >= 0x30) && (*in <= 0x39)) || 2379 (*in == '_') || (*in == '-') || 2380 (*in == ':') || (*in == '.')) 2381 in++; 2382 if ((*in > 0) && (*in < 0x80)) { 2383 count = in - ctxt->input->cur; 2384 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2385 ctxt->input->cur = in; 2386 ctxt->nbChars += count; 2387 ctxt->input->col += count; 2388 if (ret == NULL) 2389 xmlErrMemory(ctxt, NULL); 2390 return(ret); 2391 } 2392 } 2393 return(xmlParseNameComplex(ctxt)); 2394} 2395 2396/** 2397 * xmlParseNameAndCompare: 2398 * @ctxt: an XML parser context 2399 * 2400 * parse an XML name and compares for match 2401 * (specialized for endtag parsing) 2402 * 2403 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2404 * and the name for mismatch 2405 */ 2406 2407static const xmlChar * 2408xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2409 register const xmlChar *cmp = other; 2410 register const xmlChar *in; 2411 const xmlChar *ret; 2412 2413 GROW; 2414 2415 in = ctxt->input->cur; 2416 while (*in != 0 && *in == *cmp) { 2417 ++in; 2418 ++cmp; 2419 } 2420 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2421 /* success */ 2422 ctxt->input->cur = in; 2423 return (const xmlChar*) 1; 2424 } 2425 /* failure (or end of 
input buffer), check with full function */ 2426 ret = xmlParseName (ctxt); 2427 /* strings coming from the dictionnary direct compare possible */ 2428 if (ret == other) { 2429 return (const xmlChar*) 1; 2430 } 2431 return ret; 2432} 2433 2434static const xmlChar * 2435xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2436 int len = 0, l; 2437 int c; 2438 int count = 0; 2439 2440 /* 2441 * Handler for more complex cases 2442 */ 2443 GROW; 2444 c = CUR_CHAR(l); 2445 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2446 (!IS_LETTER(c) && (c != '_') && 2447 (c != ':'))) { 2448 return(NULL); 2449 } 2450 2451 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2452 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2453 (c == '.') || (c == '-') || 2454 (c == '_') || (c == ':') || 2455 (IS_COMBINING(c)) || 2456 (IS_EXTENDER(c)))) { 2457 if (count++ > 100) { 2458 count = 0; 2459 GROW; 2460 } 2461 len += l; 2462 NEXTL(l); 2463 c = CUR_CHAR(l); 2464 } 2465 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2466} 2467 2468/** 2469 * xmlParseStringName: 2470 * @ctxt: an XML parser context 2471 * @str: a pointer to the string pointer (IN/OUT) 2472 * 2473 * parse an XML name. 2474 * 2475 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2476 * CombiningChar | Extender 2477 * 2478 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2479 * 2480 * [6] Names ::= Name (#x20 Name)* 2481 * 2482 * Returns the Name parsed or NULL. The @str pointer 2483 * is updated to the current location in the string. 
2484 */ 2485 2486static xmlChar * 2487xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2488 xmlChar buf[XML_MAX_NAMELEN + 5]; 2489 const xmlChar *cur = *str; 2490 int len = 0, l; 2491 int c; 2492 2493 c = CUR_SCHAR(cur, l); 2494 if (!IS_LETTER(c) && (c != '_') && 2495 (c != ':')) { 2496 return(NULL); 2497 } 2498 2499 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2500 (c == '.') || (c == '-') || 2501 (c == '_') || (c == ':') || 2502 (IS_COMBINING(c)) || 2503 (IS_EXTENDER(c))) { 2504 COPY_BUF(l,buf,len,c); 2505 cur += l; 2506 c = CUR_SCHAR(cur, l); 2507 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2508 /* 2509 * Okay someone managed to make a huge name, so he's ready to pay 2510 * for the processing speed. 2511 */ 2512 xmlChar *buffer; 2513 int max = len * 2; 2514 2515 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2516 if (buffer == NULL) { 2517 xmlErrMemory(ctxt, NULL); 2518 return(NULL); 2519 } 2520 memcpy(buffer, buf, len); 2521 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2522 /* test bigentname.xml */ 2523 (c == '.') || (c == '-') || 2524 (c == '_') || (c == ':') || 2525 (IS_COMBINING(c)) || 2526 (IS_EXTENDER(c))) { 2527 if (len + 10 > max) { 2528 xmlChar *tmp; 2529 max *= 2; 2530 tmp = (xmlChar *) xmlRealloc(buffer, 2531 max * sizeof(xmlChar)); 2532 if (tmp == NULL) { 2533 xmlErrMemory(ctxt, NULL); 2534 xmlFree(buffer); 2535 return(NULL); 2536 } 2537 buffer = tmp; 2538 } 2539 COPY_BUF(l,buffer,len,c); 2540 cur += l; 2541 c = CUR_SCHAR(cur, l); 2542 } 2543 buffer[len] = 0; 2544 *str = cur; 2545 return(buffer); 2546 } 2547 } 2548 *str = cur; 2549 return(xmlStrndup(buf, len)); 2550} 2551 2552/** 2553 * xmlParseNmtoken: 2554 * @ctxt: an XML parser context 2555 * 2556 * parse an XML Nmtoken. 
2557 * 2558 * [7] Nmtoken ::= (NameChar)+ 2559 * 2560 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 2561 * 2562 * Returns the Nmtoken parsed or NULL 2563 */ 2564 2565xmlChar * 2566xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2567 xmlChar buf[XML_MAX_NAMELEN + 5]; 2568 int len = 0, l; 2569 int c; 2570 int count = 0; 2571 2572 GROW; 2573 c = CUR_CHAR(l); 2574 2575 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2576 (c == '.') || (c == '-') || 2577 (c == '_') || (c == ':') || 2578 (IS_COMBINING(c)) || 2579 (IS_EXTENDER(c))) { 2580 if (count++ > 100) { 2581 count = 0; 2582 GROW; 2583 } 2584 COPY_BUF(l,buf,len,c); 2585 NEXTL(l); 2586 c = CUR_CHAR(l); 2587 if (len >= XML_MAX_NAMELEN) { 2588 /* 2589 * Okay someone managed to make a huge token, so he's ready to pay 2590 * for the processing speed. 2591 */ 2592 xmlChar *buffer; 2593 int max = len * 2; 2594 2595 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2596 if (buffer == NULL) { 2597 xmlErrMemory(ctxt, NULL); 2598 return(NULL); 2599 } 2600 memcpy(buffer, buf, len); 2601 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2602 (c == '.') || (c == '-') || 2603 (c == '_') || (c == ':') || 2604 (IS_COMBINING(c)) || 2605 (IS_EXTENDER(c))) { 2606 if (count++ > 100) { 2607 count = 0; 2608 GROW; 2609 } 2610 if (len + 10 > max) { 2611 xmlChar *tmp; 2612 2613 max *= 2; 2614 tmp = (xmlChar *) xmlRealloc(buffer, 2615 max * sizeof(xmlChar)); 2616 if (tmp == NULL) { 2617 xmlErrMemory(ctxt, NULL); 2618 xmlFree(buffer); 2619 return(NULL); 2620 } 2621 buffer = tmp; 2622 } 2623 COPY_BUF(l,buffer,len,c); 2624 NEXTL(l); 2625 c = CUR_CHAR(l); 2626 } 2627 buffer[len] = 0; 2628 return(buffer); 2629 } 2630 } 2631 if (len == 0) 2632 return(NULL); 2633 return(xmlStrndup(buf, len)); 2634} 2635 2636/** 2637 * xmlParseEntityValue: 2638 * @ctxt: an XML parser context 2639 * @orig: if non-NULL store a copy of the original entity value 2640 * 2641 * parse a value for ENTITY declarations 2642 * 2643 * [9] 
EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2644 * "'" ([^%&'] | PEReference | Reference)* "'" 2645 * 2646 * Returns the EntityValue parsed with reference substituted or NULL 2647 */ 2648 2649xmlChar * 2650xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2651 xmlChar *buf = NULL; 2652 int len = 0; 2653 int size = XML_PARSER_BUFFER_SIZE; 2654 int c, l; 2655 xmlChar stop; 2656 xmlChar *ret = NULL; 2657 const xmlChar *cur = NULL; 2658 xmlParserInputPtr input; 2659 2660 if (RAW == '"') stop = '"'; 2661 else if (RAW == '\'') stop = '\''; 2662 else { 2663 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 2664 return(NULL); 2665 } 2666 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 2667 if (buf == NULL) { 2668 xmlErrMemory(ctxt, NULL); 2669 return(NULL); 2670 } 2671 2672 /* 2673 * The content of the entity definition is copied in a buffer. 2674 */ 2675 2676 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2677 input = ctxt->input; 2678 GROW; 2679 NEXT; 2680 c = CUR_CHAR(l); 2681 /* 2682 * NOTE: 4.4.5 Included in Literal 2683 * When a parameter entity reference appears in a literal entity 2684 * value, ... a single or double quote character in the replacement 2685 * text is always treated as a normal data character and will not 2686 * terminate the literal. 2687 * In practice it means we stop the loop only when back at parsing 2688 * the initial entity and the quote is found 2689 */ 2690 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2691 (ctxt->input != input))) { 2692 if (len + 5 >= size) { 2693 xmlChar *tmp; 2694 2695 size *= 2; 2696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2697 if (tmp == NULL) { 2698 xmlErrMemory(ctxt, NULL); 2699 xmlFree(buf); 2700 return(NULL); 2701 } 2702 buf = tmp; 2703 } 2704 COPY_BUF(l,buf,len,c); 2705 NEXTL(l); 2706 /* 2707 * Pop-up of finished entities. 
2708 */ 2709 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2710 xmlPopInput(ctxt); 2711 2712 GROW; 2713 c = CUR_CHAR(l); 2714 if (c == 0) { 2715 GROW; 2716 c = CUR_CHAR(l); 2717 } 2718 } 2719 buf[len] = 0; 2720 2721 /* 2722 * Raise problem w.r.t. '&' and '%' being used in non-entities 2723 * reference constructs. Note Charref will be handled in 2724 * xmlStringDecodeEntities() 2725 */ 2726 cur = buf; 2727 while (*cur != 0) { /* non input consuming */ 2728 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2729 xmlChar *name; 2730 xmlChar tmp = *cur; 2731 2732 cur++; 2733 name = xmlParseStringName(ctxt, &cur); 2734 if ((name == NULL) || (*cur != ';')) { 2735 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 2736 "EntityValue: '%c' forbidden except for entities references\n", 2737 tmp); 2738 } 2739 if ((tmp == '%') && (ctxt->inSubset == 1) && 2740 (ctxt->inputNr == 1)) { 2741 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 2742 } 2743 if (name != NULL) 2744 xmlFree(name); 2745 if (*cur == 0) 2746 break; 2747 } 2748 cur++; 2749 } 2750 2751 /* 2752 * Then PEReference entities are substituted. 2753 */ 2754 if (c != stop) { 2755 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 2756 xmlFree(buf); 2757 } else { 2758 NEXT; 2759 /* 2760 * NOTE: 4.4.7 Bypassed 2761 * When a general entity reference appears in the EntityValue in 2762 * an entity declaration, it is bypassed and left as is. 2763 * so XML_SUBSTITUTE_REF is not set here. 
2764 */ 2765 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2766 0, 0, 0); 2767 if (orig != NULL) 2768 *orig = buf; 2769 else 2770 xmlFree(buf); 2771 } 2772 2773 return(ret); 2774} 2775 2776/** 2777 * xmlParseAttValueComplex: 2778 * @ctxt: an XML parser context 2779 * @len: the resulting attribute len 2780 * @normalize: wether to apply the inner normalization 2781 * 2782 * parse a value for an attribute, this is the fallback function 2783 * of xmlParseAttValue() when the attribute parsing requires handling 2784 * of non-ASCII characters, or normalization compaction. 2785 * 2786 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2787 */ 2788static xmlChar * 2789xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 2790 xmlChar limit = 0; 2791 xmlChar *buf = NULL; 2792 int len = 0; 2793 int buf_size = 0; 2794 int c, l, in_space = 0; 2795 xmlChar *current = NULL; 2796 xmlEntityPtr ent; 2797 2798 if (NXT(0) == '"') { 2799 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2800 limit = '"'; 2801 NEXT; 2802 } else if (NXT(0) == '\'') { 2803 limit = '\''; 2804 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2805 NEXT; 2806 } else { 2807 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 2808 return(NULL); 2809 } 2810 2811 /* 2812 * allocate a translation buffer. 2813 */ 2814 buf_size = XML_PARSER_BUFFER_SIZE; 2815 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 2816 if (buf == NULL) goto mem_error; 2817 2818 /* 2819 * OK loop until we reach one of the ending char or a size limit. 
2820 */ 2821 c = CUR_CHAR(l); 2822 while ((NXT(0) != limit) && /* checked */ 2823 (c != '<')) { 2824 if (c == 0) break; 2825 if (c == '&') { 2826 in_space = 0; 2827 if (NXT(1) == '#') { 2828 int val = xmlParseCharRef(ctxt); 2829 2830 if (val == '&') { 2831 if (ctxt->replaceEntities) { 2832 if (len > buf_size - 10) { 2833 growBuffer(buf); 2834 } 2835 buf[len++] = '&'; 2836 } else { 2837 /* 2838 * The reparsing will be done in xmlStringGetNodeList() 2839 * called by the attribute() function in SAX.c 2840 */ 2841 if (len > buf_size - 10) { 2842 growBuffer(buf); 2843 } 2844 buf[len++] = '&'; 2845 buf[len++] = '#'; 2846 buf[len++] = '3'; 2847 buf[len++] = '8'; 2848 buf[len++] = ';'; 2849 } 2850 } else { 2851 if (len > buf_size - 10) { 2852 growBuffer(buf); 2853 } 2854 len += xmlCopyChar(0, &buf[len], val); 2855 } 2856 } else { 2857 ent = xmlParseEntityRef(ctxt); 2858 if ((ent != NULL) && 2859 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2860 if (len > buf_size - 10) { 2861 growBuffer(buf); 2862 } 2863 if ((ctxt->replaceEntities == 0) && 2864 (ent->content[0] == '&')) { 2865 buf[len++] = '&'; 2866 buf[len++] = '#'; 2867 buf[len++] = '3'; 2868 buf[len++] = '8'; 2869 buf[len++] = ';'; 2870 } else { 2871 buf[len++] = ent->content[0]; 2872 } 2873 } else if ((ent != NULL) && 2874 (ctxt->replaceEntities != 0)) { 2875 xmlChar *rep; 2876 2877 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 2878 rep = xmlStringDecodeEntities(ctxt, ent->content, 2879 XML_SUBSTITUTE_REF, 2880 0, 0, 0); 2881 if (rep != NULL) { 2882 current = rep; 2883 while (*current != 0) { /* non input consuming */ 2884 buf[len++] = *current++; 2885 if (len > buf_size - 10) { 2886 growBuffer(buf); 2887 } 2888 } 2889 xmlFree(rep); 2890 } 2891 } else { 2892 if (len > buf_size - 10) { 2893 growBuffer(buf); 2894 } 2895 if (ent->content != NULL) 2896 buf[len++] = ent->content[0]; 2897 } 2898 } else if (ent != NULL) { 2899 int i = xmlStrlen(ent->name); 2900 const xmlChar *cur = ent->name; 2901 2902 /* 2903 * 
This may look absurd but is needed to detect 2904 * entities problems 2905 */ 2906 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 2907 (ent->content != NULL)) { 2908 xmlChar *rep; 2909 rep = xmlStringDecodeEntities(ctxt, ent->content, 2910 XML_SUBSTITUTE_REF, 0, 0, 0); 2911 if (rep != NULL) 2912 xmlFree(rep); 2913 } 2914 2915 /* 2916 * Just output the reference 2917 */ 2918 buf[len++] = '&'; 2919 if (len > buf_size - i - 10) { 2920 growBuffer(buf); 2921 } 2922 for (;i > 0;i--) 2923 buf[len++] = *cur++; 2924 buf[len++] = ';'; 2925 } 2926 } 2927 } else { 2928 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 2929 if ((len != 0) || (!normalize)) { 2930 if ((!normalize) || (!in_space)) { 2931 COPY_BUF(l,buf,len,0x20); 2932 if (len > buf_size - 10) { 2933 growBuffer(buf); 2934 } 2935 } 2936 in_space = 1; 2937 } 2938 } else { 2939 in_space = 0; 2940 COPY_BUF(l,buf,len,c); 2941 if (len > buf_size - 10) { 2942 growBuffer(buf); 2943 } 2944 } 2945 NEXTL(l); 2946 } 2947 GROW; 2948 c = CUR_CHAR(l); 2949 } 2950 if ((in_space) && (normalize)) { 2951 while (buf[len - 1] == 0x20) len--; 2952 } 2953 buf[len] = 0; 2954 if (RAW == '<') { 2955 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 2956 } else if (RAW != limit) { 2957 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 2958 "AttValue: ' expected\n"); 2959 } else 2960 NEXT; 2961 if (attlen != NULL) *attlen = len; 2962 return(buf); 2963 2964mem_error: 2965 xmlErrMemory(ctxt, NULL); 2966 return(NULL); 2967} 2968 2969/** 2970 * xmlParseAttValue: 2971 * @ctxt: an XML parser context 2972 * 2973 * parse a value for an attribute 2974 * Note: the parser won't do substitution of entities here, this 2975 * will be handled later in xmlStringGetNodeList 2976 * 2977 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2978 * "'" ([^<&'] | Reference)* "'" 2979 * 2980 * 3.3.3 Attribute-Value Normalization: 2981 * Before the value of an attribute is passed to the application or 2982 * checked for validity, the XML 
processor must normalize it as follows: 2983 * - a character reference is processed by appending the referenced 2984 * character to the attribute value 2985 * - an entity reference is processed by recursively processing the 2986 * replacement text of the entity 2987 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2988 * appending #x20 to the normalized value, except that only a single 2989 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2990 * parsed entity or the literal entity value of an internal parsed entity 2991 * - other characters are processed by appending them to the normalized value 2992 * If the declared value is not CDATA, then the XML processor must further 2993 * process the normalized attribute value by discarding any leading and 2994 * trailing space (#x20) characters, and by replacing sequences of space 2995 * (#x20) characters by a single space (#x20) character. 2996 * All attributes for which no declaration has been read should be treated 2997 * by a non-validating parser as if declared CDATA. 2998 * 2999 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3000 */ 3001 3002 3003xmlChar * 3004xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3005 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3006 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3007} 3008 3009/** 3010 * xmlParseSystemLiteral: 3011 * @ctxt: an XML parser context 3012 * 3013 * parse an XML Literal 3014 * 3015 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3016 * 3017 * Returns the SystemLiteral parsed or NULL 3018 */ 3019 3020xmlChar * 3021xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3022 xmlChar *buf = NULL; 3023 int len = 0; 3024 int size = XML_PARSER_BUFFER_SIZE; 3025 int cur, l; 3026 xmlChar stop; 3027 int state = ctxt->instate; 3028 int count = 0; 3029 3030 SHRINK; 3031 if (RAW == '"') { 3032 NEXT; 3033 stop = '"'; 3034 } else if (RAW == '\'') { 3035 NEXT; 3036 stop = '\''; 3037 } else { 3038 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3039 return(NULL); 3040 } 3041 3042 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3043 if (buf == NULL) { 3044 xmlErrMemory(ctxt, NULL); 3045 return(NULL); 3046 } 3047 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3048 cur = CUR_CHAR(l); 3049 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3050 if (len + 5 >= size) { 3051 xmlChar *tmp; 3052 3053 size *= 2; 3054 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3055 if (tmp == NULL) { 3056 xmlFree(buf); 3057 xmlErrMemory(ctxt, NULL); 3058 ctxt->instate = (xmlParserInputState) state; 3059 return(NULL); 3060 } 3061 buf = tmp; 3062 } 3063 count++; 3064 if (count > 50) { 3065 GROW; 3066 count = 0; 3067 } 3068 COPY_BUF(l,buf,len,cur); 3069 NEXTL(l); 3070 cur = CUR_CHAR(l); 3071 if (cur == 0) { 3072 GROW; 3073 SHRINK; 3074 cur = CUR_CHAR(l); 3075 } 3076 } 3077 buf[len] = 0; 3078 ctxt->instate = (xmlParserInputState) state; 3079 if (!IS_CHAR(cur)) { 3080 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3081 } else { 3082 NEXT; 3083 } 3084 return(buf); 3085} 3086 3087/** 3088 * xmlParsePubidLiteral: 3089 * 
@ctxt: an XML parser context 3090 * 3091 * parse an XML public literal 3092 * 3093 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3094 * 3095 * Returns the PubidLiteral parsed or NULL. 3096 */ 3097 3098xmlChar * 3099xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3100 xmlChar *buf = NULL; 3101 int len = 0; 3102 int size = XML_PARSER_BUFFER_SIZE; 3103 xmlChar cur; 3104 xmlChar stop; 3105 int count = 0; 3106 xmlParserInputState oldstate = ctxt->instate; 3107 3108 SHRINK; 3109 if (RAW == '"') { 3110 NEXT; 3111 stop = '"'; 3112 } else if (RAW == '\'') { 3113 NEXT; 3114 stop = '\''; 3115 } else { 3116 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3117 return(NULL); 3118 } 3119 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3120 if (buf == NULL) { 3121 xmlErrMemory(ctxt, NULL); 3122 return(NULL); 3123 } 3124 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3125 cur = CUR; 3126 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3127 if (len + 1 >= size) { 3128 xmlChar *tmp; 3129 3130 size *= 2; 3131 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3132 if (tmp == NULL) { 3133 xmlErrMemory(ctxt, NULL); 3134 xmlFree(buf); 3135 return(NULL); 3136 } 3137 buf = tmp; 3138 } 3139 buf[len++] = cur; 3140 count++; 3141 if (count > 50) { 3142 GROW; 3143 count = 0; 3144 } 3145 NEXT; 3146 cur = CUR; 3147 if (cur == 0) { 3148 GROW; 3149 SHRINK; 3150 cur = CUR; 3151 } 3152 } 3153 buf[len] = 0; 3154 if (cur != stop) { 3155 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3156 } else { 3157 NEXT; 3158 } 3159 ctxt->instate = oldstate; 3160 return(buf); 3161} 3162 3163void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3164/** 3165 * xmlParseCharData: 3166 * @ctxt: an XML parser context 3167 * @cdata: int indicating whether we are within a CDATA section 3168 * 3169 * parse a CharData section. 3170 * if we are within a CDATA section ']]>' marks an end of section. 
*
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * runs of plain ASCII text directly to the SAX callbacks. As soon as a
 * character outside that fast set is met (or when @cdata is non zero)
 * the work is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position recorded at entry and refreshed each time a chunk is
     * delivered; written back before falling through to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* First try to match a run made only of spaces and newlines. */
            while (*in == 0x20) in++;
            if (*in == 0xA) {
                ctxt->input->line++;
                in++;
                while (*in == 0xA) {
                    ctxt->input->line++;
                    in++;
                }
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Everything from cur up to the '<' was spaces/newlines:
                 * deliver it (if any) and stop, the caller handles '<'.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the two callbacks
                     * are different functions.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                   tmp, nbchar);
                        } else if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }
get_more:
            /*
             * Skip over tab and the ASCII range 0x20-0x7F except for
             * '&', '<' and ']' which need a closer look.
             */
            while (((*in > ']') && (*in <= 0x7F)) ||
                   ((*in > '&') && (*in < '<')) ||
                   ((*in > '<') && (*in < ']')) ||
                   ((*in >= 0x20) && (*in < '&')) ||
                   (*in == 0x09))
                in++;
            if (*in == 0xA) {
                ctxt->input->line++;
                in++;
                while (*in == 0xA) {
                    ctxt->input->line++;
                    in++;
                }
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                goto get_more;
            }
            /* Deliver the run [cur, in) collected so far. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank: let areBlanks() decide
                     * which callback receives it.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: cur is left on the LF so that the CR is not
                     * part of the next run, then scanning resumes after it.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++;
                    continue; /* while */
                }
                /* Lone CR: step back and let the checks below see it. */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the generic parser, starting from the position recorded
     * at entry or after the last chunk handed to SAX.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
3316 */ 3317void 3318xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3319 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3320 int nbchar = 0; 3321 int cur, l; 3322 int count = 0; 3323 3324 SHRINK; 3325 GROW; 3326 cur = CUR_CHAR(l); 3327 while ((cur != '<') && /* checked */ 3328 (cur != '&') && 3329 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3330 if ((cur == ']') && (NXT(1) == ']') && 3331 (NXT(2) == '>')) { 3332 if (cdata) break; 3333 else { 3334 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3335 } 3336 } 3337 COPY_BUF(l,buf,nbchar,cur); 3338 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3339 buf[nbchar] = 0; 3340 3341 /* 3342 * OK the segment is to be consumed as chars. 3343 */ 3344 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3345 if (areBlanks(ctxt, buf, nbchar, 0)) { 3346 if (ctxt->sax->ignorableWhitespace != NULL) 3347 ctxt->sax->ignorableWhitespace(ctxt->userData, 3348 buf, nbchar); 3349 } else { 3350 if (ctxt->sax->characters != NULL) 3351 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3352 } 3353 } 3354 nbchar = 0; 3355 } 3356 count++; 3357 if (count > 50) { 3358 GROW; 3359 count = 0; 3360 } 3361 NEXTL(l); 3362 cur = CUR_CHAR(l); 3363 } 3364 if (nbchar != 0) { 3365 buf[nbchar] = 0; 3366 /* 3367 * OK the segment is to be consumed as chars. 
3368 */ 3369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3370 if (areBlanks(ctxt, buf, nbchar, 0)) { 3371 if (ctxt->sax->ignorableWhitespace != NULL) 3372 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3373 } else { 3374 if (ctxt->sax->characters != NULL) 3375 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3376 } 3377 } 3378 } 3379} 3380 3381/** 3382 * xmlParseExternalID: 3383 * @ctxt: an XML parser context 3384 * @publicID: a xmlChar** receiving PubidLiteral 3385 * @strict: indicate whether we should restrict parsing to only 3386 * production [75], see NOTE below 3387 * 3388 * Parse an External ID or a Public ID 3389 * 3390 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3391 * 'PUBLIC' S PubidLiteral S SystemLiteral 3392 * 3393 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3394 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3395 * 3396 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3397 * 3398 * Returns the function returns SystemLiteral and in the second 3399 * case publicID receives PubidLiteral, is strict is off 3400 * it is possible to return NULL and have publicID set. 
3401 */ 3402 3403xmlChar * 3404xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3405 xmlChar *URI = NULL; 3406 3407 SHRINK; 3408 3409 *publicID = NULL; 3410 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3411 SKIP(6); 3412 if (!IS_BLANK_CH(CUR)) { 3413 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3414 "Space required after 'SYSTEM'\n"); 3415 } 3416 SKIP_BLANKS; 3417 URI = xmlParseSystemLiteral(ctxt); 3418 if (URI == NULL) { 3419 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3420 } 3421 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3422 SKIP(6); 3423 if (!IS_BLANK_CH(CUR)) { 3424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3425 "Space required after 'PUBLIC'\n"); 3426 } 3427 SKIP_BLANKS; 3428 *publicID = xmlParsePubidLiteral(ctxt); 3429 if (*publicID == NULL) { 3430 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3431 } 3432 if (strict) { 3433 /* 3434 * We don't handle [83] so "S SystemLiteral" is required. 3435 */ 3436 if (!IS_BLANK_CH(CUR)) { 3437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3438 "Space required after the Public Identifier\n"); 3439 } 3440 } else { 3441 /* 3442 * We handle [83] so we return immediately, if 3443 * "S SystemLiteral" is not detected. From a purely parsing 3444 * point of view that's a nice mess. 3445 */ 3446 const xmlChar *ptr; 3447 GROW; 3448 3449 ptr = CUR_PTR; 3450 if (!IS_BLANK_CH(*ptr)) return(NULL); 3451 3452 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3453 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3454 } 3455 SKIP_BLANKS; 3456 URI = xmlParseSystemLiteral(ctxt); 3457 if (URI == NULL) { 3458 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3459 } 3460 } 3461 return(URI); 3462} 3463 3464/** 3465 * xmlParseComment: 3466 * @ctxt: an XML parser context 3467 * 3468 * Skip an XML (SGML) comment <!-- .... --> 3469 * The spec says that "For compatibility, the string "--" (double-hyphen) 3470 * must not occur within comments. 
" 3471 * 3472 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3473 */ 3474void 3475xmlParseComment(xmlParserCtxtPtr ctxt) { 3476 xmlChar *buf = NULL; 3477 int len; 3478 int size = XML_PARSER_BUFFER_SIZE; 3479 int q, ql; 3480 int r, rl; 3481 int cur, l; 3482 xmlParserInputState state; 3483 xmlParserInputPtr input = ctxt->input; 3484 int count = 0; 3485 3486 /* 3487 * Check that there is a comment right here. 3488 */ 3489 if ((RAW != '<') || (NXT(1) != '!') || 3490 (NXT(2) != '-') || (NXT(3) != '-')) return; 3491 3492 state = ctxt->instate; 3493 ctxt->instate = XML_PARSER_COMMENT; 3494 SHRINK; 3495 SKIP(4); 3496 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3497 if (buf == NULL) { 3498 xmlErrMemory(ctxt, NULL); 3499 ctxt->instate = state; 3500 return; 3501 } 3502 q = CUR_CHAR(ql); 3503 if (q == 0) 3504 goto not_terminated; 3505 NEXTL(ql); 3506 r = CUR_CHAR(rl); 3507 if (r == 0) 3508 goto not_terminated; 3509 NEXTL(rl); 3510 cur = CUR_CHAR(l); 3511 if (cur == 0) 3512 goto not_terminated; 3513 len = 0; 3514 while (IS_CHAR(cur) && /* checked */ 3515 ((cur != '>') || 3516 (r != '-') || (q != '-'))) { 3517 if ((r == '-') && (q == '-')) { 3518 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 3519 } 3520 if (len + 5 >= size) { 3521 xmlChar *new_buf; 3522 size *= 2; 3523 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3524 if (new_buf == NULL) { 3525 xmlFree (buf); 3526 xmlErrMemory(ctxt, NULL); 3527 ctxt->instate = state; 3528 return; 3529 } 3530 buf = new_buf; 3531 } 3532 COPY_BUF(ql,buf,len,q); 3533 q = r; 3534 ql = rl; 3535 r = cur; 3536 rl = l; 3537 3538 count++; 3539 if (count > 50) { 3540 GROW; 3541 count = 0; 3542 } 3543 NEXTL(l); 3544 cur = CUR_CHAR(l); 3545 if (cur == 0) { 3546 SHRINK; 3547 GROW; 3548 cur = CUR_CHAR(l); 3549 } 3550 } 3551 buf[len] = 0; 3552 if (!IS_CHAR(cur)) { 3553 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3554 "Comment not terminated \n<!--%.50s\n", buf); 3555 xmlFree(buf); 3556 } 
else { 3557 if (input != ctxt->input) { 3558 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 3559 "Comment doesn't start and stop in the same entity\n"); 3560 } 3561 NEXT; 3562 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3563 (!ctxt->disableSAX)) 3564 ctxt->sax->comment(ctxt->userData, buf); 3565 xmlFree(buf); 3566 } 3567 ctxt->instate = state; 3568 return; 3569not_terminated: 3570 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3571 "Comment not terminated\n", NULL); 3572 xmlFree(buf); 3573} 3574 3575/** 3576 * xmlParsePITarget: 3577 * @ctxt: an XML parser context 3578 * 3579 * parse the name of a PI 3580 * 3581 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3582 * 3583 * Returns the PITarget name or NULL 3584 */ 3585 3586const xmlChar * 3587xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3588 const xmlChar *name; 3589 3590 name = xmlParseName(ctxt); 3591 if ((name != NULL) && 3592 ((name[0] == 'x') || (name[0] == 'X')) && 3593 ((name[1] == 'm') || (name[1] == 'M')) && 3594 ((name[2] == 'l') || (name[2] == 'L'))) { 3595 int i; 3596 if ((name[0] == 'x') && (name[1] == 'm') && 3597 (name[2] == 'l') && (name[3] == 0)) { 3598 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3599 "XML declaration allowed only at the start of the document\n"); 3600 return(name); 3601 } else if (name[3] == 0) { 3602 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 3603 return(name); 3604 } 3605 for (i = 0;;i++) { 3606 if (xmlW3CPIs[i] == NULL) break; 3607 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 3608 return(name); 3609 } 3610 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 3611 "xmlParsePITarget: invalid name prefix 'xml'\n", 3612 NULL, NULL); 3613 } 3614 return(name); 3615} 3616 3617#ifdef LIBXML_CATALOG_ENABLED 3618/** 3619 * xmlParseCatalogPI: 3620 * @ctxt: an XML parser context 3621 * @catalog: the PI value string 3622 * 3623 * parse an XML Catalog Processing Instruction. 
3624 * 3625 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 3626 * 3627 * Occurs only if allowed by the user and if happening in the Misc 3628 * part of the document before any doctype informations 3629 * This will add the given catalog to the parsing context in order 3630 * to be used if there is a resolution need further down in the document 3631 */ 3632 3633static void 3634xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 3635 xmlChar *URL = NULL; 3636 const xmlChar *tmp, *base; 3637 xmlChar marker; 3638 3639 tmp = catalog; 3640 while (IS_BLANK_CH(*tmp)) tmp++; 3641 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 3642 goto error; 3643 tmp += 7; 3644 while (IS_BLANK_CH(*tmp)) tmp++; 3645 if (*tmp != '=') { 3646 return; 3647 } 3648 tmp++; 3649 while (IS_BLANK_CH(*tmp)) tmp++; 3650 marker = *tmp; 3651 if ((marker != '\'') && (marker != '"')) 3652 goto error; 3653 tmp++; 3654 base = tmp; 3655 while ((*tmp != 0) && (*tmp != marker)) tmp++; 3656 if (*tmp == 0) 3657 goto error; 3658 URL = xmlStrndup(base, tmp - base); 3659 tmp++; 3660 while (IS_BLANK_CH(*tmp)) tmp++; 3661 if (*tmp != 0) 3662 goto error; 3663 3664 if (URL != NULL) { 3665 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 3666 xmlFree(URL); 3667 } 3668 return; 3669 3670error: 3671 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 3672 "Catalog PI syntax error: %s\n", 3673 catalog, NULL); 3674 if (URL != NULL) 3675 xmlFree(URL); 3676} 3677#endif 3678 3679/** 3680 * xmlParsePI: 3681 * @ctxt: an XML parser context 3682 * 3683 * parse an XML Processing Instruction. 3684 * 3685 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3686 * 3687 * The processing is transfered to SAX once parsed. 
3688 */ 3689 3690void 3691xmlParsePI(xmlParserCtxtPtr ctxt) { 3692 xmlChar *buf = NULL; 3693 int len = 0; 3694 int size = XML_PARSER_BUFFER_SIZE; 3695 int cur, l; 3696 const xmlChar *target; 3697 xmlParserInputState state; 3698 int count = 0; 3699 3700 if ((RAW == '<') && (NXT(1) == '?')) { 3701 xmlParserInputPtr input = ctxt->input; 3702 state = ctxt->instate; 3703 ctxt->instate = XML_PARSER_PI; 3704 /* 3705 * this is a Processing Instruction. 3706 */ 3707 SKIP(2); 3708 SHRINK; 3709 3710 /* 3711 * Parse the target name and check for special support like 3712 * namespace. 3713 */ 3714 target = xmlParsePITarget(ctxt); 3715 if (target != NULL) { 3716 if ((RAW == '?') && (NXT(1) == '>')) { 3717 if (input != ctxt->input) { 3718 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 3719 "PI declaration doesn't start and stop in the same entity\n"); 3720 } 3721 SKIP(2); 3722 3723 /* 3724 * SAX: PI detected. 3725 */ 3726 if ((ctxt->sax) && (!ctxt->disableSAX) && 3727 (ctxt->sax->processingInstruction != NULL)) 3728 ctxt->sax->processingInstruction(ctxt->userData, 3729 target, NULL); 3730 ctxt->instate = state; 3731 return; 3732 } 3733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3734 if (buf == NULL) { 3735 xmlErrMemory(ctxt, NULL); 3736 ctxt->instate = state; 3737 return; 3738 } 3739 cur = CUR; 3740 if (!IS_BLANK(cur)) { 3741 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 3742 "ParsePI: PI %s space expected\n", target); 3743 } 3744 SKIP_BLANKS; 3745 cur = CUR_CHAR(l); 3746 while (IS_CHAR(cur) && /* checked */ 3747 ((cur != '?') || (NXT(1) != '>'))) { 3748 if (len + 5 >= size) { 3749 xmlChar *tmp; 3750 3751 size *= 2; 3752 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3753 if (tmp == NULL) { 3754 xmlErrMemory(ctxt, NULL); 3755 xmlFree(buf); 3756 ctxt->instate = state; 3757 return; 3758 } 3759 buf = tmp; 3760 } 3761 count++; 3762 if (count > 50) { 3763 GROW; 3764 count = 0; 3765 } 3766 COPY_BUF(l,buf,len,cur); 3767 NEXTL(l); 3768 cur = CUR_CHAR(l); 3769 
if (cur == 0) { 3770 SHRINK; 3771 GROW; 3772 cur = CUR_CHAR(l); 3773 } 3774 } 3775 buf[len] = 0; 3776 if (cur != '?') { 3777 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 3778 "ParsePI: PI %s never end ...\n", target); 3779 } else { 3780 if (input != ctxt->input) { 3781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3782 "PI declaration doesn't start and stop in the same entity\n"); 3783 } 3784 SKIP(2); 3785 3786#ifdef LIBXML_CATALOG_ENABLED 3787 if (((state == XML_PARSER_MISC) || 3788 (state == XML_PARSER_START)) && 3789 (xmlStrEqual(target, XML_CATALOG_PI))) { 3790 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 3791 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 3792 (allow == XML_CATA_ALLOW_ALL)) 3793 xmlParseCatalogPI(ctxt, buf); 3794 } 3795#endif 3796 3797 3798 /* 3799 * SAX: PI detected. 3800 */ 3801 if ((ctxt->sax) && (!ctxt->disableSAX) && 3802 (ctxt->sax->processingInstruction != NULL)) 3803 ctxt->sax->processingInstruction(ctxt->userData, 3804 target, buf); 3805 } 3806 xmlFree(buf); 3807 } else { 3808 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 3809 } 3810 ctxt->instate = state; 3811 } 3812} 3813 3814/** 3815 * xmlParseNotationDecl: 3816 * @ctxt: an XML parser context 3817 * 3818 * parse a notation declaration 3819 * 3820 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3821 * 3822 * Hence there is actually 3 choices: 3823 * 'PUBLIC' S PubidLiteral 3824 * 'PUBLIC' S PubidLiteral S SystemLiteral 3825 * and 'SYSTEM' S SystemLiteral 3826 * 3827 * See the NOTE on xmlParseExternalID(). 
3828 */ 3829 3830void 3831xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3832 const xmlChar *name; 3833 xmlChar *Pubid; 3834 xmlChar *Systemid; 3835 3836 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 3837 xmlParserInputPtr input = ctxt->input; 3838 SHRINK; 3839 SKIP(10); 3840 if (!IS_BLANK_CH(CUR)) { 3841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3842 "Space required after '<!NOTATION'\n"); 3843 return; 3844 } 3845 SKIP_BLANKS; 3846 3847 name = xmlParseName(ctxt); 3848 if (name == NULL) { 3849 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 3850 return; 3851 } 3852 if (!IS_BLANK_CH(CUR)) { 3853 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3854 "Space required after the NOTATION name'\n"); 3855 return; 3856 } 3857 SKIP_BLANKS; 3858 3859 /* 3860 * Parse the IDs. 3861 */ 3862 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 3863 SKIP_BLANKS; 3864 3865 if (RAW == '>') { 3866 if (input != ctxt->input) { 3867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3868 "Notation declaration doesn't start and stop in the same entity\n"); 3869 } 3870 NEXT; 3871 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 3872 (ctxt->sax->notationDecl != NULL)) 3873 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3874 } else { 3875 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 3876 } 3877 if (Systemid != NULL) xmlFree(Systemid); 3878 if (Pubid != NULL) xmlFree(Pubid); 3879 } 3880} 3881 3882/** 3883 * xmlParseEntityDecl: 3884 * @ctxt: an XML parser context 3885 * 3886 * parse <!ENTITY declarations 3887 * 3888 * [70] EntityDecl ::= GEDecl | PEDecl 3889 * 3890 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3891 * 3892 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3893 * 3894 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
3895 * 3896 * [74] PEDef ::= EntityValue | ExternalID 3897 * 3898 * [76] NDataDecl ::= S 'NDATA' S Name 3899 * 3900 * [ VC: Notation Declared ] 3901 * The Name must match the declared name of a notation. 3902 */ 3903 3904void 3905xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3906 const xmlChar *name = NULL; 3907 xmlChar *value = NULL; 3908 xmlChar *URI = NULL, *literal = NULL; 3909 const xmlChar *ndata = NULL; 3910 int isParameter = 0; 3911 xmlChar *orig = NULL; 3912 int skipped; 3913 3914 GROW; 3915 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 3916 xmlParserInputPtr input = ctxt->input; 3917 SHRINK; 3918 SKIP(8); 3919 skipped = SKIP_BLANKS; 3920 if (skipped == 0) { 3921 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3922 "Space required after '<!ENTITY'\n"); 3923 } 3924 3925 if (RAW == '%') { 3926 NEXT; 3927 skipped = SKIP_BLANKS; 3928 if (skipped == 0) { 3929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3930 "Space required after '%'\n"); 3931 } 3932 isParameter = 1; 3933 } 3934 3935 name = xmlParseName(ctxt); 3936 if (name == NULL) { 3937 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 3938 "xmlParseEntityDecl: no name\n"); 3939 return; 3940 } 3941 skipped = SKIP_BLANKS; 3942 if (skipped == 0) { 3943 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3944 "Space required after the entity name\n"); 3945 } 3946 3947 ctxt->instate = XML_PARSER_ENTITY_DECL; 3948 /* 3949 * handle the various case of definitions... 
3950 */ 3951 if (isParameter) { 3952 if ((RAW == '"') || (RAW == '\'')) { 3953 value = xmlParseEntityValue(ctxt, &orig); 3954 if (value) { 3955 if ((ctxt->sax != NULL) && 3956 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 3957 ctxt->sax->entityDecl(ctxt->userData, name, 3958 XML_INTERNAL_PARAMETER_ENTITY, 3959 NULL, NULL, value); 3960 } 3961 } else { 3962 URI = xmlParseExternalID(ctxt, &literal, 1); 3963 if ((URI == NULL) && (literal == NULL)) { 3964 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 3965 } 3966 if (URI) { 3967 xmlURIPtr uri; 3968 3969 uri = xmlParseURI((const char *) URI); 3970 if (uri == NULL) { 3971 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 3972 "Invalid URI: %s\n", URI); 3973 /* 3974 * This really ought to be a well formedness error 3975 * but the XML Core WG decided otherwise c.f. issue 3976 * E26 of the XML erratas. 3977 */ 3978 } else { 3979 if (uri->fragment != NULL) { 3980 /* 3981 * Okay this is foolish to block those but not 3982 * invalid URIs. 3983 */ 3984 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 3985 } else { 3986 if ((ctxt->sax != NULL) && 3987 (!ctxt->disableSAX) && 3988 (ctxt->sax->entityDecl != NULL)) 3989 ctxt->sax->entityDecl(ctxt->userData, name, 3990 XML_EXTERNAL_PARAMETER_ENTITY, 3991 literal, URI, NULL); 3992 } 3993 xmlFreeURI(uri); 3994 } 3995 } 3996 } 3997 } else { 3998 if ((RAW == '"') || (RAW == '\'')) { 3999 value = xmlParseEntityValue(ctxt, &orig); 4000 if ((ctxt->sax != NULL) && 4001 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4002 ctxt->sax->entityDecl(ctxt->userData, name, 4003 XML_INTERNAL_GENERAL_ENTITY, 4004 NULL, NULL, value); 4005 /* 4006 * For expat compatibility in SAX mode. 
4007 */ 4008 if ((ctxt->myDoc == NULL) || 4009 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4010 if (ctxt->myDoc == NULL) { 4011 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4012 } 4013 if (ctxt->myDoc->intSubset == NULL) 4014 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4015 BAD_CAST "fake", NULL, NULL); 4016 4017 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4018 NULL, NULL, value); 4019 } 4020 } else { 4021 URI = xmlParseExternalID(ctxt, &literal, 1); 4022 if ((URI == NULL) && (literal == NULL)) { 4023 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4024 } 4025 if (URI) { 4026 xmlURIPtr uri; 4027 4028 uri = xmlParseURI((const char *)URI); 4029 if (uri == NULL) { 4030 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4031 "Invalid URI: %s\n", URI); 4032 /* 4033 * This really ought to be a well formedness error 4034 * but the XML Core WG decided otherwise c.f. issue 4035 * E26 of the XML erratas. 4036 */ 4037 } else { 4038 if (uri->fragment != NULL) { 4039 /* 4040 * Okay this is foolish to block those but not 4041 * invalid URIs. 
4042 */ 4043 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4044 } 4045 xmlFreeURI(uri); 4046 } 4047 } 4048 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 4049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4050 "Space required before 'NDATA'\n"); 4051 } 4052 SKIP_BLANKS; 4053 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 4054 SKIP(5); 4055 if (!IS_BLANK_CH(CUR)) { 4056 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4057 "Space required after 'NDATA'\n"); 4058 } 4059 SKIP_BLANKS; 4060 ndata = xmlParseName(ctxt); 4061 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4062 (ctxt->sax->unparsedEntityDecl != NULL)) 4063 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4064 literal, URI, ndata); 4065 } else { 4066 if ((ctxt->sax != NULL) && 4067 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4068 ctxt->sax->entityDecl(ctxt->userData, name, 4069 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4070 literal, URI, NULL); 4071 /* 4072 * For expat compatibility in SAX mode. 4073 * assuming the entity repalcement was asked for 4074 */ 4075 if ((ctxt->replaceEntities != 0) && 4076 ((ctxt->myDoc == NULL) || 4077 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4078 if (ctxt->myDoc == NULL) { 4079 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4080 } 4081 4082 if (ctxt->myDoc->intSubset == NULL) 4083 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4084 BAD_CAST "fake", NULL, NULL); 4085 xmlSAX2EntityDecl(ctxt, name, 4086 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4087 literal, URI, NULL); 4088 } 4089 } 4090 } 4091 } 4092 SKIP_BLANKS; 4093 if (RAW != '>') { 4094 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4095 "xmlParseEntityDecl: entity %s not terminated\n", name); 4096 } else { 4097 if (input != ctxt->input) { 4098 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4099 "Entity declaration doesn't start and stop in the same entity\n"); 4100 } 4101 NEXT; 4102 } 4103 if (orig != NULL) { 4104 /* 4105 * Ugly mechanism to save the raw entity value. 
4106 */ 4107 xmlEntityPtr cur = NULL; 4108 4109 if (isParameter) { 4110 if ((ctxt->sax != NULL) && 4111 (ctxt->sax->getParameterEntity != NULL)) 4112 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4113 } else { 4114 if ((ctxt->sax != NULL) && 4115 (ctxt->sax->getEntity != NULL)) 4116 cur = ctxt->sax->getEntity(ctxt->userData, name); 4117 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4118 cur = xmlSAX2GetEntity(ctxt, name); 4119 } 4120 } 4121 if (cur != NULL) { 4122 if (cur->orig != NULL) 4123 xmlFree(orig); 4124 else 4125 cur->orig = orig; 4126 } else 4127 xmlFree(orig); 4128 } 4129 if (value != NULL) xmlFree(value); 4130 if (URI != NULL) xmlFree(URI); 4131 if (literal != NULL) xmlFree(literal); 4132 } 4133} 4134 4135/** 4136 * xmlParseDefaultDecl: 4137 * @ctxt: an XML parser context 4138 * @value: Receive a possible fixed default value for the attribute 4139 * 4140 * Parse an attribute default declaration 4141 * 4142 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4143 * 4144 * [ VC: Required Attribute ] 4145 * if the default declaration is the keyword #REQUIRED, then the 4146 * attribute must be specified for all elements of the type in the 4147 * attribute-list declaration. 4148 * 4149 * [ VC: Attribute Default Legal ] 4150 * The declared default value must meet the lexical constraints of 4151 * the declared attribute type c.f. xmlValidateAttributeDecl() 4152 * 4153 * [ VC: Fixed Attribute Default ] 4154 * if an attribute has a default value declared with the #FIXED 4155 * keyword, instances of that attribute must match the default value. 4156 * 4157 * [ WFC: No < in Attribute Values ] 4158 * handled in xmlParseAttValue() 4159 * 4160 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4161 * or XML_ATTRIBUTE_FIXED. 
4162 */ 4163 4164int 4165xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4166 int val; 4167 xmlChar *ret; 4168 4169 *value = NULL; 4170 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4171 SKIP(9); 4172 return(XML_ATTRIBUTE_REQUIRED); 4173 } 4174 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4175 SKIP(8); 4176 return(XML_ATTRIBUTE_IMPLIED); 4177 } 4178 val = XML_ATTRIBUTE_NONE; 4179 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4180 SKIP(6); 4181 val = XML_ATTRIBUTE_FIXED; 4182 if (!IS_BLANK_CH(CUR)) { 4183 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4184 "Space required after '#FIXED'\n"); 4185 } 4186 SKIP_BLANKS; 4187 } 4188 ret = xmlParseAttValue(ctxt); 4189 ctxt->instate = XML_PARSER_DTD; 4190 if (ret == NULL) { 4191 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4192 "Attribute default value declaration error\n"); 4193 } else 4194 *value = ret; 4195 return(val); 4196} 4197 4198/** 4199 * xmlParseNotationType: 4200 * @ctxt: an XML parser context 4201 * 4202 * parse an Notation attribute type. 4203 * 4204 * Note: the leading 'NOTATION' S part has already being parsed... 4205 * 4206 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4207 * 4208 * [ VC: Notation Attributes ] 4209 * Values of this type must match one of the notation names included 4210 * in the declaration; all notation names in the declaration must be declared. 
4211 * 4212 * Returns: the notation attribute tree built while parsing 4213 */ 4214 4215xmlEnumerationPtr 4216xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4217 const xmlChar *name; 4218 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4219 4220 if (RAW != '(') { 4221 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4222 return(NULL); 4223 } 4224 SHRINK; 4225 do { 4226 NEXT; 4227 SKIP_BLANKS; 4228 name = xmlParseName(ctxt); 4229 if (name == NULL) { 4230 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4231 "Name expected in NOTATION declaration\n"); 4232 return(ret); 4233 } 4234 cur = xmlCreateEnumeration(name); 4235 if (cur == NULL) return(ret); 4236 if (last == NULL) ret = last = cur; 4237 else { 4238 last->next = cur; 4239 last = cur; 4240 } 4241 SKIP_BLANKS; 4242 } while (RAW == '|'); 4243 if (RAW != ')') { 4244 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4245 if ((last != NULL) && (last != ret)) 4246 xmlFreeEnumeration(last); 4247 return(ret); 4248 } 4249 NEXT; 4250 return(ret); 4251} 4252 4253/** 4254 * xmlParseEnumerationType: 4255 * @ctxt: an XML parser context 4256 * 4257 * parse an Enumeration attribute type. 4258 * 4259 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
* ')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing. When an
 *          error is met the nodes built so far (possibly none) are
 *          returned.
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *token;
    xmlEnumerationPtr head = NULL, tail = NULL, node;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;

    for (;;) {
        /* Skips the opening '(' first, then each '|' separator. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }
        node = xmlCreateEnumeration(token);
        /* The token string is released right after the node is built. */
        xmlFree(token);
        if (node == NULL)
            return(head);

        if (tail == NULL) {
            head = node;
        } else {
            tail->next = node;
        }
        tail = node;

        SKIP_BLANKS;
        if (RAW != '|')
            break;
    }

    if (RAW == ')') {
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
    }
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' 4314 * 4315 * 4316 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4317 */ 4318 4319int 4320xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4321 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4322 SKIP(8); 4323 if (!IS_BLANK_CH(CUR)) { 4324 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4325 "Space required after 'NOTATION'\n"); 4326 return(0); 4327 } 4328 SKIP_BLANKS; 4329 *tree = xmlParseNotationType(ctxt); 4330 if (*tree == NULL) return(0); 4331 return(XML_ATTRIBUTE_NOTATION); 4332 } 4333 *tree = xmlParseEnumerationType(ctxt); 4334 if (*tree == NULL) return(0); 4335 return(XML_ATTRIBUTE_ENUMERATION); 4336} 4337 4338/** 4339 * xmlParseAttributeType: 4340 * @ctxt: an XML parser context 4341 * @tree: the enumeration tree built while parsing 4342 * 4343 * parse the Attribute list def for an element 4344 * 4345 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4346 * 4347 * [55] StringType ::= 'CDATA' 4348 * 4349 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4350 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4351 * 4352 * Validity constraints for attribute values syntax are checked in 4353 * xmlValidateAttributeValue() 4354 * 4355 * [ VC: ID ] 4356 * Values of type ID must match the Name production. A name must not 4357 * appear more than once in an XML document as a value of this type; 4358 * i.e., ID values must uniquely identify the elements which bear them. 4359 * 4360 * [ VC: One ID per Element Type ] 4361 * No element type may have more than one ID attribute specified. 4362 * 4363 * [ VC: ID Attribute Default ] 4364 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4365 * 4366 * [ VC: IDREF ] 4367 * Values of type IDREF must match the Name production, and values 4368 * of type IDREFS must match Names; each IDREF Name must match the value 4369 * of an ID attribute on some element in the XML document; i.e. 
IDREF 4370 * values must match the value of some ID attribute. 4371 * 4372 * [ VC: Entity Name ] 4373 * Values of type ENTITY must match the Name production, values 4374 * of type ENTITIES must match Names; each Entity Name must match the 4375 * name of an unparsed entity declared in the DTD. 4376 * 4377 * [ VC: Name Token ] 4378 * Values of type NMTOKEN must match the Nmtoken production; values 4379 * of type NMTOKENS must match Nmtokens. 4380 * 4381 * Returns the attribute type 4382 */ 4383int 4384xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4385 SHRINK; 4386 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 4387 SKIP(5); 4388 return(XML_ATTRIBUTE_CDATA); 4389 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 4390 SKIP(6); 4391 return(XML_ATTRIBUTE_IDREFS); 4392 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 4393 SKIP(5); 4394 return(XML_ATTRIBUTE_IDREF); 4395 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4396 SKIP(2); 4397 return(XML_ATTRIBUTE_ID); 4398 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 4399 SKIP(6); 4400 return(XML_ATTRIBUTE_ENTITY); 4401 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 4402 SKIP(8); 4403 return(XML_ATTRIBUTE_ENTITIES); 4404 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 4405 SKIP(8); 4406 return(XML_ATTRIBUTE_NMTOKENS); 4407 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 4408 SKIP(7); 4409 return(XML_ATTRIBUTE_NMTOKEN); 4410 } 4411 return(xmlParseEnumeratedType(ctxt, tree)); 4412} 4413 4414/** 4415 * xmlParseAttributeListDecl: 4416 * @ctxt: an XML parser context 4417 * 4418 * : parse the Attribute list def for an element 4419 * 4420 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 4421 * 4422 * [53] AttDef ::= S Name S AttType S DefaultDecl 4423 * 4424 */ 4425void 4426xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4427 const xmlChar *elemName; 4428 const xmlChar *attrName; 4429 xmlEnumerationPtr tree; 4430 4431 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 4432 xmlParserInputPtr input = ctxt->input; 4433 4434 SKIP(9); 4435 if (!IS_BLANK_CH(CUR)) { 4436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4437 "Space required after '<!ATTLIST'\n"); 4438 } 4439 SKIP_BLANKS; 4440 elemName = xmlParseName(ctxt); 4441 if (elemName == NULL) { 4442 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4443 "ATTLIST: no name for Element\n"); 4444 return; 4445 } 4446 SKIP_BLANKS; 4447 GROW; 4448 while (RAW != '>') { 4449 const xmlChar *check = CUR_PTR; 4450 int type; 4451 int def; 4452 xmlChar *defaultValue = NULL; 4453 4454 GROW; 4455 tree = NULL; 4456 attrName = xmlParseName(ctxt); 4457 if (attrName == NULL) { 4458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4459 "ATTLIST: no name for Attribute\n"); 4460 break; 4461 } 4462 GROW; 4463 if (!IS_BLANK_CH(CUR)) { 4464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4465 "Space required after the attribute name\n"); 4466 if (defaultValue != NULL) 4467 xmlFree(defaultValue); 4468 break; 4469 } 4470 SKIP_BLANKS; 4471 4472 type = xmlParseAttributeType(ctxt, &tree); 4473 if (type <= 0) { 4474 if (defaultValue != NULL) 4475 xmlFree(defaultValue); 4476 break; 4477 } 4478 4479 GROW; 4480 if (!IS_BLANK_CH(CUR)) { 4481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4482 "Space required after the attribute type\n"); 4483 if (defaultValue != NULL) 4484 xmlFree(defaultValue); 4485 if (tree != NULL) 4486 xmlFreeEnumeration(tree); 4487 break; 4488 } 4489 SKIP_BLANKS; 4490 4491 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4492 if (def <= 0) { 4493 if (defaultValue != NULL) 4494 xmlFree(defaultValue); 4495 if (tree != NULL) 4496 xmlFreeEnumeration(tree); 4497 break; 4498 } 4499 4500 GROW; 4501 if (RAW != '>') { 
4502 if (!IS_BLANK_CH(CUR)) { 4503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4504 "Space required after the attribute default value\n"); 4505 if (defaultValue != NULL) 4506 xmlFree(defaultValue); 4507 if (tree != NULL) 4508 xmlFreeEnumeration(tree); 4509 break; 4510 } 4511 SKIP_BLANKS; 4512 } 4513 if (check == CUR_PTR) { 4514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 4515 "in xmlParseAttributeListDecl\n"); 4516 if (defaultValue != NULL) 4517 xmlFree(defaultValue); 4518 if (tree != NULL) 4519 xmlFreeEnumeration(tree); 4520 break; 4521 } 4522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4523 (ctxt->sax->attributeDecl != NULL)) 4524 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4525 type, def, defaultValue, tree); 4526 else if (tree != NULL) 4527 xmlFreeEnumeration(tree); 4528 4529 if ((ctxt->sax2) && (defaultValue != NULL) && 4530 (def != XML_ATTRIBUTE_IMPLIED) && 4531 (def != XML_ATTRIBUTE_REQUIRED)) { 4532 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 4533 } 4534 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { 4535 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 4536 } 4537 if (defaultValue != NULL) 4538 xmlFree(defaultValue); 4539 GROW; 4540 } 4541 if (RAW == '>') { 4542 if (input != ctxt->input) { 4543 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4544 "Attribute list declaration doesn't start and stop in the same entity\n"); 4545 } 4546 NEXT; 4547 } 4548 } 4549} 4550 4551/** 4552 * xmlParseElementMixedContentDecl: 4553 * @ctxt: an XML parser context 4554 * @inputchk: the input used for the current entity, needed for boundary checks 4555 * 4556 * parse the declaration for a Mixed Element content 4557 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4558 * 4559 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4560 * '(' S? '#PCDATA' S? 
')' 4561 * 4562 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4563 * 4564 * [ VC: No Duplicate Types ] 4565 * The same name must not appear more than once in a single 4566 * mixed-content declaration. 4567 * 4568 * returns: the list of the xmlElementContentPtr describing the element choices 4569 */ 4570xmlElementContentPtr 4571xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 4572 xmlElementContentPtr ret = NULL, cur = NULL, n; 4573 const xmlChar *elem = NULL; 4574 4575 GROW; 4576 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 4577 SKIP(7); 4578 SKIP_BLANKS; 4579 SHRINK; 4580 if (RAW == ')') { 4581 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4582 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4583"Element content declaration doesn't start and stop in the same entity\n", 4584 NULL); 4585 } 4586 NEXT; 4587 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4588 if (RAW == '*') { 4589 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4590 NEXT; 4591 } 4592 return(ret); 4593 } 4594 if ((RAW == '(') || (RAW == '|')) { 4595 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4596 if (ret == NULL) return(NULL); 4597 } 4598 while (RAW == '|') { 4599 NEXT; 4600 if (elem == NULL) { 4601 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4602 if (ret == NULL) return(NULL); 4603 ret->c1 = cur; 4604 if (cur != NULL) 4605 cur->parent = ret; 4606 cur = ret; 4607 } else { 4608 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4609 if (n == NULL) return(NULL); 4610 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4611 if (n->c1 != NULL) 4612 n->c1->parent = n; 4613 cur->c2 = n; 4614 if (n != NULL) 4615 n->parent = cur; 4616 cur = n; 4617 } 4618 SKIP_BLANKS; 4619 elem = xmlParseName(ctxt); 4620 if (elem == NULL) { 4621 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4622 "xmlParseElementMixedContentDecl : Name expected\n"); 4623 xmlFreeElementContent(cur); 4624 return(NULL); 4625 } 
4626 SKIP_BLANKS; 4627 GROW; 4628 } 4629 if ((RAW == ')') && (NXT(1) == '*')) { 4630 if (elem != NULL) { 4631 cur->c2 = xmlNewElementContent(elem, 4632 XML_ELEMENT_CONTENT_ELEMENT); 4633 if (cur->c2 != NULL) 4634 cur->c2->parent = cur; 4635 } 4636 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4637 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4638 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4639"Element content declaration doesn't start and stop in the same entity\n", 4640 NULL); 4641 } 4642 SKIP(2); 4643 } else { 4644 xmlFreeElementContent(ret); 4645 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 4646 return(NULL); 4647 } 4648 4649 } else { 4650 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 4651 } 4652 return(ret); 4653} 4654 4655/** 4656 * xmlParseElementChildrenContentDecl: 4657 * @ctxt: an XML parser context 4658 * @inputchk: the input used for the current entity, needed for boundary checks 4659 * 4660 * parse the declaration for a Mixed Element content 4661 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4662 * 4663 * 4664 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4665 * 4666 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4667 * 4668 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4669 * 4670 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4671 * 4672 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4673 * TODO Parameter-entity replacement text must be properly nested 4674 * with parenthesized groups. That is to say, if either of the 4675 * opening or closing parentheses in a choice, seq, or Mixed 4676 * construct is contained in the replacement text for a parameter 4677 * entity, both must be contained in the same replacement text. 
For 4678 * interoperability, if a parameter-entity reference appears in a 4679 * choice, seq, or Mixed construct, its replacement text should not 4680 * be empty, and neither the first nor last non-blank character of 4681 * the replacement text should be a connector (| or ,). 4682 * 4683 * Returns the tree of xmlElementContentPtr describing the element 4684 * hierarchy. 4685 */ 4686xmlElementContentPtr 4687xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 4688 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4689 const xmlChar *elem; 4690 xmlChar type = 0; 4691 4692 SKIP_BLANKS; 4693 GROW; 4694 if (RAW == '(') { 4695 int inputid = ctxt->input->id; 4696 4697 /* Recurse on first child */ 4698 NEXT; 4699 SKIP_BLANKS; 4700 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 4701 SKIP_BLANKS; 4702 GROW; 4703 } else { 4704 elem = xmlParseName(ctxt); 4705 if (elem == NULL) { 4706 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 4707 return(NULL); 4708 } 4709 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4710 if (cur == NULL) { 4711 xmlErrMemory(ctxt, NULL); 4712 return(NULL); 4713 } 4714 GROW; 4715 if (RAW == '?') { 4716 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4717 NEXT; 4718 } else if (RAW == '*') { 4719 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4720 NEXT; 4721 } else if (RAW == '+') { 4722 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4723 NEXT; 4724 } else { 4725 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4726 } 4727 GROW; 4728 } 4729 SKIP_BLANKS; 4730 SHRINK; 4731 while (RAW != ')') { 4732 /* 4733 * Each loop we parse one separator and one element. 
4734 */ 4735 if (RAW == ',') { 4736 if (type == 0) type = CUR; 4737 4738 /* 4739 * Detect "Name | Name , Name" error 4740 */ 4741 else if (type != CUR) { 4742 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4743 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4744 type); 4745 if ((last != NULL) && (last != ret)) 4746 xmlFreeElementContent(last); 4747 if (ret != NULL) 4748 xmlFreeElementContent(ret); 4749 return(NULL); 4750 } 4751 NEXT; 4752 4753 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4754 if (op == NULL) { 4755 if ((last != NULL) && (last != ret)) 4756 xmlFreeElementContent(last); 4757 xmlFreeElementContent(ret); 4758 return(NULL); 4759 } 4760 if (last == NULL) { 4761 op->c1 = ret; 4762 if (ret != NULL) 4763 ret->parent = op; 4764 ret = cur = op; 4765 } else { 4766 cur->c2 = op; 4767 if (op != NULL) 4768 op->parent = cur; 4769 op->c1 = last; 4770 if (last != NULL) 4771 last->parent = op; 4772 cur =op; 4773 last = NULL; 4774 } 4775 } else if (RAW == '|') { 4776 if (type == 0) type = CUR; 4777 4778 /* 4779 * Detect "Name , Name | Name" error 4780 */ 4781 else if (type != CUR) { 4782 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 4783 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4784 type); 4785 if ((last != NULL) && (last != ret)) 4786 xmlFreeElementContent(last); 4787 if (ret != NULL) 4788 xmlFreeElementContent(ret); 4789 return(NULL); 4790 } 4791 NEXT; 4792 4793 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4794 if (op == NULL) { 4795 if ((last != NULL) && (last != ret)) 4796 xmlFreeElementContent(last); 4797 if (ret != NULL) 4798 xmlFreeElementContent(ret); 4799 return(NULL); 4800 } 4801 if (last == NULL) { 4802 op->c1 = ret; 4803 if (ret != NULL) 4804 ret->parent = op; 4805 ret = cur = op; 4806 } else { 4807 cur->c2 = op; 4808 if (op != NULL) 4809 op->parent = cur; 4810 op->c1 = last; 4811 if (last != NULL) 4812 last->parent = op; 4813 cur =op; 4814 last = NULL; 4815 } 4816 } else { 4817 xmlFatalErr(ctxt, 
XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 4818 if (ret != NULL) 4819 xmlFreeElementContent(ret); 4820 return(NULL); 4821 } 4822 GROW; 4823 SKIP_BLANKS; 4824 GROW; 4825 if (RAW == '(') { 4826 int inputid = ctxt->input->id; 4827 /* Recurse on second child */ 4828 NEXT; 4829 SKIP_BLANKS; 4830 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 4831 SKIP_BLANKS; 4832 } else { 4833 elem = xmlParseName(ctxt); 4834 if (elem == NULL) { 4835 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 4836 if (ret != NULL) 4837 xmlFreeElementContent(ret); 4838 return(NULL); 4839 } 4840 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4841 if (RAW == '?') { 4842 last->ocur = XML_ELEMENT_CONTENT_OPT; 4843 NEXT; 4844 } else if (RAW == '*') { 4845 last->ocur = XML_ELEMENT_CONTENT_MULT; 4846 NEXT; 4847 } else if (RAW == '+') { 4848 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4849 NEXT; 4850 } else { 4851 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4852 } 4853 } 4854 SKIP_BLANKS; 4855 GROW; 4856 } 4857 if ((cur != NULL) && (last != NULL)) { 4858 cur->c2 = last; 4859 if (last != NULL) 4860 last->parent = cur; 4861 } 4862 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 4863 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 4864"Element content declaration doesn't start and stop in the same entity\n", 4865 NULL); 4866 } 4867 NEXT; 4868 if (RAW == '?') { 4869 if (ret != NULL) { 4870 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 4871 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 4872 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4873 else 4874 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4875 } 4876 NEXT; 4877 } else if (RAW == '*') { 4878 if (ret != NULL) { 4879 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4880 cur = ret; 4881 /* 4882 * Some normalization: 4883 * (a | b* | c?)* == (a | b | c)* 4884 */ 4885 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4886 if ((cur->c1 != NULL) && 4887 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4888 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 4889 
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4890 if ((cur->c2 != NULL) && 4891 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4892 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 4893 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4894 cur = cur->c2; 4895 } 4896 } 4897 NEXT; 4898 } else if (RAW == '+') { 4899 if (ret != NULL) { 4900 int found = 0; 4901 4902 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 4903 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 4904 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4905 else 4906 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4907 /* 4908 * Some normalization: 4909 * (a | b*)+ == (a | b)* 4910 * (a | b?)+ == (a | b)* 4911 */ 4912 while (cur->type == XML_ELEMENT_CONTENT_OR) { 4913 if ((cur->c1 != NULL) && 4914 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 4915 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 4916 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 4917 found = 1; 4918 } 4919 if ((cur->c2 != NULL) && 4920 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 4921 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 4922 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 4923 found = 1; 4924 } 4925 cur = cur->c2; 4926 } 4927 if (found) 4928 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4929 } 4930 NEXT; 4931 } 4932 return(ret); 4933} 4934 4935/** 4936 * xmlParseElementContentDecl: 4937 * @ctxt: an XML parser context 4938 * @name: the name of the element being defined. 
4939 * @result: the Element Content pointer will be stored here if any 4940 * 4941 * parse the declaration for an Element content either Mixed or Children, 4942 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4943 * 4944 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4945 * 4946 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4947 */ 4948 4949int 4950xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 4951 xmlElementContentPtr *result) { 4952 4953 xmlElementContentPtr tree = NULL; 4954 int inputid = ctxt->input->id; 4955 int res; 4956 4957 *result = NULL; 4958 4959 if (RAW != '(') { 4960 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 4961 "xmlParseElementContentDecl : %s '(' expected\n", name); 4962 return(-1); 4963 } 4964 NEXT; 4965 GROW; 4966 SKIP_BLANKS; 4967 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 4968 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 4969 res = XML_ELEMENT_TYPE_MIXED; 4970 } else { 4971 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 4972 res = XML_ELEMENT_TYPE_ELEMENT; 4973 } 4974 SKIP_BLANKS; 4975 *result = tree; 4976 return(res); 4977} 4978 4979/** 4980 * xmlParseElementDecl: 4981 * @ctxt: an XML parser context 4982 * 4983 * parse an Element declaration. 4984 * 4985 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 4986 * 4987 * [ VC: Unique Element Type Declaration ] 4988 * No element type may be declared more than once 4989 * 4990 * Returns the type of the element, or -1 in case of error 4991 */ 4992int 4993xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4994 const xmlChar *name; 4995 int ret = -1; 4996 xmlElementContentPtr content = NULL; 4997 4998 GROW; 4999 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 5000 xmlParserInputPtr input = ctxt->input; 5001 5002 SKIP(9); 5003 if (!IS_BLANK_CH(CUR)) { 5004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5005 "Space required after 'ELEMENT'\n"); 5006 } 5007 SKIP_BLANKS; 5008 name = xmlParseName(ctxt); 5009 if (name == NULL) { 5010 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5011 "xmlParseElementDecl: no name for Element\n"); 5012 return(-1); 5013 } 5014 while ((RAW == 0) && (ctxt->inputNr > 1)) 5015 xmlPopInput(ctxt); 5016 if (!IS_BLANK_CH(CUR)) { 5017 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5018 "Space required after the element name\n"); 5019 } 5020 SKIP_BLANKS; 5021 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 5022 SKIP(5); 5023 /* 5024 * Element must always be empty. 5025 */ 5026 ret = XML_ELEMENT_TYPE_EMPTY; 5027 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5028 (NXT(2) == 'Y')) { 5029 SKIP(3); 5030 /* 5031 * Element is a generic container. 5032 */ 5033 ret = XML_ELEMENT_TYPE_ANY; 5034 } else if (RAW == '(') { 5035 ret = xmlParseElementContentDecl(ctxt, name, &content); 5036 } else { 5037 /* 5038 * [ WFC: PEs in Internal Subset ] error handling. 5039 */ 5040 if ((RAW == '%') && (ctxt->external == 0) && 5041 (ctxt->inputNr == 1)) { 5042 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 5043 "PEReference: forbidden within markup decl in internal subset\n"); 5044 } else { 5045 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5046 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5047 } 5048 return(-1); 5049 } 5050 5051 SKIP_BLANKS; 5052 /* 5053 * Pop-up of finished entities. 
5054 */ 5055 while ((RAW == 0) && (ctxt->inputNr > 1)) 5056 xmlPopInput(ctxt); 5057 SKIP_BLANKS; 5058 5059 if (RAW != '>') { 5060 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5061 } else { 5062 if (input != ctxt->input) { 5063 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5064 "Element declaration doesn't start and stop in the same entity\n"); 5065 } 5066 5067 NEXT; 5068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5069 (ctxt->sax->elementDecl != NULL)) 5070 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5071 content); 5072 } 5073 if (content != NULL) { 5074 xmlFreeElementContent(content); 5075 } 5076 } 5077 return(ret); 5078} 5079 5080/** 5081 * xmlParseConditionalSections 5082 * @ctxt: an XML parser context 5083 * 5084 * [61] conditionalSect ::= includeSect | ignoreSect 5085 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5086 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 5087 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5088 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5089 */ 5090 5091static void 5092xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5093 SKIP(3); 5094 SKIP_BLANKS; 5095 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 5096 SKIP(7); 5097 SKIP_BLANKS; 5098 if (RAW != '[') { 5099 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5100 } else { 5101 NEXT; 5102 } 5103 if (xmlParserDebugEntities) { 5104 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5105 xmlGenericError(xmlGenericErrorContext, 5106 "%s(%d): ", ctxt->input->filename, 5107 ctxt->input->line); 5108 xmlGenericError(xmlGenericErrorContext, 5109 "Entering INCLUDE Conditional Section\n"); 5110 } 5111 5112 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5113 (NXT(2) != '>'))) { 5114 const xmlChar *check = CUR_PTR; 5115 unsigned int cons = ctxt->input->consumed; 5116 5117 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5118 xmlParseConditionalSections(ctxt); 
5119 } else if (IS_BLANK_CH(CUR)) { 5120 NEXT; 5121 } else if (RAW == '%') { 5122 xmlParsePEReference(ctxt); 5123 } else 5124 xmlParseMarkupDecl(ctxt); 5125 5126 /* 5127 * Pop-up of finished entities. 5128 */ 5129 while ((RAW == 0) && (ctxt->inputNr > 1)) 5130 xmlPopInput(ctxt); 5131 5132 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5133 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5134 break; 5135 } 5136 } 5137 if (xmlParserDebugEntities) { 5138 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5139 xmlGenericError(xmlGenericErrorContext, 5140 "%s(%d): ", ctxt->input->filename, 5141 ctxt->input->line); 5142 xmlGenericError(xmlGenericErrorContext, 5143 "Leaving INCLUDE Conditional Section\n"); 5144 } 5145 5146 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5147 int state; 5148 xmlParserInputState instate; 5149 int depth = 0; 5150 5151 SKIP(6); 5152 SKIP_BLANKS; 5153 if (RAW != '[') { 5154 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5155 } else { 5156 NEXT; 5157 } 5158 if (xmlParserDebugEntities) { 5159 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5160 xmlGenericError(xmlGenericErrorContext, 5161 "%s(%d): ", ctxt->input->filename, 5162 ctxt->input->line); 5163 xmlGenericError(xmlGenericErrorContext, 5164 "Entering IGNORE Conditional Section\n"); 5165 } 5166 5167 /* 5168 * Parse up to the end of the conditional section 5169 * But disable SAX event generating DTD building in the meantime 5170 */ 5171 state = ctxt->disableSAX; 5172 instate = ctxt->instate; 5173 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5174 ctxt->instate = XML_PARSER_IGNORE; 5175 5176 while ((depth >= 0) && (RAW != 0)) { 5177 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5178 depth++; 5179 SKIP(3); 5180 continue; 5181 } 5182 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5183 if (--depth >= 0) SKIP(3); 5184 continue; 5185 } 5186 NEXT; 5187 continue; 5188 } 5189 5190 ctxt->disableSAX = state; 5191 ctxt->instate = 
instate; 5192 5193 if (xmlParserDebugEntities) { 5194 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5195 xmlGenericError(xmlGenericErrorContext, 5196 "%s(%d): ", ctxt->input->filename, 5197 ctxt->input->line); 5198 xmlGenericError(xmlGenericErrorContext, 5199 "Leaving IGNORE Conditional Section\n"); 5200 } 5201 5202 } else { 5203 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5204 } 5205 5206 if (RAW == 0) 5207 SHRINK; 5208 5209 if (RAW == 0) { 5210 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5211 } else { 5212 SKIP(3); 5213 } 5214} 5215 5216/** 5217 * xmlParseMarkupDecl: 5218 * @ctxt: an XML parser context 5219 * 5220 * parse Markup declarations 5221 * 5222 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5223 * NotationDecl | PI | Comment 5224 * 5225 * [ VC: Proper Declaration/PE Nesting ] 5226 * Parameter-entity replacement text must be properly nested with 5227 * markup declarations. That is to say, if either the first character 5228 * or the last character of a markup declaration (markupdecl above) is 5229 * contained in the replacement text for a parameter-entity reference, 5230 * both must be contained in the same replacement text. 5231 * 5232 * [ WFC: PEs in Internal Subset ] 5233 * In the internal DTD subset, parameter-entity references can occur 5234 * only where markup declarations can occur, not within markup declarations. 5235 * (This does not apply to references that occur in external parameter 5236 * entities or to the external subset.) 5237 */ 5238void 5239xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5240 GROW; 5241 xmlParseElementDecl(ctxt); 5242 xmlParseAttributeListDecl(ctxt); 5243 xmlParseEntityDecl(ctxt); 5244 xmlParseNotationDecl(ctxt); 5245 xmlParsePI(ctxt); 5246 xmlParseComment(ctxt); 5247 /* 5248 * This is only for internal subset. 
On external entities, 5249 * the replacement is done before parsing stage 5250 */ 5251 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5252 xmlParsePEReference(ctxt); 5253 5254 /* 5255 * Conditional sections are allowed from entities included 5256 * by PE References in the internal subset. 5257 */ 5258 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5259 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5260 xmlParseConditionalSections(ctxt); 5261 } 5262 } 5263 5264 ctxt->instate = XML_PARSER_DTD; 5265} 5266 5267/** 5268 * xmlParseTextDecl: 5269 * @ctxt: an XML parser context 5270 * 5271 * parse an XML declaration header for external entities 5272 * 5273 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5274 * 5275 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5276 */ 5277 5278void 5279xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5280 xmlChar *version; 5281 const xmlChar *encoding; 5282 5283 /* 5284 * We know that '<?xml' is here. 5285 */ 5286 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5287 SKIP(5); 5288 } else { 5289 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5290 return; 5291 } 5292 5293 if (!IS_BLANK_CH(CUR)) { 5294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5295 "Space needed after '<?xml'\n"); 5296 } 5297 SKIP_BLANKS; 5298 5299 /* 5300 * We may have the VersionInfo here. 
5301 */ 5302 version = xmlParseVersionInfo(ctxt); 5303 if (version == NULL) 5304 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5305 else { 5306 if (!IS_BLANK_CH(CUR)) { 5307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5308 "Space needed here\n"); 5309 } 5310 } 5311 ctxt->input->version = version; 5312 5313 /* 5314 * We must have the encoding declaration 5315 */ 5316 encoding = xmlParseEncodingDecl(ctxt); 5317 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5318 /* 5319 * The XML REC instructs us to stop parsing right here 5320 */ 5321 return; 5322 } 5323 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 5324 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 5325 "Missing encoding in text declaration\n"); 5326 } 5327 5328 SKIP_BLANKS; 5329 if ((RAW == '?') && (NXT(1) == '>')) { 5330 SKIP(2); 5331 } else if (RAW == '>') { 5332 /* Deprecated old WD ... */ 5333 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5334 NEXT; 5335 } else { 5336 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5337 MOVETO_ENDTAG(CUR_PTR); 5338 NEXT; 5339 } 5340} 5341 5342/** 5343 * xmlParseExternalSubset: 5344 * @ctxt: an XML parser context 5345 * @ExternalID: the external identifier 5346 * @SystemID: the system identifier (or URL) 5347 * 5348 * parse Markup declarations from an external subset 5349 * 5350 * [30] extSubset ::= textDecl? 
extSubsetDecl 5351 * 5352 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5353 */ 5354void 5355xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5356 const xmlChar *SystemID) { 5357 xmlDetectSAX2(ctxt); 5358 GROW; 5359 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 5360 xmlParseTextDecl(ctxt); 5361 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5362 /* 5363 * The XML REC instructs us to stop parsing right here 5364 */ 5365 ctxt->instate = XML_PARSER_EOF; 5366 return; 5367 } 5368 } 5369 if (ctxt->myDoc == NULL) { 5370 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5371 } 5372 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5373 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5374 5375 ctxt->instate = XML_PARSER_DTD; 5376 ctxt->external = 1; 5377 while (((RAW == '<') && (NXT(1) == '?')) || 5378 ((RAW == '<') && (NXT(1) == '!')) || 5379 (RAW == '%') || IS_BLANK_CH(CUR)) { 5380 const xmlChar *check = CUR_PTR; 5381 unsigned int cons = ctxt->input->consumed; 5382 5383 GROW; 5384 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5385 xmlParseConditionalSections(ctxt); 5386 } else if (IS_BLANK_CH(CUR)) { 5387 NEXT; 5388 } else if (RAW == '%') { 5389 xmlParsePEReference(ctxt); 5390 } else 5391 xmlParseMarkupDecl(ctxt); 5392 5393 /* 5394 * Pop-up of finished entities. 
5395 */ 5396 while ((RAW == 0) && (ctxt->inputNr > 1)) 5397 xmlPopInput(ctxt); 5398 5399 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5400 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5401 break; 5402 } 5403 } 5404 5405 if (RAW != 0) { 5406 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5407 } 5408 5409} 5410 5411/** 5412 * xmlParseReference: 5413 * @ctxt: an XML parser context 5414 * 5415 * parse and handle entity references in content, depending on the SAX 5416 * interface, this may end-up in a call to character() if this is a 5417 * CharRef, a predefined entity, if there is no reference() callback. 5418 * or if the parser was asked to switch to that mode. 5419 * 5420 * [67] Reference ::= EntityRef | CharRef 5421 */ 5422void 5423xmlParseReference(xmlParserCtxtPtr ctxt) { 5424 xmlEntityPtr ent; 5425 xmlChar *val; 5426 if (RAW != '&') return; 5427 5428 if (NXT(1) == '#') { 5429 int i = 0; 5430 xmlChar out[10]; 5431 int hex = NXT(2); 5432 int value = xmlParseCharRef(ctxt); 5433 5434 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5435 /* 5436 * So we are using non-UTF-8 buffers 5437 * Check that the char fit on 8bits, if not 5438 * generate a CharRef. 
5439 */ 5440 if (value <= 0xFF) { 5441 out[0] = value; 5442 out[1] = 0; 5443 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5444 (!ctxt->disableSAX)) 5445 ctxt->sax->characters(ctxt->userData, out, 1); 5446 } else { 5447 if ((hex == 'x') || (hex == 'X')) 5448 snprintf((char *)out, sizeof(out), "#x%X", value); 5449 else 5450 snprintf((char *)out, sizeof(out), "#%d", value); 5451 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5452 (!ctxt->disableSAX)) 5453 ctxt->sax->reference(ctxt->userData, out); 5454 } 5455 } else { 5456 /* 5457 * Just encode the value in UTF-8 5458 */ 5459 COPY_BUF(0 ,out, i, value); 5460 out[i] = 0; 5461 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5462 (!ctxt->disableSAX)) 5463 ctxt->sax->characters(ctxt->userData, out, i); 5464 } 5465 } else { 5466 ent = xmlParseEntityRef(ctxt); 5467 if (ent == NULL) return; 5468 if (!ctxt->wellFormed) 5469 return; 5470 if ((ent->name != NULL) && 5471 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5472 xmlNodePtr list = NULL; 5473 xmlParserErrors ret = XML_ERR_OK; 5474 5475 5476 /* 5477 * The first reference to the entity trigger a parsing phase 5478 * where the ent->children is filled with the result from 5479 * the parsing. 5480 */ 5481 if (ent->children == NULL) { 5482 xmlChar *value; 5483 value = ent->content; 5484 5485 /* 5486 * Check that this entity is well formed 5487 */ 5488 if ((value != NULL) && (value[0] != 0) && 5489 (value[1] == 0) && (value[0] == '<') && 5490 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5491 /* 5492 * DONE: get definite answer on this !!! 5493 * Lots of entity decls are used to declare a single 5494 * char 5495 * <!ENTITY lt "<"> 5496 * Which seems to be valid since 5497 * 2.4: The ampersand character (&) and the left angle 5498 * bracket (<) may appear in their literal form only 5499 * when used ... 
They are also legal within the literal 5500 * entity value of an internal entity declaration;i 5501 * see "4.3.2 Well-Formed Parsed Entities". 5502 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5503 * Looking at the OASIS test suite and James Clark 5504 * tests, this is broken. However the XML REC uses 5505 * it. Is the XML REC not well-formed ???? 5506 * This is a hack to avoid this problem 5507 * 5508 * ANSWER: since lt gt amp .. are already defined, 5509 * this is a redefinition and hence the fact that the 5510 * content is not well balanced is not a Wf error, this 5511 * is lousy but acceptable. 5512 */ 5513 list = xmlNewDocText(ctxt->myDoc, value); 5514 if (list != NULL) { 5515 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 5516 (ent->children == NULL)) { 5517 ent->children = list; 5518 ent->last = list; 5519 ent->owner = 1; 5520 list->parent = (xmlNodePtr) ent; 5521 } else { 5522 xmlFreeNodeList(list); 5523 } 5524 } else if (list != NULL) { 5525 xmlFreeNodeList(list); 5526 } 5527 } else { 5528 /* 5529 * 4.3.2: An internal general parsed entity is well-formed 5530 * if its replacement text matches the production labeled 5531 * content. 5532 */ 5533 5534 void *user_data; 5535 /* 5536 * This is a bit hackish but this seems the best 5537 * way to make sure both SAX and DOM entity support 5538 * behaves okay. 
5539 */ 5540 if (ctxt->userData == ctxt) 5541 user_data = NULL; 5542 else 5543 user_data = ctxt->userData; 5544 5545 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 5546 ctxt->depth++; 5547 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 5548 value, user_data, &list); 5549 ctxt->depth--; 5550 } else if (ent->etype == 5551 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 5552 ctxt->depth++; 5553 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 5554 ctxt->sax, user_data, ctxt->depth, 5555 ent->URI, ent->ExternalID, &list); 5556 ctxt->depth--; 5557 } else { 5558 ret = XML_ERR_ENTITY_PE_INTERNAL; 5559 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 5560 "invalid entity type found\n", NULL); 5561 } 5562 if (ret == XML_ERR_ENTITY_LOOP) { 5563 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 5564 return; 5565 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 5566 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 5567 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 5568 (ent->children == NULL)) { 5569 ent->children = list; 5570 if (ctxt->replaceEntities) { 5571 /* 5572 * Prune it directly in the generated document 5573 * except for single text nodes. 
5574 */ 5575 if (((list->type == XML_TEXT_NODE) && 5576 (list->next == NULL)) || 5577 (ctxt->parseMode == XML_PARSE_READER)) { 5578 list->parent = (xmlNodePtr) ent; 5579 list = NULL; 5580 ent->owner = 1; 5581 } else { 5582 ent->owner = 0; 5583 while (list != NULL) { 5584 list->parent = (xmlNodePtr) ctxt->node; 5585 list->doc = ctxt->myDoc; 5586 if (list->next == NULL) 5587 ent->last = list; 5588 list = list->next; 5589 } 5590 list = ent->children; 5591#ifdef LIBXML_LEGACY_ENABLED 5592 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5593 xmlAddEntityReference(ent, list, NULL); 5594#endif /* LIBXML_LEGACY_ENABLED */ 5595 } 5596 } else { 5597 ent->owner = 1; 5598 while (list != NULL) { 5599 list->parent = (xmlNodePtr) ent; 5600 if (list->next == NULL) 5601 ent->last = list; 5602 list = list->next; 5603 } 5604 } 5605 } else { 5606 xmlFreeNodeList(list); 5607 list = NULL; 5608 } 5609 } else if ((ret != XML_ERR_OK) && 5610 (ret != XML_WAR_UNDECLARED_ENTITY)) { 5611 xmlFatalErr(ctxt, ret, NULL); 5612 } else if (list != NULL) { 5613 xmlFreeNodeList(list); 5614 list = NULL; 5615 } 5616 } 5617 } 5618 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5619 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 5620 /* 5621 * Create a node. 5622 */ 5623 ctxt->sax->reference(ctxt->userData, ent->name); 5624 return; 5625 } else if (ctxt->replaceEntities) { 5626 /* 5627 * There is a problem on the handling of _private for entities 5628 * (bug 155816): Should we copy the content of the field from 5629 * the entity (possibly overwriting some value set by the user 5630 * when a copy is created), should we leave it alone, or should 5631 * we try to take care of different situations? The problem 5632 * is exacerbated by the usage of this field by the xmlReader. 5633 * To fix this bug, we look at _private on the created node 5634 * and, if it's NULL, we copy in whatever was in the entity. 5635 * If it's not NULL we leave it alone. 
This is somewhat of a 5636 * hack - maybe we should have further tests to determine 5637 * what to do. 5638 */ 5639 if ((ctxt->node != NULL) && (ent->children != NULL)) { 5640 /* 5641 * Seems we are generating the DOM content, do 5642 * a simple tree copy for all references except the first 5643 * In the first occurrence list contains the replacement. 5644 * progressive == 2 means we are operating on the Reader 5645 * and since nodes are discarded we must copy all the time. 5646 */ 5647 if (((list == NULL) && (ent->owner == 0)) || 5648 (ctxt->parseMode == XML_PARSE_READER)) { 5649 xmlNodePtr nw = NULL, cur, firstChild = NULL; 5650 5651 /* 5652 * when operating on a reader, the entities definitions 5653 * are always owning the entities subtree. 5654 if (ctxt->parseMode == XML_PARSE_READER) 5655 ent->owner = 1; 5656 */ 5657 5658 cur = ent->children; 5659 while (cur != NULL) { 5660 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 5661 if (nw != NULL) { 5662 if (nw->_private == NULL) 5663 nw->_private = cur->_private; 5664 if (firstChild == NULL){ 5665 firstChild = nw; 5666 } 5667 nw = xmlAddChild(ctxt->node, nw); 5668 } 5669 if (cur == ent->last) { 5670 /* 5671 * needed to detect some strange empty 5672 * node cases in the reader tests 5673 */ 5674 if ((ctxt->parseMode == XML_PARSE_READER) && 5675 (nw->type == XML_ELEMENT_NODE) && 5676 (nw->children == NULL)) 5677 nw->extra = 1; 5678 5679 break; 5680 } 5681 cur = cur->next; 5682 } 5683#ifdef LIBXML_LEGACY_ENABLED 5684 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5685 xmlAddEntityReference(ent, firstChild, nw); 5686#endif /* LIBXML_LEGACY_ENABLED */ 5687 } else if (list == NULL) { 5688 xmlNodePtr nw = NULL, cur, next, last, 5689 firstChild = NULL; 5690 /* 5691 * Copy the entity child list and make it the new 5692 * entity child list. The goal is to make sure any 5693 * ID or REF referenced will be the one from the 5694 * document content and not the entity copy. 
5695 */ 5696 cur = ent->children; 5697 ent->children = NULL; 5698 last = ent->last; 5699 ent->last = NULL; 5700 while (cur != NULL) { 5701 next = cur->next; 5702 cur->next = NULL; 5703 cur->parent = NULL; 5704 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 5705 if (nw != NULL) { 5706 if (nw->_private == NULL) 5707 nw->_private = cur->_private; 5708 if (firstChild == NULL){ 5709 firstChild = cur; 5710 } 5711 xmlAddChild((xmlNodePtr) ent, nw); 5712 xmlAddChild(ctxt->node, cur); 5713 } 5714 if (cur == last) 5715 break; 5716 cur = next; 5717 } 5718 ent->owner = 1; 5719#ifdef LIBXML_LEGACY_ENABLED 5720 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 5721 xmlAddEntityReference(ent, firstChild, nw); 5722#endif /* LIBXML_LEGACY_ENABLED */ 5723 } else { 5724 const xmlChar *nbktext; 5725 5726 /* 5727 * the name change is to avoid coalescing of the 5728 * node with a possible previous text one which 5729 * would make ent->children a dangling pointer 5730 */ 5731 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 5732 -1); 5733 if (ent->children->type == XML_TEXT_NODE) 5734 ent->children->name = nbktext; 5735 if ((ent->last != ent->children) && 5736 (ent->last->type == XML_TEXT_NODE)) 5737 ent->last->name = nbktext; 5738 xmlAddChildList(ctxt->node, ent->children); 5739 } 5740 5741 /* 5742 * This is to avoid a nasty side effect, see 5743 * characters() in SAX.c 5744 */ 5745 ctxt->nodemem = 0; 5746 ctxt->nodelen = 0; 5747 return; 5748 } else { 5749 /* 5750 * Probably running in SAX mode 5751 */ 5752 xmlParserInputPtr input; 5753 5754 input = xmlNewEntityInputStream(ctxt, ent); 5755 xmlPushInput(ctxt, input); 5756 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && 5757 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 5758 (IS_BLANK_CH(NXT(5)))) { 5759 xmlParseTextDecl(ctxt); 5760 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5761 /* 5762 * The XML REC instructs us to stop parsing right here 5763 */ 5764 ctxt->instate = XML_PARSER_EOF; 5765 return; 5766 } 5767 if 
(input->standalone == 1) { 5768 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE, 5769 NULL); 5770 } 5771 } 5772 return; 5773 } 5774 } 5775 } else { 5776 val = ent->content; 5777 if (val == NULL) return; 5778 /* 5779 * inline the entity. 5780 */ 5781 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5782 (!ctxt->disableSAX)) 5783 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5784 } 5785 } 5786} 5787 5788/** 5789 * xmlParseEntityRef: 5790 * @ctxt: an XML parser context 5791 * 5792 * parse ENTITY references declarations 5793 * 5794 * [68] EntityRef ::= '&' Name ';' 5795 * 5796 * [ WFC: Entity Declared ] 5797 * In a document without any DTD, a document with only an internal DTD 5798 * subset which contains no parameter entity references, or a document 5799 * with "standalone='yes'", the Name given in the entity reference 5800 * must match that in an entity declaration, except that well-formed 5801 * documents need not declare any of the following entities: amp, lt, 5802 * gt, apos, quot. The declaration of a parameter entity must precede 5803 * any reference to it. Similarly, the declaration of a general entity 5804 * must precede any reference to it which appears in a default value in an 5805 * attribute-list declaration. Note that if entities are declared in the 5806 * external subset or in external parameter entities, a non-validating 5807 * processor is not obligated to read and process their declarations; 5808 * for such documents, the rule that an entity must be declared is a 5809 * well-formedness constraint only if standalone='yes'. 5810 * 5811 * [ WFC: Parsed Entity ] 5812 * An entity reference must not contain the name of an unparsed entity 5813 * 5814 * Returns the xmlEntityPtr if found, or NULL otherwise. 
5815 */ 5816xmlEntityPtr 5817xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5818 const xmlChar *name; 5819 xmlEntityPtr ent = NULL; 5820 5821 GROW; 5822 5823 if (RAW == '&') { 5824 NEXT; 5825 name = xmlParseName(ctxt); 5826 if (name == NULL) { 5827 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5828 "xmlParseEntityRef: no name\n"); 5829 } else { 5830 if (RAW == ';') { 5831 NEXT; 5832 /* 5833 * Ask first SAX for entity resolution, otherwise try the 5834 * predefined set. 5835 */ 5836 if (ctxt->sax != NULL) { 5837 if (ctxt->sax->getEntity != NULL) 5838 ent = ctxt->sax->getEntity(ctxt->userData, name); 5839 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 5840 ent = xmlGetPredefinedEntity(name); 5841 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 5842 (ctxt->userData==ctxt)) { 5843 ent = xmlSAX2GetEntity(ctxt, name); 5844 } 5845 } 5846 /* 5847 * [ WFC: Entity Declared ] 5848 * In a document without any DTD, a document with only an 5849 * internal DTD subset which contains no parameter entity 5850 * references, or a document with "standalone='yes'", the 5851 * Name given in the entity reference must match that in an 5852 * entity declaration, except that well-formed documents 5853 * need not declare any of the following entities: amp, lt, 5854 * gt, apos, quot. 5855 * The declaration of a parameter entity must precede any 5856 * reference to it. 5857 * Similarly, the declaration of a general entity must 5858 * precede any reference to it which appears in a default 5859 * value in an attribute-list declaration. Note that if 5860 * entities are declared in the external subset or in 5861 * external parameter entities, a non-validating processor 5862 * is not obligated to read and process their declarations; 5863 * for such documents, the rule that an entity must be 5864 * declared is a well-formedness constraint only if 5865 * standalone='yes'. 
5866 */ 5867 if (ent == NULL) { 5868 if ((ctxt->standalone == 1) || 5869 ((ctxt->hasExternalSubset == 0) && 5870 (ctxt->hasPErefs == 0))) { 5871 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 5872 "Entity '%s' not defined\n", name); 5873 } else { 5874 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 5875 "Entity '%s' not defined\n", name); 5876 } 5877 ctxt->valid = 0; 5878 } 5879 5880 /* 5881 * [ WFC: Parsed Entity ] 5882 * An entity reference must not contain the name of an 5883 * unparsed entity 5884 */ 5885 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 5887 "Entity reference to unparsed entity %s\n", name); 5888 } 5889 5890 /* 5891 * [ WFC: No External Entity References ] 5892 * Attribute values cannot contain direct or indirect 5893 * entity references to external entities. 5894 */ 5895 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5896 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5897 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 5898 "Attribute references external entity '%s'\n", name); 5899 } 5900 /* 5901 * [ WFC: No < in Attribute Values ] 5902 * The replacement text of any entity referred to directly or 5903 * indirectly in an attribute value (other than "<") must 5904 * not contain a <. 5905 */ 5906 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5907 (ent != NULL) && 5908 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 5909 (ent->content != NULL) && 5910 (xmlStrchr(ent->content, '<'))) { 5911 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 5912 "'<' in entity '%s' is not allowed in attributes values\n", name); 5913 } 5914 5915 /* 5916 * Internal check, no parameter entities here ... 
5917 */ 5918 else { 5919 switch (ent->etype) { 5920 case XML_INTERNAL_PARAMETER_ENTITY: 5921 case XML_EXTERNAL_PARAMETER_ENTITY: 5922 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 5923 "Attempt to reference the parameter entity '%s'\n", 5924 name); 5925 break; 5926 default: 5927 break; 5928 } 5929 } 5930 5931 /* 5932 * [ WFC: No Recursion ] 5933 * A parsed entity must not contain a recursive reference 5934 * to itself, either directly or indirectly. 5935 * Done somewhere else 5936 */ 5937 5938 } else { 5939 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 5940 } 5941 } 5942 } 5943 return(ent); 5944} 5945 5946/** 5947 * xmlParseStringEntityRef: 5948 * @ctxt: an XML parser context 5949 * @str: a pointer to an index in the string 5950 * 5951 * parse ENTITY references declarations, but this version parses it from 5952 * a string value. 5953 * 5954 * [68] EntityRef ::= '&' Name ';' 5955 * 5956 * [ WFC: Entity Declared ] 5957 * In a document without any DTD, a document with only an internal DTD 5958 * subset which contains no parameter entity references, or a document 5959 * with "standalone='yes'", the Name given in the entity reference 5960 * must match that in an entity declaration, except that well-formed 5961 * documents need not declare any of the following entities: amp, lt, 5962 * gt, apos, quot. The declaration of a parameter entity must precede 5963 * any reference to it. Similarly, the declaration of a general entity 5964 * must precede any reference to it which appears in a default value in an 5965 * attribute-list declaration. Note that if entities are declared in the 5966 * external subset or in external parameter entities, a non-validating 5967 * processor is not obligated to read and process their declarations; 5968 * for such documents, the rule that an entity must be declared is a 5969 * well-formedness constraint only if standalone='yes'. 
5970 * 5971 * [ WFC: Parsed Entity ] 5972 * An entity reference must not contain the name of an unparsed entity 5973 * 5974 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5975 * is updated to the current location in the string. 5976 */ 5977xmlEntityPtr 5978xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5979 xmlChar *name; 5980 const xmlChar *ptr; 5981 xmlChar cur; 5982 xmlEntityPtr ent = NULL; 5983 5984 if ((str == NULL) || (*str == NULL)) 5985 return(NULL); 5986 ptr = *str; 5987 cur = *ptr; 5988 if (cur == '&') { 5989 ptr++; 5990 cur = *ptr; 5991 name = xmlParseStringName(ctxt, &ptr); 5992 if (name == NULL) { 5993 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5994 "xmlParseStringEntityRef: no name\n"); 5995 } else { 5996 if (*ptr == ';') { 5997 ptr++; 5998 /* 5999 * Ask first SAX for entity resolution, otherwise try the 6000 * predefined set. 6001 */ 6002 if (ctxt->sax != NULL) { 6003 if (ctxt->sax->getEntity != NULL) 6004 ent = ctxt->sax->getEntity(ctxt->userData, name); 6005 if (ent == NULL) 6006 ent = xmlGetPredefinedEntity(name); 6007 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6008 ent = xmlSAX2GetEntity(ctxt, name); 6009 } 6010 } 6011 /* 6012 * [ WFC: Entity Declared ] 6013 * In a document without any DTD, a document with only an 6014 * internal DTD subset which contains no parameter entity 6015 * references, or a document with "standalone='yes'", the 6016 * Name given in the entity reference must match that in an 6017 * entity declaration, except that well-formed documents 6018 * need not declare any of the following entities: amp, lt, 6019 * gt, apos, quot. 6020 * The declaration of a parameter entity must precede any 6021 * reference to it. 6022 * Similarly, the declaration of a general entity must 6023 * precede any reference to it which appears in a default 6024 * value in an attribute-list declaration. 
Note that if 6025 * entities are declared in the external subset or in 6026 * external parameter entities, a non-validating processor 6027 * is not obligated to read and process their declarations; 6028 * for such documents, the rule that an entity must be 6029 * declared is a well-formedness constraint only if 6030 * standalone='yes'. 6031 */ 6032 if (ent == NULL) { 6033 if ((ctxt->standalone == 1) || 6034 ((ctxt->hasExternalSubset == 0) && 6035 (ctxt->hasPErefs == 0))) { 6036 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6037 "Entity '%s' not defined\n", name); 6038 } else { 6039 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6040 "Entity '%s' not defined\n", 6041 name); 6042 } 6043 /* TODO ? check regressions ctxt->valid = 0; */ 6044 } 6045 6046 /* 6047 * [ WFC: Parsed Entity ] 6048 * An entity reference must not contain the name of an 6049 * unparsed entity 6050 */ 6051 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6052 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6053 "Entity reference to unparsed entity %s\n", name); 6054 } 6055 6056 /* 6057 * [ WFC: No External Entity References ] 6058 * Attribute values cannot contain direct or indirect 6059 * entity references to external entities. 6060 */ 6061 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6062 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6063 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6064 "Attribute references external entity '%s'\n", name); 6065 } 6066 /* 6067 * [ WFC: No < in Attribute Values ] 6068 * The replacement text of any entity referred to directly or 6069 * indirectly in an attribute value (other than "<") must 6070 * not contain a <. 
6071 */ 6072 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6073 (ent != NULL) && 6074 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6075 (ent->content != NULL) && 6076 (xmlStrchr(ent->content, '<'))) { 6077 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6078 "'<' in entity '%s' is not allowed in attributes values\n", 6079 name); 6080 } 6081 6082 /* 6083 * Internal check, no parameter entities here ... 6084 */ 6085 else { 6086 switch (ent->etype) { 6087 case XML_INTERNAL_PARAMETER_ENTITY: 6088 case XML_EXTERNAL_PARAMETER_ENTITY: 6089 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6090 "Attempt to reference the parameter entity '%s'\n", 6091 name); 6092 break; 6093 default: 6094 break; 6095 } 6096 } 6097 6098 /* 6099 * [ WFC: No Recursion ] 6100 * A parsed entity must not contain a recursive reference 6101 * to itself, either directly or indirectly. 6102 * Done somewhere else 6103 */ 6104 6105 } else { 6106 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6107 } 6108 xmlFree(name); 6109 } 6110 } 6111 *str = ptr; 6112 return(ent); 6113} 6114 6115/** 6116 * xmlParsePEReference: 6117 * @ctxt: an XML parser context 6118 * 6119 * parse PEReference declarations 6120 * The entity content is handled directly by pushing it's content as 6121 * a new input stream. 6122 * 6123 * [69] PEReference ::= '%' Name ';' 6124 * 6125 * [ WFC: No Recursion ] 6126 * A parsed entity must not contain a recursive 6127 * reference to itself, either directly or indirectly. 6128 * 6129 * [ WFC: Entity Declared ] 6130 * In a document without any DTD, a document with only an internal DTD 6131 * subset which contains no parameter entity references, or a document 6132 * with "standalone='yes'", ... ... The declaration of a parameter 6133 * entity must precede any reference to it... 6134 * 6135 * [ VC: Entity Declared ] 6136 * In a document with an external subset or external parameter entities 6137 * with "standalone='no'", ... ... 
The declaration of a parameter entity 6138 * must precede any reference to it... 6139 * 6140 * [ WFC: In DTD ] 6141 * Parameter-entity references may only appear in the DTD. 6142 * NOTE: misleading but this is handled. 6143 */ 6144void 6145xmlParsePEReference(xmlParserCtxtPtr ctxt) 6146{ 6147 const xmlChar *name; 6148 xmlEntityPtr entity = NULL; 6149 xmlParserInputPtr input; 6150 6151 if (RAW == '%') { 6152 NEXT; 6153 name = xmlParseName(ctxt); 6154 if (name == NULL) { 6155 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6156 "xmlParsePEReference: no name\n"); 6157 } else { 6158 if (RAW == ';') { 6159 NEXT; 6160 if ((ctxt->sax != NULL) && 6161 (ctxt->sax->getParameterEntity != NULL)) 6162 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6163 name); 6164 if (entity == NULL) { 6165 /* 6166 * [ WFC: Entity Declared ] 6167 * In a document without any DTD, a document with only an 6168 * internal DTD subset which contains no parameter entity 6169 * references, or a document with "standalone='yes'", ... 6170 * ... The declaration of a parameter entity must precede 6171 * any reference to it... 6172 */ 6173 if ((ctxt->standalone == 1) || 6174 ((ctxt->hasExternalSubset == 0) && 6175 (ctxt->hasPErefs == 0))) { 6176 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6177 "PEReference: %%%s; not found\n", 6178 name); 6179 } else { 6180 /* 6181 * [ VC: Entity Declared ] 6182 * In a document with an external subset or external 6183 * parameter entities with "standalone='no'", ... 6184 * ... The declaration of a parameter entity must 6185 * precede any reference to it... 
6186 */ 6187 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6188 "PEReference: %%%s; not found\n", 6189 name, NULL); 6190 ctxt->valid = 0; 6191 } 6192 } else { 6193 /* 6194 * Internal checking in case the entity quest barfed 6195 */ 6196 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6197 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6198 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6199 "Internal: %%%s; is not a parameter entity\n", 6200 name, NULL); 6201 } else if (ctxt->input->free != deallocblankswrapper) { 6202 input = 6203 xmlNewBlanksWrapperInputStream(ctxt, entity); 6204 xmlPushInput(ctxt, input); 6205 } else { 6206 /* 6207 * TODO !!! 6208 * handle the extra spaces added before and after 6209 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6210 */ 6211 input = xmlNewEntityInputStream(ctxt, entity); 6212 xmlPushInput(ctxt, input); 6213 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6214 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6215 (IS_BLANK_CH(NXT(5)))) { 6216 xmlParseTextDecl(ctxt); 6217 if (ctxt->errNo == 6218 XML_ERR_UNSUPPORTED_ENCODING) { 6219 /* 6220 * The XML REC instructs us to stop parsing 6221 * right here 6222 */ 6223 ctxt->instate = XML_PARSER_EOF; 6224 return; 6225 } 6226 } 6227 } 6228 } 6229 ctxt->hasPErefs = 1; 6230 } else { 6231 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6232 } 6233 } 6234 } 6235} 6236 6237/** 6238 * xmlParseStringPEReference: 6239 * @ctxt: an XML parser context 6240 * @str: a pointer to an index in the string 6241 * 6242 * parse PEReference declarations 6243 * 6244 * [69] PEReference ::= '%' Name ';' 6245 * 6246 * [ WFC: No Recursion ] 6247 * A parsed entity must not contain a recursive 6248 * reference to itself, either directly or indirectly. 6249 * 6250 * [ WFC: Entity Declared ] 6251 * In a document without any DTD, a document with only an internal DTD 6252 * subset which contains no parameter entity references, or a document 6253 * with "standalone='yes'", ... ... 
The declaration of a parameter 6254 * entity must precede any reference to it... 6255 * 6256 * [ VC: Entity Declared ] 6257 * In a document with an external subset or external parameter entities 6258 * with "standalone='no'", ... ... The declaration of a parameter entity 6259 * must precede any reference to it... 6260 * 6261 * [ WFC: In DTD ] 6262 * Parameter-entity references may only appear in the DTD. 6263 * NOTE: misleading but this is handled. 6264 * 6265 * Returns the string of the entity content. 6266 * str is updated to the current value of the index 6267 */ 6268xmlEntityPtr 6269xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6270 const xmlChar *ptr; 6271 xmlChar cur; 6272 xmlChar *name; 6273 xmlEntityPtr entity = NULL; 6274 6275 if ((str == NULL) || (*str == NULL)) return(NULL); 6276 ptr = *str; 6277 cur = *ptr; 6278 if (cur == '%') { 6279 ptr++; 6280 cur = *ptr; 6281 name = xmlParseStringName(ctxt, &ptr); 6282 if (name == NULL) { 6283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6284 "xmlParseStringPEReference: no name\n"); 6285 } else { 6286 cur = *ptr; 6287 if (cur == ';') { 6288 ptr++; 6289 cur = *ptr; 6290 if ((ctxt->sax != NULL) && 6291 (ctxt->sax->getParameterEntity != NULL)) 6292 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6293 name); 6294 if (entity == NULL) { 6295 /* 6296 * [ WFC: Entity Declared ] 6297 * In a document without any DTD, a document with only an 6298 * internal DTD subset which contains no parameter entity 6299 * references, or a document with "standalone='yes'", ... 6300 * ... The declaration of a parameter entity must precede 6301 * any reference to it... 
6302 */ 6303 if ((ctxt->standalone == 1) || 6304 ((ctxt->hasExternalSubset == 0) && 6305 (ctxt->hasPErefs == 0))) { 6306 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6307 "PEReference: %%%s; not found\n", name); 6308 } else { 6309 /* 6310 * [ VC: Entity Declared ] 6311 * In a document with an external subset or external 6312 * parameter entities with "standalone='no'", ... 6313 * ... The declaration of a parameter entity must 6314 * precede any reference to it... 6315 */ 6316 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6317 "PEReference: %%%s; not found\n", 6318 name, NULL); 6319 ctxt->valid = 0; 6320 } 6321 } else { 6322 /* 6323 * Internal checking in case the entity quest barfed 6324 */ 6325 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6326 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6327 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6328 "%%%s; is not a parameter entity\n", 6329 name, NULL); 6330 } 6331 } 6332 ctxt->hasPErefs = 1; 6333 } else { 6334 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6335 } 6336 xmlFree(name); 6337 } 6338 } 6339 *str = ptr; 6340 return(entity); 6341} 6342 6343/** 6344 * xmlParseDocTypeDecl: 6345 * @ctxt: an XML parser context 6346 * 6347 * parse a DOCTYPE declaration 6348 * 6349 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6350 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6351 * 6352 * [ VC: Root Element Type ] 6353 * The Name in the document type declaration must match the element 6354 * type of the root element. 6355 */ 6356 6357void 6358xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6359 const xmlChar *name = NULL; 6360 xmlChar *ExternalID = NULL; 6361 xmlChar *URI = NULL; 6362 6363 /* 6364 * We know that '<!DOCTYPE' has been detected. 6365 */ 6366 SKIP(9); 6367 6368 SKIP_BLANKS; 6369 6370 /* 6371 * Parse the DOCTYPE name. 
6372 */ 6373 name = xmlParseName(ctxt); 6374 if (name == NULL) { 6375 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6376 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6377 } 6378 ctxt->intSubName = name; 6379 6380 SKIP_BLANKS; 6381 6382 /* 6383 * Check for SystemID and ExternalID 6384 */ 6385 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6386 6387 if ((URI != NULL) || (ExternalID != NULL)) { 6388 ctxt->hasExternalSubset = 1; 6389 } 6390 ctxt->extSubURI = URI; 6391 ctxt->extSubSystem = ExternalID; 6392 6393 SKIP_BLANKS; 6394 6395 /* 6396 * Create and update the internal subset. 6397 */ 6398 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6399 (!ctxt->disableSAX)) 6400 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6401 6402 /* 6403 * Is there any internal subset declarations ? 6404 * they are handled separately in xmlParseInternalSubset() 6405 */ 6406 if (RAW == '[') 6407 return; 6408 6409 /* 6410 * We should be at the end of the DOCTYPE declaration. 6411 */ 6412 if (RAW != '>') { 6413 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6414 } 6415 NEXT; 6416} 6417 6418/** 6419 * xmlParseInternalSubset: 6420 * @ctxt: an XML parser context 6421 * 6422 * parse the internal subset declaration 6423 * 6424 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6425 */ 6426 6427static void 6428xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6429 /* 6430 * Is there any DTD definition ? 6431 */ 6432 if (RAW == '[') { 6433 ctxt->instate = XML_PARSER_DTD; 6434 NEXT; 6435 /* 6436 * Parse the succession of Markup declarations and 6437 * PEReferences. 6438 * Subsequence (markupdecl | PEReference | S)* 6439 */ 6440 while (RAW != ']') { 6441 const xmlChar *check = CUR_PTR; 6442 unsigned int cons = ctxt->input->consumed; 6443 6444 SKIP_BLANKS; 6445 xmlParseMarkupDecl(ctxt); 6446 xmlParsePEReference(ctxt); 6447 6448 /* 6449 * Pop-up of finished entities. 
6450 */ 6451 while ((RAW == 0) && (ctxt->inputNr > 1)) 6452 xmlPopInput(ctxt); 6453 6454 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6455 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6456 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6457 break; 6458 } 6459 } 6460 if (RAW == ']') { 6461 NEXT; 6462 SKIP_BLANKS; 6463 } 6464 } 6465 6466 /* 6467 * We should be at the end of the DOCTYPE declaration. 6468 */ 6469 if (RAW != '>') { 6470 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6471 } 6472 NEXT; 6473} 6474 6475#ifdef LIBXML_SAX1_ENABLED 6476/** 6477 * xmlParseAttribute: 6478 * @ctxt: an XML parser context 6479 * @value: a xmlChar ** used to store the value of the attribute 6480 * 6481 * parse an attribute 6482 * 6483 * [41] Attribute ::= Name Eq AttValue 6484 * 6485 * [ WFC: No External Entity References ] 6486 * Attribute values cannot contain direct or indirect entity references 6487 * to external entities. 6488 * 6489 * [ WFC: No < in Attribute Values ] 6490 * The replacement text of any entity referred to directly or indirectly in 6491 * an attribute value (other than "<") must not contain a <. 6492 * 6493 * [ VC: Attribute Value Type ] 6494 * The attribute must have been declared; the value must be of the type 6495 * declared for it. 6496 * 6497 * [25] Eq ::= S? '=' S? 6498 * 6499 * With namespace: 6500 * 6501 * [NS 11] Attribute ::= QName Eq AttValue 6502 * 6503 * Also the case QName == xmlns:??? is handled independently as a namespace 6504 * definition. 6505 * 6506 * Returns the attribute name, and the value in *value. 
6507 */ 6508 6509const xmlChar * 6510xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 6511 const xmlChar *name; 6512 xmlChar *val; 6513 6514 *value = NULL; 6515 GROW; 6516 name = xmlParseName(ctxt); 6517 if (name == NULL) { 6518 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6519 "error parsing attribute name\n"); 6520 return(NULL); 6521 } 6522 6523 /* 6524 * read the value 6525 */ 6526 SKIP_BLANKS; 6527 if (RAW == '=') { 6528 NEXT; 6529 SKIP_BLANKS; 6530 val = xmlParseAttValue(ctxt); 6531 ctxt->instate = XML_PARSER_CONTENT; 6532 } else { 6533 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6534 "Specification mandate value for attribute %s\n", name); 6535 return(NULL); 6536 } 6537 6538 /* 6539 * Check that xml:lang conforms to the specification 6540 * No more registered as an error, just generate a warning now 6541 * since this was deprecated in XML second edition 6542 */ 6543 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 6544 if (!xmlCheckLanguageID(val)) { 6545 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 6546 "Malformed value for xml:lang : %s\n", 6547 val, NULL); 6548 } 6549 } 6550 6551 /* 6552 * Check that xml:space conforms to the specification 6553 */ 6554 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 6555 if (xmlStrEqual(val, BAD_CAST "default")) 6556 *(ctxt->space) = 0; 6557 else if (xmlStrEqual(val, BAD_CAST "preserve")) 6558 *(ctxt->space) = 1; 6559 else { 6560 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 6561"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 6562 val); 6563 } 6564 } 6565 6566 *value = val; 6567 return(name); 6568} 6569 6570/** 6571 * xmlParseStartTag: 6572 * @ctxt: an XML parser context 6573 * 6574 * parse a start of tag either for rule element or 6575 * EmptyElement. In both case we don't parse the tag closing chars. 6576 * 6577 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 6578 * 6579 * [ WFC: Unique Att Spec ] 6580 * No attribute name may appear more than once in the same start-tag or 6581 * empty-element tag. 6582 * 6583 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 6584 * 6585 * [ WFC: Unique Att Spec ] 6586 * No attribute name may appear more than once in the same start-tag or 6587 * empty-element tag. 6588 * 6589 * With namespace: 6590 * 6591 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 6592 * 6593 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 6594 * 6595 * Returns the element name parsed 6596 */ 6597 6598const xmlChar * 6599xmlParseStartTag(xmlParserCtxtPtr ctxt) { 6600 const xmlChar *name; 6601 const xmlChar *attname; 6602 xmlChar *attvalue; 6603 const xmlChar **atts = ctxt->atts; 6604 int nbatts = 0; 6605 int maxatts = ctxt->maxatts; 6606 int i; 6607 6608 if (RAW != '<') return(NULL); 6609 NEXT1; 6610 6611 name = xmlParseName(ctxt); 6612 if (name == NULL) { 6613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6614 "xmlParseStartTag: invalid element name\n"); 6615 return(NULL); 6616 } 6617 6618 /* 6619 * Now parse the attributes, it ends up with the ending 6620 * 6621 * (S Attribute)* S? 6622 */ 6623 SKIP_BLANKS; 6624 GROW; 6625 6626 while ((RAW != '>') && 6627 ((RAW != '/') || (NXT(1) != '>')) && 6628 (IS_BYTE_CHAR(RAW))) { 6629 const xmlChar *q = CUR_PTR; 6630 unsigned int cons = ctxt->input->consumed; 6631 6632 attname = xmlParseAttribute(ctxt, &attvalue); 6633 if ((attname != NULL) && (attvalue != NULL)) { 6634 /* 6635 * [ WFC: Unique Att Spec ] 6636 * No attribute name may appear more than once in the same 6637 * start-tag or empty-element tag. 
6638 */ 6639 for (i = 0; i < nbatts;i += 2) { 6640 if (xmlStrEqual(atts[i], attname)) { 6641 xmlErrAttributeDup(ctxt, NULL, attname); 6642 xmlFree(attvalue); 6643 goto failed; 6644 } 6645 } 6646 /* 6647 * Add the pair to atts 6648 */ 6649 if (atts == NULL) { 6650 maxatts = 22; /* allow for 10 attrs by default */ 6651 atts = (const xmlChar **) 6652 xmlMalloc(maxatts * sizeof(xmlChar *)); 6653 if (atts == NULL) { 6654 xmlErrMemory(ctxt, NULL); 6655 if (attvalue != NULL) 6656 xmlFree(attvalue); 6657 goto failed; 6658 } 6659 ctxt->atts = atts; 6660 ctxt->maxatts = maxatts; 6661 } else if (nbatts + 4 > maxatts) { 6662 const xmlChar **n; 6663 6664 maxatts *= 2; 6665 n = (const xmlChar **) xmlRealloc((void *) atts, 6666 maxatts * sizeof(const xmlChar *)); 6667 if (n == NULL) { 6668 xmlErrMemory(ctxt, NULL); 6669 if (attvalue != NULL) 6670 xmlFree(attvalue); 6671 goto failed; 6672 } 6673 atts = n; 6674 ctxt->atts = atts; 6675 ctxt->maxatts = maxatts; 6676 } 6677 atts[nbatts++] = attname; 6678 atts[nbatts++] = attvalue; 6679 atts[nbatts] = NULL; 6680 atts[nbatts + 1] = NULL; 6681 } else { 6682 if (attvalue != NULL) 6683 xmlFree(attvalue); 6684 } 6685 6686failed: 6687 6688 GROW 6689 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 6690 break; 6691 if (!IS_BLANK_CH(RAW)) { 6692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6693 "attributes construct error\n"); 6694 } 6695 SKIP_BLANKS; 6696 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 6697 (attname == NULL) && (attvalue == NULL)) { 6698 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 6699 "xmlParseStartTag: problem parsing attributes\n"); 6700 break; 6701 } 6702 SHRINK; 6703 GROW; 6704 } 6705 6706 /* 6707 * SAX: Start of Element ! 
6708 */ 6709 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 6710 (!ctxt->disableSAX)) { 6711 if (nbatts > 0) 6712 ctxt->sax->startElement(ctxt->userData, name, atts); 6713 else 6714 ctxt->sax->startElement(ctxt->userData, name, NULL); 6715 } 6716 6717 if (atts != NULL) { 6718 /* Free only the content strings */ 6719 for (i = 1;i < nbatts;i+=2) 6720 if (atts[i] != NULL) 6721 xmlFree((xmlChar *) atts[i]); 6722 } 6723 return(name); 6724} 6725 6726/** 6727 * xmlParseEndTag1: 6728 * @ctxt: an XML parser context 6729 * @line: line of the start tag 6730 * @nsNr: number of namespaces on the start tag 6731 * 6732 * parse an end of tag 6733 * 6734 * [42] ETag ::= '</' Name S? '>' 6735 * 6736 * With namespace 6737 * 6738 * [NS 9] ETag ::= '</' QName S? '>' 6739 */ 6740 6741static void 6742xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 6743 const xmlChar *name; 6744 6745 GROW; 6746 if ((RAW != '<') || (NXT(1) != '/')) { 6747 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 6748 "xmlParseEndTag: '</' not found\n"); 6749 return; 6750 } 6751 SKIP(2); 6752 6753 name = xmlParseNameAndCompare(ctxt,ctxt->name); 6754 6755 /* 6756 * We should definitely be at the ending "S? '>'" part 6757 */ 6758 GROW; 6759 SKIP_BLANKS; 6760 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 6761 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6762 } else 6763 NEXT1; 6764 6765 /* 6766 * [ WFC: Element Type Match ] 6767 * The Name in an element's end-tag must match the element type in the 6768 * start-tag. 
6769 * 6770 */ 6771 if (name != (xmlChar*)1) { 6772 if (name == NULL) name = BAD_CAST "unparseable"; 6773 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 6774 "Opening and ending tag mismatch: %s line %d and %s\n", 6775 ctxt->name, line, name); 6776 } 6777 6778 /* 6779 * SAX: End of Tag 6780 */ 6781 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 6782 (!ctxt->disableSAX)) 6783 ctxt->sax->endElement(ctxt->userData, ctxt->name); 6784 6785 namePop(ctxt); 6786 spacePop(ctxt); 6787 return; 6788} 6789 6790/** 6791 * xmlParseEndTag: 6792 * @ctxt: an XML parser context 6793 * 6794 * parse an end of tag 6795 * 6796 * [42] ETag ::= '</' Name S? '>' 6797 * 6798 * With namespace 6799 * 6800 * [NS 9] ETag ::= '</' QName S? '>' 6801 */ 6802 6803void 6804xmlParseEndTag(xmlParserCtxtPtr ctxt) { 6805 xmlParseEndTag1(ctxt, 0); 6806} 6807#endif /* LIBXML_SAX1_ENABLED */ 6808 6809/************************************************************************ 6810 * * 6811 * SAX 2 specific operations * 6812 * * 6813 ************************************************************************/ 6814 6815static const xmlChar * 6816xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 6817 int len = 0, l; 6818 int c; 6819 int count = 0; 6820 6821 /* 6822 * Handler for more complex cases 6823 */ 6824 GROW; 6825 c = CUR_CHAR(l); 6826 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 6827 (!IS_LETTER(c) && (c != '_'))) { 6828 return(NULL); 6829 } 6830 6831 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 6832 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 6833 (c == '.') || (c == '-') || (c == '_') || 6834 (IS_COMBINING(c)) || 6835 (IS_EXTENDER(c)))) { 6836 if (count++ > 100) { 6837 count = 0; 6838 GROW; 6839 } 6840 len += l; 6841 NEXTL(l); 6842 c = CUR_CHAR(l); 6843 } 6844 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 6845} 6846 6847/* 6848 * xmlGetNamespace: 6849 * @ctxt: an XML parser context 6850 * @prefix: the prefix to lookup 6851 * 
6852 * Lookup the namespace name for the @prefix (which ca be NULL) 6853 * The prefix must come from the @ctxt->dict dictionnary 6854 * 6855 * Returns the namespace name or NULL if not bound 6856 */ 6857static const xmlChar * 6858xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 6859 int i; 6860 6861 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 6862 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 6863 if (ctxt->nsTab[i] == prefix) { 6864 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 6865 return(NULL); 6866 return(ctxt->nsTab[i + 1]); 6867 } 6868 return(NULL); 6869} 6870 6871/** 6872 * xmlParseNCName: 6873 * @ctxt: an XML parser context 6874 * @len: lenght of the string parsed 6875 * 6876 * parse an XML name. 6877 * 6878 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 6879 * CombiningChar | Extender 6880 * 6881 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 6882 * 6883 * Returns the Name parsed or NULL 6884 */ 6885 6886static const xmlChar * 6887xmlParseNCName(xmlParserCtxtPtr ctxt) { 6888 const xmlChar *in; 6889 const xmlChar *ret; 6890 int count = 0; 6891 6892 /* 6893 * Accelerator for simple ASCII names 6894 */ 6895 in = ctxt->input->cur; 6896 if (((*in >= 0x61) && (*in <= 0x7A)) || 6897 ((*in >= 0x41) && (*in <= 0x5A)) || 6898 (*in == '_')) { 6899 in++; 6900 while (((*in >= 0x61) && (*in <= 0x7A)) || 6901 ((*in >= 0x41) && (*in <= 0x5A)) || 6902 ((*in >= 0x30) && (*in <= 0x39)) || 6903 (*in == '_') || (*in == '-') || 6904 (*in == '.')) 6905 in++; 6906 if ((*in > 0) && (*in < 0x80)) { 6907 count = in - ctxt->input->cur; 6908 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 6909 ctxt->input->cur = in; 6910 ctxt->nbChars += count; 6911 ctxt->input->col += count; 6912 if (ret == NULL) { 6913 xmlErrMemory(ctxt, NULL); 6914 } 6915 return(ret); 6916 } 6917 } 6918 return(xmlParseNCNameComplex(ctxt)); 6919} 6920 6921/** 6922 * xmlParseQName: 6923 * @ctxt: an XML parser context 6924 * @prefix: pointer to store the prefix part 6925 * 
6926 * parse an XML Namespace QName 6927 * 6928 * [6] QName ::= (Prefix ':')? LocalPart 6929 * [7] Prefix ::= NCName 6930 * [8] LocalPart ::= NCName 6931 * 6932 * Returns the Name parsed or NULL 6933 */ 6934 6935static const xmlChar * 6936xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 6937 const xmlChar *l, *p; 6938 6939 GROW; 6940 6941 l = xmlParseNCName(ctxt); 6942 if (l == NULL) { 6943 if (CUR == ':') { 6944 l = xmlParseName(ctxt); 6945 if (l != NULL) { 6946 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6947 "Failed to parse QName '%s'\n", l, NULL, NULL); 6948 *prefix = NULL; 6949 return(l); 6950 } 6951 } 6952 return(NULL); 6953 } 6954 if (CUR == ':') { 6955 NEXT; 6956 p = l; 6957 l = xmlParseNCName(ctxt); 6958 if (l == NULL) { 6959 xmlChar *tmp; 6960 6961 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6962 "Failed to parse QName '%s:'\n", p, NULL, NULL); 6963 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 6964 p = xmlDictLookup(ctxt->dict, tmp, -1); 6965 if (tmp != NULL) xmlFree(tmp); 6966 *prefix = NULL; 6967 return(p); 6968 } 6969 if (CUR == ':') { 6970 xmlChar *tmp; 6971 6972 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 6973 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 6974 NEXT; 6975 tmp = (xmlChar *) xmlParseName(ctxt); 6976 if (tmp != NULL) { 6977 tmp = xmlBuildQName(tmp, l, NULL, 0); 6978 l = xmlDictLookup(ctxt->dict, tmp, -1); 6979 if (tmp != NULL) xmlFree(tmp); 6980 *prefix = p; 6981 return(l); 6982 } 6983 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 6984 l = xmlDictLookup(ctxt->dict, tmp, -1); 6985 if (tmp != NULL) xmlFree(tmp); 6986 *prefix = p; 6987 return(l); 6988 } 6989 *prefix = p; 6990 } else 6991 *prefix = NULL; 6992 return(l); 6993} 6994 6995/** 6996 * xmlParseQNameAndCompare: 6997 * @ctxt: an XML parser context 6998 * @name: the localname 6999 * @prefix: the prefix, if any. 
7000 * 7001 * parse an XML name and compares for match 7002 * (specialized for endtag parsing) 7003 * 7004 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7005 * and the name for mismatch 7006 */ 7007 7008static const xmlChar * 7009xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7010 xmlChar const *prefix) { 7011 const xmlChar *cmp = name; 7012 const xmlChar *in; 7013 const xmlChar *ret; 7014 const xmlChar *prefix2; 7015 7016 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7017 7018 GROW; 7019 in = ctxt->input->cur; 7020 7021 cmp = prefix; 7022 while (*in != 0 && *in == *cmp) { 7023 ++in; 7024 ++cmp; 7025 } 7026 if ((*cmp == 0) && (*in == ':')) { 7027 in++; 7028 cmp = name; 7029 while (*in != 0 && *in == *cmp) { 7030 ++in; 7031 ++cmp; 7032 } 7033 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 7034 /* success */ 7035 ctxt->input->cur = in; 7036 return((const xmlChar*) 1); 7037 } 7038 } 7039 /* 7040 * all strings coms from the dictionary, equality can be done directly 7041 */ 7042 ret = xmlParseQName (ctxt, &prefix2); 7043 if ((ret == name) && (prefix == prefix2)) 7044 return((const xmlChar*) 1); 7045 return ret; 7046} 7047 7048/** 7049 * xmlParseAttValueInternal: 7050 * @ctxt: an XML parser context 7051 * @len: attribute len result 7052 * @alloc: whether the attribute was reallocated as a new string 7053 * @normalize: if 1 then further non-CDATA normalization must be done 7054 * 7055 * parse a value for an attribute. 7056 * NOTE: if no normalization is needed, the routine will return pointers 7057 * directly from the data buffer. 
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the
 *     caller if it was copied, this can be detected by val[*len] == 0.
7080 */ 7081 7082static xmlChar * 7083xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7084 int normalize) 7085{ 7086 xmlChar limit = 0; 7087 const xmlChar *in = NULL, *start, *end, *last; 7088 xmlChar *ret = NULL; 7089 7090 GROW; 7091 in = (xmlChar *) CUR_PTR; 7092 if (*in != '"' && *in != '\'') { 7093 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7094 return (NULL); 7095 } 7096 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7097 7098 /* 7099 * try to handle in this routine the most common case where no 7100 * allocation of a new string is required and where content is 7101 * pure ASCII. 7102 */ 7103 limit = *in++; 7104 end = ctxt->input->end; 7105 start = in; 7106 if (in >= end) { 7107 const xmlChar *oldbase = ctxt->input->base; 7108 GROW; 7109 if (oldbase != ctxt->input->base) { 7110 long delta = ctxt->input->base - oldbase; 7111 start = start + delta; 7112 in = in + delta; 7113 } 7114 end = ctxt->input->end; 7115 } 7116 if (normalize) { 7117 /* 7118 * Skip any leading spaces 7119 */ 7120 while ((in < end) && (*in != limit) && 7121 ((*in == 0x20) || (*in == 0x9) || 7122 (*in == 0xA) || (*in == 0xD))) { 7123 in++; 7124 start = in; 7125 if (in >= end) { 7126 const xmlChar *oldbase = ctxt->input->base; 7127 GROW; 7128 if (oldbase != ctxt->input->base) { 7129 long delta = ctxt->input->base - oldbase; 7130 start = start + delta; 7131 in = in + delta; 7132 } 7133 end = ctxt->input->end; 7134 } 7135 } 7136 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7137 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7138 if ((*in++ == 0x20) && (*in == 0x20)) break; 7139 if (in >= end) { 7140 const xmlChar *oldbase = ctxt->input->base; 7141 GROW; 7142 if (oldbase != ctxt->input->base) { 7143 long delta = ctxt->input->base - oldbase; 7144 start = start + delta; 7145 in = in + delta; 7146 } 7147 end = ctxt->input->end; 7148 } 7149 } 7150 last = in; 7151 /* 7152 * skip the trailing blanks 7153 */ 7154 while ((last[-1] == 0x20) && (last > start)) 
last--; 7155 while ((in < end) && (*in != limit) && 7156 ((*in == 0x20) || (*in == 0x9) || 7157 (*in == 0xA) || (*in == 0xD))) { 7158 in++; 7159 if (in >= end) { 7160 const xmlChar *oldbase = ctxt->input->base; 7161 GROW; 7162 if (oldbase != ctxt->input->base) { 7163 long delta = ctxt->input->base - oldbase; 7164 start = start + delta; 7165 in = in + delta; 7166 last = last + delta; 7167 } 7168 end = ctxt->input->end; 7169 } 7170 } 7171 if (*in != limit) goto need_complex; 7172 } else { 7173 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7174 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7175 in++; 7176 if (in >= end) { 7177 const xmlChar *oldbase = ctxt->input->base; 7178 GROW; 7179 if (oldbase != ctxt->input->base) { 7180 long delta = ctxt->input->base - oldbase; 7181 start = start + delta; 7182 in = in + delta; 7183 } 7184 end = ctxt->input->end; 7185 } 7186 } 7187 last = in; 7188 if (*in != limit) goto need_complex; 7189 } 7190 in++; 7191 if (len != NULL) { 7192 *len = last - start; 7193 ret = (xmlChar *) start; 7194 } else { 7195 if (alloc) *alloc = 1; 7196 ret = xmlStrndup(start, last - start); 7197 } 7198 CUR_PTR = in; 7199 if (alloc) *alloc = 0; 7200 return ret; 7201need_complex: 7202 if (alloc) *alloc = 1; 7203 return xmlParseAttValueComplex(ctxt, len, normalize); 7204} 7205 7206/** 7207 * xmlParseAttribute2: 7208 * @ctxt: an XML parser context 7209 * @pref: the element prefix 7210 * @elem: the element name 7211 * @prefix: a xmlChar ** used to store the value of the attribute prefix 7212 * @value: a xmlChar ** used to store the value of the attribute 7213 * @len: an int * to save the length of the attribute 7214 * @alloc: an int * to indicate if the attribute was allocated 7215 * 7216 * parse an attribute in the new SAX2 framework. 7217 * 7218 * Returns the attribute name, and the value in *value, . 
7219 */ 7220 7221static const xmlChar * 7222xmlParseAttribute2(xmlParserCtxtPtr ctxt, 7223 const xmlChar *pref, const xmlChar *elem, 7224 const xmlChar **prefix, xmlChar **value, 7225 int *len, int *alloc) { 7226 const xmlChar *name; 7227 xmlChar *val; 7228 int normalize = 0; 7229 7230 *value = NULL; 7231 GROW; 7232 name = xmlParseQName(ctxt, prefix); 7233 if (name == NULL) { 7234 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7235 "error parsing attribute name\n"); 7236 return(NULL); 7237 } 7238 7239 /* 7240 * get the type if needed 7241 */ 7242 if (ctxt->attsSpecial != NULL) { 7243 int type; 7244 7245 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 7246 pref, elem, *prefix, name); 7247 if (type != 0) normalize = 1; 7248 } 7249 7250 /* 7251 * read the value 7252 */ 7253 SKIP_BLANKS; 7254 if (RAW == '=') { 7255 NEXT; 7256 SKIP_BLANKS; 7257 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 7258 ctxt->instate = XML_PARSER_CONTENT; 7259 } else { 7260 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7261 "Specification mandate value for attribute %s\n", name); 7262 return(NULL); 7263 } 7264 7265 /* 7266 * Check that xml:lang conforms to the specification 7267 * No more registered as an error, just generate a warning now 7268 * since this was deprecated in XML second edition 7269 */ 7270 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7271 if (!xmlCheckLanguageID(val)) { 7272 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7273 "Malformed value for xml:lang : %s\n", 7274 val, NULL); 7275 } 7276 } 7277 7278 /* 7279 * Check that xml:space conforms to the specification 7280 */ 7281 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7282 if (xmlStrEqual(val, BAD_CAST "default")) 7283 *(ctxt->space) = 0; 7284 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7285 *(ctxt->space) = 1; 7286 else { 7287 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7288"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7289 
val); 7290 } 7291 } 7292 7293 *value = val; 7294 return(name); 7295} 7296 7297/** 7298 * xmlParseStartTag2: 7299 * @ctxt: an XML parser context 7300 * 7301 * parse a start of tag either for rule element or 7302 * EmptyElement. In both case we don't parse the tag closing chars. 7303 * This routine is called when running SAX2 parsing 7304 * 7305 * [40] STag ::= '<' Name (S Attribute)* S? '>' 7306 * 7307 * [ WFC: Unique Att Spec ] 7308 * No attribute name may appear more than once in the same start-tag or 7309 * empty-element tag. 7310 * 7311 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7312 * 7313 * [ WFC: Unique Att Spec ] 7314 * No attribute name may appear more than once in the same start-tag or 7315 * empty-element tag. 7316 * 7317 * With namespace: 7318 * 7319 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7320 * 7321 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7322 * 7323 * Returns the element name parsed 7324 */ 7325 7326static const xmlChar * 7327xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 7328 const xmlChar **URI, int *tlen) { 7329 const xmlChar *localname; 7330 const xmlChar *prefix; 7331 const xmlChar *attname; 7332 const xmlChar *aprefix; 7333 const xmlChar *nsname; 7334 xmlChar *attvalue; 7335 const xmlChar **atts = ctxt->atts; 7336 int maxatts = ctxt->maxatts; 7337 int nratts, nbatts, nbdef; 7338 int i, j, nbNs, attval; 7339 const xmlChar *base; 7340 unsigned long cur; 7341 7342 if (RAW != '<') return(NULL); 7343 NEXT1; 7344 7345 /* 7346 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 7347 * point since the attribute values may be stored as pointers to 7348 * the buffer and calling SHRINK would destroy them ! 7349 * The Shrinking is only possible once the full set of attribute 7350 * callbacks have been done. 
7351 */ 7352reparse: 7353 SHRINK; 7354 base = ctxt->input->base; 7355 cur = ctxt->input->cur - ctxt->input->base; 7356 nbatts = 0; 7357 nratts = 0; 7358 nbdef = 0; 7359 nbNs = 0; 7360 attval = 0; 7361 7362 localname = xmlParseQName(ctxt, &prefix); 7363 if (localname == NULL) { 7364 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7365 "StartTag: invalid element name\n"); 7366 return(NULL); 7367 } 7368 *tlen = ctxt->input->cur - ctxt->input->base - cur; 7369 7370 /* 7371 * Now parse the attributes, it ends up with the ending 7372 * 7373 * (S Attribute)* S? 7374 */ 7375 SKIP_BLANKS; 7376 GROW; 7377 if (ctxt->input->base != base) goto base_changed; 7378 7379 while ((RAW != '>') && 7380 ((RAW != '/') || (NXT(1) != '>')) && 7381 (IS_BYTE_CHAR(RAW))) { 7382 const xmlChar *q = CUR_PTR; 7383 unsigned int cons = ctxt->input->consumed; 7384 int len = -1, alloc = 0; 7385 7386 attname = xmlParseAttribute2(ctxt, prefix, localname, 7387 &aprefix, &attvalue, &len, &alloc); 7388 if ((attname != NULL) && (attvalue != NULL)) { 7389 if (len < 0) len = xmlStrlen(attvalue); 7390 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7391 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7392 xmlURIPtr uri; 7393 7394 if (*URL != 0) { 7395 uri = xmlParseURI((const char *) URL); 7396 if (uri == NULL) { 7397 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7398 "xmlns: %s not a valid URI\n", 7399 URL, NULL); 7400 } else { 7401 if (uri->scheme == NULL) { 7402 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7403 "xmlns: URI %s is not absolute\n", 7404 URL, NULL); 7405 } 7406 xmlFreeURI(uri); 7407 } 7408 } 7409 /* 7410 * check that it's not a defined namespace 7411 */ 7412 for (j = 1;j <= nbNs;j++) 7413 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7414 break; 7415 if (j <= nbNs) 7416 xmlErrAttributeDup(ctxt, NULL, attname); 7417 else 7418 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 7419 if (alloc != 0) xmlFree(attvalue); 7420 SKIP_BLANKS; 7421 continue; 7422 } 7423 if (aprefix == ctxt->str_xmlns) { 
7424 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7425 xmlURIPtr uri; 7426 7427 if (attname == ctxt->str_xml) { 7428 if (URL != ctxt->str_xml_ns) { 7429 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 7430 "xml namespace prefix mapped to wrong URI\n", 7431 NULL, NULL, NULL); 7432 } 7433 /* 7434 * Do not keep a namespace definition node 7435 */ 7436 if (alloc != 0) xmlFree(attvalue); 7437 SKIP_BLANKS; 7438 continue; 7439 } 7440 uri = xmlParseURI((const char *) URL); 7441 if (uri == NULL) { 7442 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7443 "xmlns:%s: '%s' is not a valid URI\n", 7444 attname, URL); 7445 } else { 7446 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 7447 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7448 "xmlns:%s: URI %s is not absolute\n", 7449 attname, URL); 7450 } 7451 xmlFreeURI(uri); 7452 } 7453 7454 /* 7455 * check that it's not a defined namespace 7456 */ 7457 for (j = 1;j <= nbNs;j++) 7458 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7459 break; 7460 if (j <= nbNs) 7461 xmlErrAttributeDup(ctxt, aprefix, attname); 7462 else 7463 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 7464 if (alloc != 0) xmlFree(attvalue); 7465 SKIP_BLANKS; 7466 if (ctxt->input->base != base) goto base_changed; 7467 continue; 7468 } 7469 7470 /* 7471 * Add the pair to atts 7472 */ 7473 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7474 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7475 if (attvalue[len] == 0) 7476 xmlFree(attvalue); 7477 goto failed; 7478 } 7479 maxatts = ctxt->maxatts; 7480 atts = ctxt->atts; 7481 } 7482 ctxt->attallocs[nratts++] = alloc; 7483 atts[nbatts++] = attname; 7484 atts[nbatts++] = aprefix; 7485 atts[nbatts++] = NULL; /* the URI will be fetched later */ 7486 atts[nbatts++] = attvalue; 7487 attvalue += len; 7488 atts[nbatts++] = attvalue; 7489 /* 7490 * tag if some deallocation is needed 7491 */ 7492 if (alloc != 0) attval = 1; 7493 } else { 7494 if ((attvalue != NULL) && (attvalue[len] == 0)) 7495 xmlFree(attvalue); 7496 } 7497 
7498failed: 7499 7500 GROW 7501 if (ctxt->input->base != base) goto base_changed; 7502 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7503 break; 7504 if (!IS_BLANK_CH(RAW)) { 7505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7506 "attributes construct error\n"); 7507 break; 7508 } 7509 SKIP_BLANKS; 7510 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7511 (attname == NULL) && (attvalue == NULL)) { 7512 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7513 "xmlParseStartTag: problem parsing attributes\n"); 7514 break; 7515 } 7516 GROW; 7517 if (ctxt->input->base != base) goto base_changed; 7518 } 7519 7520 /* 7521 * The attributes defaulting 7522 */ 7523 if (ctxt->attsDefault != NULL) { 7524 xmlDefAttrsPtr defaults; 7525 7526 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 7527 if (defaults != NULL) { 7528 for (i = 0;i < defaults->nbAttrs;i++) { 7529 attname = defaults->values[4 * i]; 7530 aprefix = defaults->values[4 * i + 1]; 7531 7532 /* 7533 * special work for namespaces defaulted defs 7534 */ 7535 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7536 /* 7537 * check that it's not a defined namespace 7538 */ 7539 for (j = 1;j <= nbNs;j++) 7540 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7541 break; 7542 if (j <= nbNs) continue; 7543 7544 nsname = xmlGetNamespace(ctxt, NULL); 7545 if (nsname != defaults->values[4 * i + 2]) { 7546 if (nsPush(ctxt, NULL, 7547 defaults->values[4 * i + 2]) > 0) 7548 nbNs++; 7549 } 7550 } else if (aprefix == ctxt->str_xmlns) { 7551 /* 7552 * check that it's not a defined namespace 7553 */ 7554 for (j = 1;j <= nbNs;j++) 7555 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 7556 break; 7557 if (j <= nbNs) continue; 7558 7559 nsname = xmlGetNamespace(ctxt, attname); 7560 if (nsname != defaults->values[2]) { 7561 if (nsPush(ctxt, attname, 7562 defaults->values[4 * i + 2]) > 0) 7563 nbNs++; 7564 } 7565 } else { 7566 /* 7567 * check that it's not a defined attribute 7568 */ 7569 for (j = 0;j < 
nbatts;j+=5) { 7570 if ((attname == atts[j]) && (aprefix == atts[j+1])) 7571 break; 7572 } 7573 if (j < nbatts) continue; 7574 7575 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 7576 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 7577 return(NULL); 7578 } 7579 maxatts = ctxt->maxatts; 7580 atts = ctxt->atts; 7581 } 7582 atts[nbatts++] = attname; 7583 atts[nbatts++] = aprefix; 7584 if (aprefix == NULL) 7585 atts[nbatts++] = NULL; 7586 else 7587 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 7588 atts[nbatts++] = defaults->values[4 * i + 2]; 7589 atts[nbatts++] = defaults->values[4 * i + 3]; 7590 nbdef++; 7591 } 7592 } 7593 } 7594 } 7595 7596 /* 7597 * The attributes checkings 7598 */ 7599 for (i = 0; i < nbatts;i += 5) { 7600 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 7601 if ((atts[i + 1] != NULL) && (nsname == NULL)) { 7602 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7603 "Namespace prefix %s for %s on %s is not defined\n", 7604 atts[i + 1], atts[i], localname); 7605 } 7606 atts[i + 2] = nsname; 7607 /* 7608 * [ WFC: Unique Att Spec ] 7609 * No attribute name may appear more than once in the same 7610 * start-tag or empty-element tag. 7611 * As extended by the Namespace in XML REC. 7612 */ 7613 for (j = 0; j < i;j += 5) { 7614 if (atts[i] == atts[j]) { 7615 if (atts[i+1] == atts[j+1]) { 7616 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 7617 break; 7618 } 7619 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 7620 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 7621 "Namespaced Attribute %s in '%s' redefined\n", 7622 atts[i], nsname, NULL); 7623 break; 7624 } 7625 } 7626 } 7627 } 7628 7629 nsname = xmlGetNamespace(ctxt, prefix); 7630 if ((prefix != NULL) && (nsname == NULL)) { 7631 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 7632 "Namespace prefix %s on %s is not defined\n", 7633 prefix, localname, NULL); 7634 } 7635 *pref = prefix; 7636 *URI = nsname; 7637 7638 /* 7639 * SAX: Start of Element ! 
7640 */ 7641 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 7642 (!ctxt->disableSAX)) { 7643 if (nbNs > 0) 7644 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7645 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 7646 nbatts / 5, nbdef, atts); 7647 else 7648 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 7649 nsname, 0, NULL, nbatts / 5, nbdef, atts); 7650 } 7651 7652 /* 7653 * Free up attribute allocated strings if needed 7654 */ 7655 if (attval != 0) { 7656 for (i = 3,j = 0; j < nratts;i += 5,j++) 7657 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7658 xmlFree((xmlChar *) atts[i]); 7659 } 7660 7661 return(localname); 7662 7663base_changed: 7664 /* 7665 * the attribute strings are valid iif the base didn't changed 7666 */ 7667 if (attval != 0) { 7668 for (i = 3,j = 0; j < nratts;i += 5,j++) 7669 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 7670 xmlFree((xmlChar *) atts[i]); 7671 } 7672 ctxt->input->cur = ctxt->input->base + cur; 7673 if (ctxt->wellFormed == 1) { 7674 goto reparse; 7675 } 7676 return(NULL); 7677} 7678 7679/** 7680 * xmlParseEndTag2: 7681 * @ctxt: an XML parser context 7682 * @line: line of the start tag 7683 * @nsNr: number of namespaces on the start tag 7684 * 7685 * parse an end of tag 7686 * 7687 * [42] ETag ::= '</' Name S? '>' 7688 * 7689 * With namespace 7690 * 7691 * [NS 9] ETag ::= '</' QName S? 
'>' 7692 */ 7693 7694static void 7695xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 7696 const xmlChar *URI, int line, int nsNr, int tlen) { 7697 const xmlChar *name; 7698 7699 GROW; 7700 if ((RAW != '<') || (NXT(1) != '/')) { 7701 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 7702 return; 7703 } 7704 SKIP(2); 7705 7706 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 7707 if (ctxt->input->cur[tlen] == '>') { 7708 ctxt->input->cur += tlen + 1; 7709 goto done; 7710 } 7711 ctxt->input->cur += tlen; 7712 name = (xmlChar*)1; 7713 } else { 7714 if (prefix == NULL) 7715 name = xmlParseNameAndCompare(ctxt, ctxt->name); 7716 else 7717 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 7718 } 7719 7720 /* 7721 * We should definitely be at the ending "S? '>'" part 7722 */ 7723 GROW; 7724 SKIP_BLANKS; 7725 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7726 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7727 } else 7728 NEXT1; 7729 7730 /* 7731 * [ WFC: Element Type Match ] 7732 * The Name in an element's end-tag must match the element type in the 7733 * start-tag. 7734 * 7735 */ 7736 if (name != (xmlChar*)1) { 7737 if (name == NULL) name = BAD_CAST "unparseable"; 7738 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7739 "Opening and ending tag mismatch: %s line %d and %s\n", 7740 ctxt->name, line, name); 7741 } 7742 7743 /* 7744 * SAX: End of Tag 7745 */ 7746done: 7747 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 7748 (!ctxt->disableSAX)) 7749 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 7750 7751 spacePop(ctxt); 7752 if (nsNr != 0) 7753 nsPop(ctxt, nsNr); 7754 return; 7755} 7756 7757/** 7758 * xmlParseCDSect: 7759 * @ctxt: an XML parser context 7760 * 7761 * Parse escaped pure raw content. 
7762 * 7763 * [18] CDSect ::= CDStart CData CDEnd 7764 * 7765 * [19] CDStart ::= '<![CDATA[' 7766 * 7767 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 7768 * 7769 * [21] CDEnd ::= ']]>' 7770 */ 7771void 7772xmlParseCDSect(xmlParserCtxtPtr ctxt) { 7773 xmlChar *buf = NULL; 7774 int len = 0; 7775 int size = XML_PARSER_BUFFER_SIZE; 7776 int r, rl; 7777 int s, sl; 7778 int cur, l; 7779 int count = 0; 7780 7781 /* Check 2.6.0 was NXT(0) not RAW */ 7782 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 7783 SKIP(9); 7784 } else 7785 return; 7786 7787 ctxt->instate = XML_PARSER_CDATA_SECTION; 7788 r = CUR_CHAR(rl); 7789 if (!IS_CHAR(r)) { 7790 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7791 ctxt->instate = XML_PARSER_CONTENT; 7792 return; 7793 } 7794 NEXTL(rl); 7795 s = CUR_CHAR(sl); 7796 if (!IS_CHAR(s)) { 7797 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 7798 ctxt->instate = XML_PARSER_CONTENT; 7799 return; 7800 } 7801 NEXTL(sl); 7802 cur = CUR_CHAR(l); 7803 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 7804 if (buf == NULL) { 7805 xmlErrMemory(ctxt, NULL); 7806 return; 7807 } 7808 while (IS_CHAR(cur) && 7809 ((r != ']') || (s != ']') || (cur != '>'))) { 7810 if (len + 5 >= size) { 7811 xmlChar *tmp; 7812 7813 size *= 2; 7814 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 7815 if (tmp == NULL) { 7816 xmlFree(buf); 7817 xmlErrMemory(ctxt, NULL); 7818 return; 7819 } 7820 buf = tmp; 7821 } 7822 COPY_BUF(rl,buf,len,r); 7823 r = s; 7824 rl = sl; 7825 s = cur; 7826 sl = l; 7827 count++; 7828 if (count > 50) { 7829 GROW; 7830 count = 0; 7831 } 7832 NEXTL(l); 7833 cur = CUR_CHAR(l); 7834 } 7835 buf[len] = 0; 7836 ctxt->instate = XML_PARSER_CONTENT; 7837 if (cur != '>') { 7838 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 7839 "CData section not finished\n%.50s\n", buf); 7840 xmlFree(buf); 7841 return; 7842 } 7843 NEXTL(l); 7844 7845 /* 7846 * OK the buffer is to be consumed as cdata. 
7847 */ 7848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 7849 if (ctxt->sax->cdataBlock != NULL) 7850 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 7851 else if (ctxt->sax->characters != NULL) 7852 ctxt->sax->characters(ctxt->userData, buf, len); 7853 } 7854 xmlFree(buf); 7855} 7856 7857/** 7858 * xmlParseContent: 7859 * @ctxt: an XML parser context 7860 * 7861 * Parse a content: 7862 * 7863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 7864 */ 7865 7866void 7867xmlParseContent(xmlParserCtxtPtr ctxt) { 7868 GROW; 7869 while ((RAW != 0) && 7870 ((RAW != '<') || (NXT(1) != '/'))) { 7871 const xmlChar *test = CUR_PTR; 7872 unsigned int cons = ctxt->input->consumed; 7873 const xmlChar *cur = ctxt->input->cur; 7874 7875 /* 7876 * First case : a Processing Instruction. 7877 */ 7878 if ((*cur == '<') && (cur[1] == '?')) { 7879 xmlParsePI(ctxt); 7880 } 7881 7882 /* 7883 * Second case : a CDSection 7884 */ 7885 /* 2.6.0 test was *cur not RAW */ 7886 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 7887 xmlParseCDSect(ctxt); 7888 } 7889 7890 /* 7891 * Third case : a comment 7892 */ 7893 else if ((*cur == '<') && (NXT(1) == '!') && 7894 (NXT(2) == '-') && (NXT(3) == '-')) { 7895 xmlParseComment(ctxt); 7896 ctxt->instate = XML_PARSER_CONTENT; 7897 } 7898 7899 /* 7900 * Fourth case : a sub-element. 7901 */ 7902 else if (*cur == '<') { 7903 xmlParseElement(ctxt); 7904 } 7905 7906 /* 7907 * Fifth case : a reference. If if has not been resolved, 7908 * parsing returns it's Name, create the node 7909 */ 7910 7911 else if (*cur == '&') { 7912 xmlParseReference(ctxt); 7913 } 7914 7915 /* 7916 * Last case, text. Note that References are handled directly. 7917 */ 7918 else { 7919 xmlParseCharData(ctxt, 0); 7920 } 7921 7922 GROW; 7923 /* 7924 * Pop-up of finished entities. 
7925 */ 7926 while ((RAW == 0) && (ctxt->inputNr > 1)) 7927 xmlPopInput(ctxt); 7928 SHRINK; 7929 7930 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 7931 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7932 "detected an error in element content\n"); 7933 ctxt->instate = XML_PARSER_EOF; 7934 break; 7935 } 7936 } 7937} 7938 7939/** 7940 * xmlParseElement: 7941 * @ctxt: an XML parser context 7942 * 7943 * parse an XML element, this is highly recursive 7944 * 7945 * [39] element ::= EmptyElemTag | STag content ETag 7946 * 7947 * [ WFC: Element Type Match ] 7948 * The Name in an element's end-tag must match the element type in the 7949 * start-tag. 7950 * 7951 */ 7952 7953void 7954xmlParseElement(xmlParserCtxtPtr ctxt) { 7955 const xmlChar *name; 7956 const xmlChar *prefix; 7957 const xmlChar *URI; 7958 xmlParserNodeInfo node_info; 7959 int line, tlen; 7960 xmlNodePtr ret; 7961 int nsNr = ctxt->nsNr; 7962 7963 /* Capture start position */ 7964 if (ctxt->record_info) { 7965 node_info.begin_pos = ctxt->input->consumed + 7966 (CUR_PTR - ctxt->input->base); 7967 node_info.begin_line = ctxt->input->line; 7968 } 7969 7970 if (ctxt->spaceNr == 0) 7971 spacePush(ctxt, -1); 7972 else 7973 spacePush(ctxt, *ctxt->space); 7974 7975 line = ctxt->input->line; 7976#ifdef LIBXML_SAX1_ENABLED 7977 if (ctxt->sax2) 7978#endif /* LIBXML_SAX1_ENABLED */ 7979 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 7980#ifdef LIBXML_SAX1_ENABLED 7981 else 7982 name = xmlParseStartTag(ctxt); 7983#endif /* LIBXML_SAX1_ENABLED */ 7984 if (name == NULL) { 7985 spacePop(ctxt); 7986 return; 7987 } 7988 namePush(ctxt, name); 7989 ret = ctxt->node; 7990 7991#ifdef LIBXML_VALID_ENABLED 7992 /* 7993 * [ VC: Root Element Type ] 7994 * The Name in the document type declaration must match the element 7995 * type of the root element. 
7996 */ 7997 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7998 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 7999 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8000#endif /* LIBXML_VALID_ENABLED */ 8001 8002 /* 8003 * Check for an Empty Element. 8004 */ 8005 if ((RAW == '/') && (NXT(1) == '>')) { 8006 SKIP(2); 8007 if (ctxt->sax2) { 8008 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8009 (!ctxt->disableSAX)) 8010 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8011#ifdef LIBXML_SAX1_ENABLED 8012 } else { 8013 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8014 (!ctxt->disableSAX)) 8015 ctxt->sax->endElement(ctxt->userData, name); 8016#endif /* LIBXML_SAX1_ENABLED */ 8017 } 8018 namePop(ctxt); 8019 spacePop(ctxt); 8020 if (nsNr != ctxt->nsNr) 8021 nsPop(ctxt, ctxt->nsNr - nsNr); 8022 if ( ret != NULL && ctxt->record_info ) { 8023 node_info.end_pos = ctxt->input->consumed + 8024 (CUR_PTR - ctxt->input->base); 8025 node_info.end_line = ctxt->input->line; 8026 node_info.node = ret; 8027 xmlParserAddNodeInfo(ctxt, &node_info); 8028 } 8029 return; 8030 } 8031 if (RAW == '>') { 8032 NEXT1; 8033 } else { 8034 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 8035 "Couldn't find end of Start Tag %s line %d\n", 8036 name, line, NULL); 8037 8038 /* 8039 * end of parsing of this node. 
8040 */ 8041 nodePop(ctxt); 8042 namePop(ctxt); 8043 spacePop(ctxt); 8044 if (nsNr != ctxt->nsNr) 8045 nsPop(ctxt, ctxt->nsNr - nsNr); 8046 8047 /* 8048 * Capture end position and add node 8049 */ 8050 if ( ret != NULL && ctxt->record_info ) { 8051 node_info.end_pos = ctxt->input->consumed + 8052 (CUR_PTR - ctxt->input->base); 8053 node_info.end_line = ctxt->input->line; 8054 node_info.node = ret; 8055 xmlParserAddNodeInfo(ctxt, &node_info); 8056 } 8057 return; 8058 } 8059 8060 /* 8061 * Parse the content of the element: 8062 */ 8063 xmlParseContent(ctxt); 8064 if (!IS_BYTE_CHAR(RAW)) { 8065 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 8066 "Premature end of data in tag %s line %d\n", 8067 name, line, NULL); 8068 8069 /* 8070 * end of parsing of this node. 8071 */ 8072 nodePop(ctxt); 8073 namePop(ctxt); 8074 spacePop(ctxt); 8075 if (nsNr != ctxt->nsNr) 8076 nsPop(ctxt, ctxt->nsNr - nsNr); 8077 return; 8078 } 8079 8080 /* 8081 * parse the end of tag: '</' should be here. 8082 */ 8083 if (ctxt->sax2) { 8084 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 8085 namePop(ctxt); 8086 } 8087#ifdef LIBXML_SAX1_ENABLED 8088 else 8089 xmlParseEndTag1(ctxt, line); 8090#endif /* LIBXML_SAX1_ENABLED */ 8091 8092 /* 8093 * Capture end position and add node 8094 */ 8095 if ( ret != NULL && ctxt->record_info ) { 8096 node_info.end_pos = ctxt->input->consumed + 8097 (CUR_PTR - ctxt->input->base); 8098 node_info.end_line = ctxt->input->line; 8099 node_info.node = ret; 8100 xmlParserAddNodeInfo(ctxt, &node_info); 8101 } 8102} 8103 8104/** 8105 * xmlParseVersionNum: 8106 * @ctxt: an XML parser context 8107 * 8108 * parse the XML version value. 
8109 * 8110 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 8111 * 8112 * Returns the string giving the XML version number, or NULL 8113 */ 8114xmlChar * 8115xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 8116 xmlChar *buf = NULL; 8117 int len = 0; 8118 int size = 10; 8119 xmlChar cur; 8120 8121 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8122 if (buf == NULL) { 8123 xmlErrMemory(ctxt, NULL); 8124 return(NULL); 8125 } 8126 cur = CUR; 8127 while (((cur >= 'a') && (cur <= 'z')) || 8128 ((cur >= 'A') && (cur <= 'Z')) || 8129 ((cur >= '0') && (cur <= '9')) || 8130 (cur == '_') || (cur == '.') || 8131 (cur == ':') || (cur == '-')) { 8132 if (len + 1 >= size) { 8133 xmlChar *tmp; 8134 8135 size *= 2; 8136 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8137 if (tmp == NULL) { 8138 xmlErrMemory(ctxt, NULL); 8139 return(NULL); 8140 } 8141 buf = tmp; 8142 } 8143 buf[len++] = cur; 8144 NEXT; 8145 cur=CUR; 8146 } 8147 buf[len] = 0; 8148 return(buf); 8149} 8150 8151/** 8152 * xmlParseVersionInfo: 8153 * @ctxt: an XML parser context 8154 * 8155 * parse the XML version. 8156 * 8157 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 8158 * 8159 * [25] Eq ::= S? '=' S? 8160 * 8161 * Returns the version string, e.g. 
"1.0" 8162 */ 8163 8164xmlChar * 8165xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 8166 xmlChar *version = NULL; 8167 8168 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 8169 SKIP(7); 8170 SKIP_BLANKS; 8171 if (RAW != '=') { 8172 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8173 return(NULL); 8174 } 8175 NEXT; 8176 SKIP_BLANKS; 8177 if (RAW == '"') { 8178 NEXT; 8179 version = xmlParseVersionNum(ctxt); 8180 if (RAW != '"') { 8181 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8182 } else 8183 NEXT; 8184 } else if (RAW == '\''){ 8185 NEXT; 8186 version = xmlParseVersionNum(ctxt); 8187 if (RAW != '\'') { 8188 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8189 } else 8190 NEXT; 8191 } else { 8192 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8193 } 8194 } 8195 return(version); 8196} 8197 8198/** 8199 * xmlParseEncName: 8200 * @ctxt: an XML parser context 8201 * 8202 * parse the XML encoding name 8203 * 8204 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 8205 * 8206 * Returns the encoding name value or NULL 8207 */ 8208xmlChar * 8209xmlParseEncName(xmlParserCtxtPtr ctxt) { 8210 xmlChar *buf = NULL; 8211 int len = 0; 8212 int size = 10; 8213 xmlChar cur; 8214 8215 cur = CUR; 8216 if (((cur >= 'a') && (cur <= 'z')) || 8217 ((cur >= 'A') && (cur <= 'Z'))) { 8218 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8219 if (buf == NULL) { 8220 xmlErrMemory(ctxt, NULL); 8221 return(NULL); 8222 } 8223 8224 buf[len++] = cur; 8225 NEXT; 8226 cur = CUR; 8227 while (((cur >= 'a') && (cur <= 'z')) || 8228 ((cur >= 'A') && (cur <= 'Z')) || 8229 ((cur >= '0') && (cur <= '9')) || 8230 (cur == '.') || (cur == '_') || 8231 (cur == '-')) { 8232 if (len + 1 >= size) { 8233 xmlChar *tmp; 8234 8235 size *= 2; 8236 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8237 if (tmp == NULL) { 8238 xmlErrMemory(ctxt, NULL); 8239 xmlFree(buf); 8240 return(NULL); 8241 } 8242 buf = tmp; 8243 } 8244 buf[len++] = cur; 8245 NEXT; 8246 cur = CUR; 8247 if 
(cur == 0) { 8248 SHRINK; 8249 GROW; 8250 cur = CUR; 8251 } 8252 } 8253 buf[len] = 0; 8254 } else { 8255 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 8256 } 8257 return(buf); 8258} 8259 8260/** 8261 * xmlParseEncodingDecl: 8262 * @ctxt: an XML parser context 8263 * 8264 * parse the XML encoding declaration 8265 * 8266 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 8267 * 8268 * this setups the conversion filters. 8269 * 8270 * Returns the encoding value or NULL 8271 */ 8272 8273const xmlChar * 8274xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 8275 xmlChar *encoding = NULL; 8276 8277 SKIP_BLANKS; 8278 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 8279 SKIP(8); 8280 SKIP_BLANKS; 8281 if (RAW != '=') { 8282 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8283 return(NULL); 8284 } 8285 NEXT; 8286 SKIP_BLANKS; 8287 if (RAW == '"') { 8288 NEXT; 8289 encoding = xmlParseEncName(ctxt); 8290 if (RAW != '"') { 8291 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8292 } else 8293 NEXT; 8294 } else if (RAW == '\''){ 8295 NEXT; 8296 encoding = xmlParseEncName(ctxt); 8297 if (RAW != '\'') { 8298 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8299 } else 8300 NEXT; 8301 } else { 8302 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8303 } 8304 /* 8305 * UTF-16 encoding stwich has already taken place at this stage, 8306 * more over the little-endian/big-endian selection is already done 8307 */ 8308 if ((encoding != NULL) && 8309 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 8310 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 8311 if (ctxt->encoding != NULL) 8312 xmlFree((xmlChar *) ctxt->encoding); 8313 ctxt->encoding = encoding; 8314 } 8315 /* 8316 * UTF-8 encoding is handled natively 8317 */ 8318 else if ((encoding != NULL) && 8319 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 8320 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 8321 if (ctxt->encoding != NULL) 8322 xmlFree((xmlChar *) ctxt->encoding); 8323 
ctxt->encoding = encoding; 8324 } 8325 else if (encoding != NULL) { 8326 xmlCharEncodingHandlerPtr handler; 8327 8328 if (ctxt->input->encoding != NULL) 8329 xmlFree((xmlChar *) ctxt->input->encoding); 8330 ctxt->input->encoding = encoding; 8331 8332 handler = xmlFindCharEncodingHandler((const char *) encoding); 8333 if (handler != NULL) { 8334 xmlSwitchToEncoding(ctxt, handler); 8335 } else { 8336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 8337 "Unsupported encoding %s\n", encoding); 8338 return(NULL); 8339 } 8340 } 8341 } 8342 return(encoding); 8343} 8344 8345/** 8346 * xmlParseSDDecl: 8347 * @ctxt: an XML parser context 8348 * 8349 * parse the XML standalone declaration 8350 * 8351 * [32] SDDecl ::= S 'standalone' Eq 8352 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 8353 * 8354 * [ VC: Standalone Document Declaration ] 8355 * TODO The standalone document declaration must have the value "no" 8356 * if any external markup declarations contain declarations of: 8357 * - attributes with default values, if elements to which these 8358 * attributes apply appear in the document without specifications 8359 * of values for these attributes, or 8360 * - entities (other than amp, lt, gt, apos, quot), if references 8361 * to those entities appear in the document, or 8362 * - attributes with values subject to normalization, where the 8363 * attribute appears in the document with a value which will change 8364 * as a result of normalization, or 8365 * - element types with element content, if white space occurs directly 8366 * within any instance of those types. 
8367 * 8368 * Returns 1 if standalone, 0 otherwise 8369 */ 8370 8371int 8372xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 8373 int standalone = -1; 8374 8375 SKIP_BLANKS; 8376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 8377 SKIP(10); 8378 SKIP_BLANKS; 8379 if (RAW != '=') { 8380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8381 return(standalone); 8382 } 8383 NEXT; 8384 SKIP_BLANKS; 8385 if (RAW == '\''){ 8386 NEXT; 8387 if ((RAW == 'n') && (NXT(1) == 'o')) { 8388 standalone = 0; 8389 SKIP(2); 8390 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8391 (NXT(2) == 's')) { 8392 standalone = 1; 8393 SKIP(3); 8394 } else { 8395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8396 } 8397 if (RAW != '\'') { 8398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8399 } else 8400 NEXT; 8401 } else if (RAW == '"'){ 8402 NEXT; 8403 if ((RAW == 'n') && (NXT(1) == 'o')) { 8404 standalone = 0; 8405 SKIP(2); 8406 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8407 (NXT(2) == 's')) { 8408 standalone = 1; 8409 SKIP(3); 8410 } else { 8411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8412 } 8413 if (RAW != '"') { 8414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8415 } else 8416 NEXT; 8417 } else { 8418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8419 } 8420 } 8421 return(standalone); 8422} 8423 8424/** 8425 * xmlParseXMLDecl: 8426 * @ctxt: an XML parser context 8427 * 8428 * parse an XML declaration header 8429 * 8430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 8431 */ 8432 8433void 8434xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 8435 xmlChar *version; 8436 8437 /* 8438 * We know that '<?xml' is here. 8439 */ 8440 SKIP(5); 8441 8442 if (!IS_BLANK_CH(RAW)) { 8443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8444 "Blank needed after '<?xml'\n"); 8445 } 8446 SKIP_BLANKS; 8447 8448 /* 8449 * We must have the VersionInfo here. 
8450 */ 8451 version = xmlParseVersionInfo(ctxt); 8452 if (version == NULL) { 8453 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 8454 } else { 8455 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 8456 /* 8457 * TODO: Blueberry should be detected here 8458 */ 8459 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 8460 "Unsupported version '%s'\n", 8461 version, NULL); 8462 } 8463 if (ctxt->version != NULL) 8464 xmlFree((void *) ctxt->version); 8465 ctxt->version = version; 8466 } 8467 8468 /* 8469 * We may have the encoding declaration 8470 */ 8471 if (!IS_BLANK_CH(RAW)) { 8472 if ((RAW == '?') && (NXT(1) == '>')) { 8473 SKIP(2); 8474 return; 8475 } 8476 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8477 } 8478 xmlParseEncodingDecl(ctxt); 8479 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8480 /* 8481 * The XML REC instructs us to stop parsing right here 8482 */ 8483 return; 8484 } 8485 8486 /* 8487 * We may have the standalone status. 8488 */ 8489 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 8490 if ((RAW == '?') && (NXT(1) == '>')) { 8491 SKIP(2); 8492 return; 8493 } 8494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 8495 } 8496 SKIP_BLANKS; 8497 ctxt->input->standalone = xmlParseSDDecl(ctxt); 8498 8499 SKIP_BLANKS; 8500 if ((RAW == '?') && (NXT(1) == '>')) { 8501 SKIP(2); 8502 } else if (RAW == '>') { 8503 /* Deprecated old WD ... */ 8504 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8505 NEXT; 8506 } else { 8507 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 8508 MOVETO_ENDTAG(CUR_PTR); 8509 NEXT; 8510 } 8511} 8512 8513/** 8514 * xmlParseMisc: 8515 * @ctxt: an XML parser context 8516 * 8517 * parse an XML Misc* optional field. 
8518 * 8519 * [27] Misc ::= Comment | PI | S 8520 */ 8521 8522void 8523xmlParseMisc(xmlParserCtxtPtr ctxt) { 8524 while (((RAW == '<') && (NXT(1) == '?')) || 8525 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 8526 IS_BLANK_CH(CUR)) { 8527 if ((RAW == '<') && (NXT(1) == '?')) { 8528 xmlParsePI(ctxt); 8529 } else if (IS_BLANK_CH(CUR)) { 8530 NEXT; 8531 } else 8532 xmlParseComment(ctxt); 8533 } 8534} 8535 8536/** 8537 * xmlParseDocument: 8538 * @ctxt: an XML parser context 8539 * 8540 * parse an XML document (and build a tree if using the standard SAX 8541 * interface). 8542 * 8543 * [1] document ::= prolog element Misc* 8544 * 8545 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 8546 * 8547 * Returns 0, -1 in case of error. the parser context is augmented 8548 * as a result of the parsing. 8549 */ 8550 8551int 8552xmlParseDocument(xmlParserCtxtPtr ctxt) { 8553 xmlChar start[4]; 8554 xmlCharEncoding enc; 8555 8556 xmlInitParser(); 8557 8558 if ((ctxt == NULL) || (ctxt->input == NULL)) 8559 return(-1); 8560 8561 GROW; 8562 8563 /* 8564 * SAX: detecting the level. 8565 */ 8566 xmlDetectSAX2(ctxt); 8567 8568 /* 8569 * SAX: beginning of the document processing. 8570 */ 8571 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8572 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8573 8574 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 8575 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 8576 /* 8577 * Get the 4 first bytes and decode the charset 8578 * if enc != XML_CHAR_ENCODING_NONE 8579 * plug some encoding conversion routines. 8580 */ 8581 start[0] = RAW; 8582 start[1] = NXT(1); 8583 start[2] = NXT(2); 8584 start[3] = NXT(3); 8585 enc = xmlDetectCharEncoding(&start[0], 4); 8586 if (enc != XML_CHAR_ENCODING_NONE) { 8587 xmlSwitchEncoding(ctxt, enc); 8588 } 8589 } 8590 8591 8592 if (CUR == 0) { 8593 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8594 } 8595 8596 /* 8597 * Check for the XMLDecl in the Prolog. 
8598 */ 8599 GROW; 8600 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8601 8602 /* 8603 * Note that we will switch encoding on the fly. 8604 */ 8605 xmlParseXMLDecl(ctxt); 8606 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8607 /* 8608 * The XML REC instructs us to stop parsing right here 8609 */ 8610 return(-1); 8611 } 8612 ctxt->standalone = ctxt->input->standalone; 8613 SKIP_BLANKS; 8614 } else { 8615 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8616 } 8617 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8618 ctxt->sax->startDocument(ctxt->userData); 8619 8620 /* 8621 * The Misc part of the Prolog 8622 */ 8623 GROW; 8624 xmlParseMisc(ctxt); 8625 8626 /* 8627 * Then possibly doc type declaration(s) and more Misc 8628 * (doctypedecl Misc*)? 8629 */ 8630 GROW; 8631 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 8632 8633 ctxt->inSubset = 1; 8634 xmlParseDocTypeDecl(ctxt); 8635 if (RAW == '[') { 8636 ctxt->instate = XML_PARSER_DTD; 8637 xmlParseInternalSubset(ctxt); 8638 } 8639 8640 /* 8641 * Create and update the external subset. 
8642 */ 8643 ctxt->inSubset = 2; 8644 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 8645 (!ctxt->disableSAX)) 8646 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 8647 ctxt->extSubSystem, ctxt->extSubURI); 8648 ctxt->inSubset = 0; 8649 8650 8651 ctxt->instate = XML_PARSER_PROLOG; 8652 xmlParseMisc(ctxt); 8653 } 8654 8655 /* 8656 * Time to start parsing the tree itself 8657 */ 8658 GROW; 8659 if (RAW != '<') { 8660 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 8661 "Start tag expected, '<' not found\n"); 8662 } else { 8663 ctxt->instate = XML_PARSER_CONTENT; 8664 xmlParseElement(ctxt); 8665 ctxt->instate = XML_PARSER_EPILOG; 8666 8667 8668 /* 8669 * The Misc part at the end 8670 */ 8671 xmlParseMisc(ctxt); 8672 8673 if (RAW != 0) { 8674 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 8675 } 8676 ctxt->instate = XML_PARSER_EOF; 8677 } 8678 8679 /* 8680 * SAX: end of the document processing. 8681 */ 8682 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8683 ctxt->sax->endDocument(ctxt->userData); 8684 8685 /* 8686 * Remove locally kept entity definitions if the tree was not built 8687 */ 8688 if ((ctxt->myDoc != NULL) && 8689 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 8690 xmlFreeDoc(ctxt->myDoc); 8691 ctxt->myDoc = NULL; 8692 } 8693 8694 if (! ctxt->wellFormed) { 8695 ctxt->valid = 0; 8696 return(-1); 8697 } 8698 return(0); 8699} 8700 8701/** 8702 * xmlParseExtParsedEnt: 8703 * @ctxt: an XML parser context 8704 * 8705 * parse a general parsed entity 8706 * An external general parsed entity is well-formed if it matches the 8707 * production labeled extParsedEnt. 8708 * 8709 * [78] extParsedEnt ::= TextDecl? content 8710 * 8711 * Returns 0, -1 in case of error. the parser context is augmented 8712 * as a result of the parsing. 
8713 */ 8714 8715int 8716xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 8717 xmlChar start[4]; 8718 xmlCharEncoding enc; 8719 8720 if ((ctxt == NULL) || (ctxt->input == NULL)) 8721 return(-1); 8722 8723 xmlDefaultSAXHandlerInit(); 8724 8725 xmlDetectSAX2(ctxt); 8726 8727 GROW; 8728 8729 /* 8730 * SAX: beginning of the document processing. 8731 */ 8732 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 8733 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 8734 8735 /* 8736 * Get the 4 first bytes and decode the charset 8737 * if enc != XML_CHAR_ENCODING_NONE 8738 * plug some encoding conversion routines. 8739 */ 8740 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 8741 start[0] = RAW; 8742 start[1] = NXT(1); 8743 start[2] = NXT(2); 8744 start[3] = NXT(3); 8745 enc = xmlDetectCharEncoding(start, 4); 8746 if (enc != XML_CHAR_ENCODING_NONE) { 8747 xmlSwitchEncoding(ctxt, enc); 8748 } 8749 } 8750 8751 8752 if (CUR == 0) { 8753 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 8754 } 8755 8756 /* 8757 * Check for the XMLDecl in the Prolog. 8758 */ 8759 GROW; 8760 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8761 8762 /* 8763 * Note that we will switch encoding on the fly. 
8764 */ 8765 xmlParseXMLDecl(ctxt); 8766 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 8767 /* 8768 * The XML REC instructs us to stop parsing right here 8769 */ 8770 return(-1); 8771 } 8772 SKIP_BLANKS; 8773 } else { 8774 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 8775 } 8776 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 8777 ctxt->sax->startDocument(ctxt->userData); 8778 8779 /* 8780 * Doing validity checking on chunk doesn't make sense 8781 */ 8782 ctxt->instate = XML_PARSER_CONTENT; 8783 ctxt->validate = 0; 8784 ctxt->loadsubset = 0; 8785 ctxt->depth = 0; 8786 8787 xmlParseContent(ctxt); 8788 8789 if ((RAW == '<') && (NXT(1) == '/')) { 8790 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 8791 } else if (RAW != 0) { 8792 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 8793 } 8794 8795 /* 8796 * SAX: end of the document processing. 8797 */ 8798 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 8799 ctxt->sax->endDocument(ctxt->userData); 8800 8801 if (! ctxt->wellFormed) return(-1); 8802 return(0); 8803} 8804 8805#ifdef LIBXML_PUSH_ENABLED 8806/************************************************************************ 8807 * * 8808 * Progressive parsing interfaces * 8809 * * 8810 ************************************************************************/ 8811 8812/** 8813 * xmlParseLookupSequence: 8814 * @ctxt: an XML parser context 8815 * @first: the first char to lookup 8816 * @next: the next char to lookup or zero 8817 * @third: the next char to lookup or zero 8818 * 8819 * Try to find if a sequence (first, next, third) or just (first next) or 8820 * (first) is available in the input stream. 8821 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 8822 * to avoid rescanning sequences of bytes, it DOES change the state of the 8823 * parser, do not use liberally. 8824 * 8825 * Returns the index to the current parsing point if the full sequence 8826 * is available, -1 otherwise. 
8827 */ 8828static int 8829xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 8830 xmlChar next, xmlChar third) { 8831 int base, len; 8832 xmlParserInputPtr in; 8833 const xmlChar *buf; 8834 8835 in = ctxt->input; 8836 if (in == NULL) return(-1); 8837 base = in->cur - in->base; 8838 if (base < 0) return(-1); 8839 if (ctxt->checkIndex > base) 8840 base = ctxt->checkIndex; 8841 if (in->buf == NULL) { 8842 buf = in->base; 8843 len = in->length; 8844 } else { 8845 buf = in->buf->buffer->content; 8846 len = in->buf->buffer->use; 8847 } 8848 /* take into account the sequence length */ 8849 if (third) len -= 2; 8850 else if (next) len --; 8851 for (;base < len;base++) { 8852 if (buf[base] == first) { 8853 if (third != 0) { 8854 if ((buf[base + 1] != next) || 8855 (buf[base + 2] != third)) continue; 8856 } else if (next != 0) { 8857 if (buf[base + 1] != next) continue; 8858 } 8859 ctxt->checkIndex = 0; 8860#ifdef DEBUG_PUSH 8861 if (next == 0) 8862 xmlGenericError(xmlGenericErrorContext, 8863 "PP: lookup '%c' found at %d\n", 8864 first, base); 8865 else if (third == 0) 8866 xmlGenericError(xmlGenericErrorContext, 8867 "PP: lookup '%c%c' found at %d\n", 8868 first, next, base); 8869 else 8870 xmlGenericError(xmlGenericErrorContext, 8871 "PP: lookup '%c%c%c' found at %d\n", 8872 first, next, third, base); 8873#endif 8874 return(base - (in->cur - in->base)); 8875 } 8876 } 8877 ctxt->checkIndex = base; 8878#ifdef DEBUG_PUSH 8879 if (next == 0) 8880 xmlGenericError(xmlGenericErrorContext, 8881 "PP: lookup '%c' failed\n", first); 8882 else if (third == 0) 8883 xmlGenericError(xmlGenericErrorContext, 8884 "PP: lookup '%c%c' failed\n", first, next); 8885 else 8886 xmlGenericError(xmlGenericErrorContext, 8887 "PP: lookup '%c%c%c' failed\n", first, next, third); 8888#endif 8889 return(-1); 8890} 8891 8892/** 8893 * xmlParseGetLasts: 8894 * @ctxt: an XML parser context 8895 * @lastlt: pointer to store the last '<' from the input 8896 * @lastgt: pointer to store the last '>' 
from the input 8897 * 8898 * Lookup the last < and > in the current chunk 8899 */ 8900static void 8901xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 8902 const xmlChar **lastgt) { 8903 const xmlChar *tmp; 8904 8905 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 8906 xmlGenericError(xmlGenericErrorContext, 8907 "Internal error: xmlParseGetLasts\n"); 8908 return; 8909 } 8910 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 8911 tmp = ctxt->input->end; 8912 tmp--; 8913 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 8914 if (tmp < ctxt->input->base) { 8915 *lastlt = NULL; 8916 *lastgt = NULL; 8917 } else { 8918 *lastlt = tmp; 8919 tmp++; 8920 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 8921 if (*tmp == '\'') { 8922 tmp++; 8923 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 8924 if (tmp < ctxt->input->end) tmp++; 8925 } else if (*tmp == '"') { 8926 tmp++; 8927 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 8928 if (tmp < ctxt->input->end) tmp++; 8929 } else 8930 tmp++; 8931 } 8932 if (tmp < ctxt->input->end) 8933 *lastgt = tmp; 8934 else { 8935 tmp = *lastlt; 8936 tmp--; 8937 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 8938 if (tmp >= ctxt->input->base) 8939 *lastgt = tmp; 8940 else 8941 *lastgt = NULL; 8942 } 8943 } 8944 } else { 8945 *lastlt = NULL; 8946 *lastgt = NULL; 8947 } 8948} 8949/** 8950 * xmlParseTryOrFinish: 8951 * @ctxt: an XML parser context 8952 * @terminate: last chunk indicator 8953 * 8954 * Try to progress on parsing 8955 * 8956 * Returns zero if no parsing was possible 8957 */ 8958static int 8959xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 8960 int ret = 0; 8961 int avail, tlen; 8962 xmlChar cur, next; 8963 const xmlChar *lastlt, *lastgt; 8964 8965 if (ctxt->input == NULL) 8966 return(0); 8967 8968#ifdef DEBUG_PUSH 8969 switch (ctxt->instate) { 8970 case XML_PARSER_EOF: 8971 xmlGenericError(xmlGenericErrorContext, 8972 "PP: try EOF\n"); break; 
8973 case XML_PARSER_START: 8974 xmlGenericError(xmlGenericErrorContext, 8975 "PP: try START\n"); break; 8976 case XML_PARSER_MISC: 8977 xmlGenericError(xmlGenericErrorContext, 8978 "PP: try MISC\n");break; 8979 case XML_PARSER_COMMENT: 8980 xmlGenericError(xmlGenericErrorContext, 8981 "PP: try COMMENT\n");break; 8982 case XML_PARSER_PROLOG: 8983 xmlGenericError(xmlGenericErrorContext, 8984 "PP: try PROLOG\n");break; 8985 case XML_PARSER_START_TAG: 8986 xmlGenericError(xmlGenericErrorContext, 8987 "PP: try START_TAG\n");break; 8988 case XML_PARSER_CONTENT: 8989 xmlGenericError(xmlGenericErrorContext, 8990 "PP: try CONTENT\n");break; 8991 case XML_PARSER_CDATA_SECTION: 8992 xmlGenericError(xmlGenericErrorContext, 8993 "PP: try CDATA_SECTION\n");break; 8994 case XML_PARSER_END_TAG: 8995 xmlGenericError(xmlGenericErrorContext, 8996 "PP: try END_TAG\n");break; 8997 case XML_PARSER_ENTITY_DECL: 8998 xmlGenericError(xmlGenericErrorContext, 8999 "PP: try ENTITY_DECL\n");break; 9000 case XML_PARSER_ENTITY_VALUE: 9001 xmlGenericError(xmlGenericErrorContext, 9002 "PP: try ENTITY_VALUE\n");break; 9003 case XML_PARSER_ATTRIBUTE_VALUE: 9004 xmlGenericError(xmlGenericErrorContext, 9005 "PP: try ATTRIBUTE_VALUE\n");break; 9006 case XML_PARSER_DTD: 9007 xmlGenericError(xmlGenericErrorContext, 9008 "PP: try DTD\n");break; 9009 case XML_PARSER_EPILOG: 9010 xmlGenericError(xmlGenericErrorContext, 9011 "PP: try EPILOG\n");break; 9012 case XML_PARSER_PI: 9013 xmlGenericError(xmlGenericErrorContext, 9014 "PP: try PI\n");break; 9015 case XML_PARSER_IGNORE: 9016 xmlGenericError(xmlGenericErrorContext, 9017 "PP: try IGNORE\n");break; 9018 } 9019#endif 9020 9021 if ((ctxt->input != NULL) && 9022 (ctxt->input->cur - ctxt->input->base > 4096)) { 9023 xmlSHRINK(ctxt); 9024 ctxt->checkIndex = 0; 9025 } 9026 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9027 9028 while (1) { 9029 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9030 return(0); 9031 9032 9033 /* 9034 * Pop-up of finished 
entities. 9035 */ 9036 while ((RAW == 0) && (ctxt->inputNr > 1)) 9037 xmlPopInput(ctxt); 9038 9039 if (ctxt->input == NULL) break; 9040 if (ctxt->input->buf == NULL) 9041 avail = ctxt->input->length - 9042 (ctxt->input->cur - ctxt->input->base); 9043 else { 9044 /* 9045 * If we are operating on converted input, try to flush 9046 * remainng chars to avoid them stalling in the non-converted 9047 * buffer. 9048 */ 9049 if ((ctxt->input->buf->raw != NULL) && 9050 (ctxt->input->buf->raw->use > 0)) { 9051 int base = ctxt->input->base - 9052 ctxt->input->buf->buffer->content; 9053 int current = ctxt->input->cur - ctxt->input->base; 9054 9055 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 9056 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9057 ctxt->input->cur = ctxt->input->base + current; 9058 ctxt->input->end = 9059 &ctxt->input->buf->buffer->content[ 9060 ctxt->input->buf->buffer->use]; 9061 } 9062 avail = ctxt->input->buf->buffer->use - 9063 (ctxt->input->cur - ctxt->input->base); 9064 } 9065 if (avail < 1) 9066 goto done; 9067 switch (ctxt->instate) { 9068 case XML_PARSER_EOF: 9069 /* 9070 * Document parsing is done ! 9071 */ 9072 goto done; 9073 case XML_PARSER_START: 9074 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 9075 xmlChar start[4]; 9076 xmlCharEncoding enc; 9077 9078 /* 9079 * Very first chars read from the document flow. 9080 */ 9081 if (avail < 4) 9082 goto done; 9083 9084 /* 9085 * Get the 4 first bytes and decode the charset 9086 * if enc != XML_CHAR_ENCODING_NONE 9087 * plug some encoding conversion routines. 
9088 */ 9089 start[0] = RAW; 9090 start[1] = NXT(1); 9091 start[2] = NXT(2); 9092 start[3] = NXT(3); 9093 enc = xmlDetectCharEncoding(start, 4); 9094 if (enc != XML_CHAR_ENCODING_NONE) { 9095 xmlSwitchEncoding(ctxt, enc); 9096 } 9097 break; 9098 } 9099 9100 if (avail < 2) 9101 goto done; 9102 cur = ctxt->input->cur[0]; 9103 next = ctxt->input->cur[1]; 9104 if (cur == 0) { 9105 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9106 ctxt->sax->setDocumentLocator(ctxt->userData, 9107 &xmlDefaultSAXLocator); 9108 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9109 ctxt->instate = XML_PARSER_EOF; 9110#ifdef DEBUG_PUSH 9111 xmlGenericError(xmlGenericErrorContext, 9112 "PP: entering EOF\n"); 9113#endif 9114 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9115 ctxt->sax->endDocument(ctxt->userData); 9116 goto done; 9117 } 9118 if ((cur == '<') && (next == '?')) { 9119 /* PI or XML decl */ 9120 if (avail < 5) return(ret); 9121 if ((!terminate) && 9122 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9123 return(ret); 9124 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9125 ctxt->sax->setDocumentLocator(ctxt->userData, 9126 &xmlDefaultSAXLocator); 9127 if ((ctxt->input->cur[2] == 'x') && 9128 (ctxt->input->cur[3] == 'm') && 9129 (ctxt->input->cur[4] == 'l') && 9130 (IS_BLANK_CH(ctxt->input->cur[5]))) { 9131 ret += 5; 9132#ifdef DEBUG_PUSH 9133 xmlGenericError(xmlGenericErrorContext, 9134 "PP: Parsing XML Decl\n"); 9135#endif 9136 xmlParseXMLDecl(ctxt); 9137 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9138 /* 9139 * The XML REC instructs us to stop parsing right 9140 * here 9141 */ 9142 ctxt->instate = XML_PARSER_EOF; 9143 return(0); 9144 } 9145 ctxt->standalone = ctxt->input->standalone; 9146 if ((ctxt->encoding == NULL) && 9147 (ctxt->input->encoding != NULL)) 9148 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 9149 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9150 (!ctxt->disableSAX)) 9151 ctxt->sax->startDocument(ctxt->userData); 9152 
ctxt->instate = XML_PARSER_MISC; 9153#ifdef DEBUG_PUSH 9154 xmlGenericError(xmlGenericErrorContext, 9155 "PP: entering MISC\n"); 9156#endif 9157 } else { 9158 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9159 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9160 (!ctxt->disableSAX)) 9161 ctxt->sax->startDocument(ctxt->userData); 9162 ctxt->instate = XML_PARSER_MISC; 9163#ifdef DEBUG_PUSH 9164 xmlGenericError(xmlGenericErrorContext, 9165 "PP: entering MISC\n"); 9166#endif 9167 } 9168 } else { 9169 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9170 ctxt->sax->setDocumentLocator(ctxt->userData, 9171 &xmlDefaultSAXLocator); 9172 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9173 if (ctxt->version == NULL) { 9174 xmlErrMemory(ctxt, NULL); 9175 break; 9176 } 9177 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9178 (!ctxt->disableSAX)) 9179 ctxt->sax->startDocument(ctxt->userData); 9180 ctxt->instate = XML_PARSER_MISC; 9181#ifdef DEBUG_PUSH 9182 xmlGenericError(xmlGenericErrorContext, 9183 "PP: entering MISC\n"); 9184#endif 9185 } 9186 break; 9187 case XML_PARSER_START_TAG: { 9188 const xmlChar *name; 9189 const xmlChar *prefix; 9190 const xmlChar *URI; 9191 int nsNr = ctxt->nsNr; 9192 9193 if ((avail < 2) && (ctxt->inputNr == 1)) 9194 goto done; 9195 cur = ctxt->input->cur[0]; 9196 if (cur != '<') { 9197 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9198 ctxt->instate = XML_PARSER_EOF; 9199 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9200 ctxt->sax->endDocument(ctxt->userData); 9201 goto done; 9202 } 9203 if (!terminate) { 9204 if (ctxt->progressive) { 9205 /* > can be found unescaped in attribute values */ 9206 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 9207 goto done; 9208 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9209 goto done; 9210 } 9211 } 9212 if (ctxt->spaceNr == 0) 9213 spacePush(ctxt, -1); 9214 else 9215 spacePush(ctxt, *ctxt->space); 9216#ifdef LIBXML_SAX1_ENABLED 9217 if (ctxt->sax2) 9218#endif /* 
LIBXML_SAX1_ENABLED */ 9219 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9220#ifdef LIBXML_SAX1_ENABLED 9221 else 9222 name = xmlParseStartTag(ctxt); 9223#endif /* LIBXML_SAX1_ENABLED */ 9224 if (name == NULL) { 9225 spacePop(ctxt); 9226 ctxt->instate = XML_PARSER_EOF; 9227 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9228 ctxt->sax->endDocument(ctxt->userData); 9229 goto done; 9230 } 9231#ifdef LIBXML_VALID_ENABLED 9232 /* 9233 * [ VC: Root Element Type ] 9234 * The Name in the document type declaration must match 9235 * the element type of the root element. 9236 */ 9237 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9238 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9239 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9240#endif /* LIBXML_VALID_ENABLED */ 9241 9242 /* 9243 * Check for an Empty Element. 9244 */ 9245 if ((RAW == '/') && (NXT(1) == '>')) { 9246 SKIP(2); 9247 9248 if (ctxt->sax2) { 9249 if ((ctxt->sax != NULL) && 9250 (ctxt->sax->endElementNs != NULL) && 9251 (!ctxt->disableSAX)) 9252 ctxt->sax->endElementNs(ctxt->userData, name, 9253 prefix, URI); 9254#ifdef LIBXML_SAX1_ENABLED 9255 } else { 9256 if ((ctxt->sax != NULL) && 9257 (ctxt->sax->endElement != NULL) && 9258 (!ctxt->disableSAX)) 9259 ctxt->sax->endElement(ctxt->userData, name); 9260#endif /* LIBXML_SAX1_ENABLED */ 9261 } 9262 spacePop(ctxt); 9263 if (ctxt->nameNr == 0) { 9264 ctxt->instate = XML_PARSER_EPILOG; 9265 } else { 9266 ctxt->instate = XML_PARSER_CONTENT; 9267 } 9268 break; 9269 } 9270 if (RAW == '>') { 9271 NEXT; 9272 } else { 9273 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 9274 "Couldn't find end of Start Tag %s\n", 9275 name); 9276 nodePop(ctxt); 9277 spacePop(ctxt); 9278 } 9279 if (ctxt->sax2) 9280 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 9281#ifdef LIBXML_SAX1_ENABLED 9282 else 9283 namePush(ctxt, name); 9284#endif /* LIBXML_SAX1_ENABLED */ 9285 9286 ctxt->instate = XML_PARSER_CONTENT; 9287 break; 9288 } 9289 case 
XML_PARSER_CONTENT: { 9290 const xmlChar *test; 9291 unsigned int cons; 9292 if ((avail < 2) && (ctxt->inputNr == 1)) 9293 goto done; 9294 cur = ctxt->input->cur[0]; 9295 next = ctxt->input->cur[1]; 9296 9297 test = CUR_PTR; 9298 cons = ctxt->input->consumed; 9299 if ((cur == '<') && (next == '/')) { 9300 ctxt->instate = XML_PARSER_END_TAG; 9301 break; 9302 } else if ((cur == '<') && (next == '?')) { 9303 if ((!terminate) && 9304 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9305 goto done; 9306 xmlParsePI(ctxt); 9307 } else if ((cur == '<') && (next != '!')) { 9308 ctxt->instate = XML_PARSER_START_TAG; 9309 break; 9310 } else if ((cur == '<') && (next == '!') && 9311 (ctxt->input->cur[2] == '-') && 9312 (ctxt->input->cur[3] == '-')) { 9313 if ((!terminate) && 9314 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9315 goto done; 9316 xmlParseComment(ctxt); 9317 ctxt->instate = XML_PARSER_CONTENT; 9318 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 9319 (ctxt->input->cur[2] == '[') && 9320 (ctxt->input->cur[3] == 'C') && 9321 (ctxt->input->cur[4] == 'D') && 9322 (ctxt->input->cur[5] == 'A') && 9323 (ctxt->input->cur[6] == 'T') && 9324 (ctxt->input->cur[7] == 'A') && 9325 (ctxt->input->cur[8] == '[')) { 9326 SKIP(9); 9327 ctxt->instate = XML_PARSER_CDATA_SECTION; 9328 break; 9329 } else if ((cur == '<') && (next == '!') && 9330 (avail < 9)) { 9331 goto done; 9332 } else if (cur == '&') { 9333 if ((!terminate) && 9334 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 9335 goto done; 9336 xmlParseReference(ctxt); 9337 } else { 9338 /* TODO Avoid the extra copy, handle directly !!! */ 9339 /* 9340 * Goal of the following test is: 9341 * - minimize calls to the SAX 'character' callback 9342 * when they are mergeable 9343 * - handle an problem for isBlank when we only parse 9344 * a sequence of blank chars and the next one is 9345 * not available to check against '<' presence. 
9346 * - tries to homogenize the differences in SAX 9347 * callbacks between the push and pull versions 9348 * of the parser. 9349 */ 9350 if ((ctxt->inputNr == 1) && 9351 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 9352 if (!terminate) { 9353 if (ctxt->progressive) { 9354 if ((lastlt == NULL) || 9355 (ctxt->input->cur > lastlt)) 9356 goto done; 9357 } else if (xmlParseLookupSequence(ctxt, 9358 '<', 0, 0) < 0) { 9359 goto done; 9360 } 9361 } 9362 } 9363 ctxt->checkIndex = 0; 9364 xmlParseCharData(ctxt, 0); 9365 } 9366 /* 9367 * Pop-up of finished entities. 9368 */ 9369 while ((RAW == 0) && (ctxt->inputNr > 1)) 9370 xmlPopInput(ctxt); 9371 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9372 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9373 "detected an error in element content\n"); 9374 ctxt->instate = XML_PARSER_EOF; 9375 break; 9376 } 9377 break; 9378 } 9379 case XML_PARSER_END_TAG: 9380 if (avail < 2) 9381 goto done; 9382 if (!terminate) { 9383 if (ctxt->progressive) { 9384 /* > can be found unescaped in attribute values */ 9385 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 9386 goto done; 9387 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9388 goto done; 9389 } 9390 } 9391 if (ctxt->sax2) { 9392 xmlParseEndTag2(ctxt, 9393 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 9394 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 9395 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 9396 nameNsPop(ctxt); 9397 } 9398#ifdef LIBXML_SAX1_ENABLED 9399 else 9400 xmlParseEndTag1(ctxt, 0); 9401#endif /* LIBXML_SAX1_ENABLED */ 9402 if (ctxt->nameNr == 0) { 9403 ctxt->instate = XML_PARSER_EPILOG; 9404 } else { 9405 ctxt->instate = XML_PARSER_CONTENT; 9406 } 9407 break; 9408 case XML_PARSER_CDATA_SECTION: { 9409 /* 9410 * The Push mode need to have the SAX callback for 9411 * cdataBlock merge back contiguous callbacks. 
9412 */ 9413 int base; 9414 9415 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 9416 if (base < 0) { 9417 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 9418 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9419 if (ctxt->sax->cdataBlock != NULL) 9420 ctxt->sax->cdataBlock(ctxt->userData, 9421 ctxt->input->cur, 9422 XML_PARSER_BIG_BUFFER_SIZE); 9423 else if (ctxt->sax->characters != NULL) 9424 ctxt->sax->characters(ctxt->userData, 9425 ctxt->input->cur, 9426 XML_PARSER_BIG_BUFFER_SIZE); 9427 } 9428 SKIPL(XML_PARSER_BIG_BUFFER_SIZE); 9429 ctxt->checkIndex = 0; 9430 } 9431 goto done; 9432 } else { 9433 if ((ctxt->sax != NULL) && (base > 0) && 9434 (!ctxt->disableSAX)) { 9435 if (ctxt->sax->cdataBlock != NULL) 9436 ctxt->sax->cdataBlock(ctxt->userData, 9437 ctxt->input->cur, base); 9438 else if (ctxt->sax->characters != NULL) 9439 ctxt->sax->characters(ctxt->userData, 9440 ctxt->input->cur, base); 9441 } 9442 SKIPL(base + 3); 9443 ctxt->checkIndex = 0; 9444 ctxt->instate = XML_PARSER_CONTENT; 9445#ifdef DEBUG_PUSH 9446 xmlGenericError(xmlGenericErrorContext, 9447 "PP: entering CONTENT\n"); 9448#endif 9449 } 9450 break; 9451 } 9452 case XML_PARSER_MISC: 9453 SKIP_BLANKS; 9454 if (ctxt->input->buf == NULL) 9455 avail = ctxt->input->length - 9456 (ctxt->input->cur - ctxt->input->base); 9457 else 9458 avail = ctxt->input->buf->buffer->use - 9459 (ctxt->input->cur - ctxt->input->base); 9460 if (avail < 2) 9461 goto done; 9462 cur = ctxt->input->cur[0]; 9463 next = ctxt->input->cur[1]; 9464 if ((cur == '<') && (next == '?')) { 9465 if ((!terminate) && 9466 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9467 goto done; 9468#ifdef DEBUG_PUSH 9469 xmlGenericError(xmlGenericErrorContext, 9470 "PP: Parsing PI\n"); 9471#endif 9472 xmlParsePI(ctxt); 9473 } else if ((cur == '<') && (next == '!') && 9474 (ctxt->input->cur[2] == '-') && 9475 (ctxt->input->cur[3] == '-')) { 9476 if ((!terminate) && 9477 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9478 goto done; 
9479#ifdef DEBUG_PUSH 9480 xmlGenericError(xmlGenericErrorContext, 9481 "PP: Parsing Comment\n"); 9482#endif 9483 xmlParseComment(ctxt); 9484 ctxt->instate = XML_PARSER_MISC; 9485 } else if ((cur == '<') && (next == '!') && 9486 (ctxt->input->cur[2] == 'D') && 9487 (ctxt->input->cur[3] == 'O') && 9488 (ctxt->input->cur[4] == 'C') && 9489 (ctxt->input->cur[5] == 'T') && 9490 (ctxt->input->cur[6] == 'Y') && 9491 (ctxt->input->cur[7] == 'P') && 9492 (ctxt->input->cur[8] == 'E')) { 9493 if ((!terminate) && 9494 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 9495 goto done; 9496#ifdef DEBUG_PUSH 9497 xmlGenericError(xmlGenericErrorContext, 9498 "PP: Parsing internal subset\n"); 9499#endif 9500 ctxt->inSubset = 1; 9501 xmlParseDocTypeDecl(ctxt); 9502 if (RAW == '[') { 9503 ctxt->instate = XML_PARSER_DTD; 9504#ifdef DEBUG_PUSH 9505 xmlGenericError(xmlGenericErrorContext, 9506 "PP: entering DTD\n"); 9507#endif 9508 } else { 9509 /* 9510 * Create and update the external subset. 9511 */ 9512 ctxt->inSubset = 2; 9513 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9514 (ctxt->sax->externalSubset != NULL)) 9515 ctxt->sax->externalSubset(ctxt->userData, 9516 ctxt->intSubName, ctxt->extSubSystem, 9517 ctxt->extSubURI); 9518 ctxt->inSubset = 0; 9519 ctxt->instate = XML_PARSER_PROLOG; 9520#ifdef DEBUG_PUSH 9521 xmlGenericError(xmlGenericErrorContext, 9522 "PP: entering PROLOG\n"); 9523#endif 9524 } 9525 } else if ((cur == '<') && (next == '!') && 9526 (avail < 9)) { 9527 goto done; 9528 } else { 9529 ctxt->instate = XML_PARSER_START_TAG; 9530 ctxt->progressive = 1; 9531 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9532#ifdef DEBUG_PUSH 9533 xmlGenericError(xmlGenericErrorContext, 9534 "PP: entering START_TAG\n"); 9535#endif 9536 } 9537 break; 9538 case XML_PARSER_PROLOG: 9539 SKIP_BLANKS; 9540 if (ctxt->input->buf == NULL) 9541 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9542 else 9543 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - 
ctxt->input->base); 9544 if (avail < 2) 9545 goto done; 9546 cur = ctxt->input->cur[0]; 9547 next = ctxt->input->cur[1]; 9548 if ((cur == '<') && (next == '?')) { 9549 if ((!terminate) && 9550 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9551 goto done; 9552#ifdef DEBUG_PUSH 9553 xmlGenericError(xmlGenericErrorContext, 9554 "PP: Parsing PI\n"); 9555#endif 9556 xmlParsePI(ctxt); 9557 } else if ((cur == '<') && (next == '!') && 9558 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9559 if ((!terminate) && 9560 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 9561 goto done; 9562#ifdef DEBUG_PUSH 9563 xmlGenericError(xmlGenericErrorContext, 9564 "PP: Parsing Comment\n"); 9565#endif 9566 xmlParseComment(ctxt); 9567 ctxt->instate = XML_PARSER_PROLOG; 9568 } else if ((cur == '<') && (next == '!') && 9569 (avail < 4)) { 9570 goto done; 9571 } else { 9572 ctxt->instate = XML_PARSER_START_TAG; 9573 if (ctxt->progressive == 0) 9574 ctxt->progressive = 1; 9575 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9576#ifdef DEBUG_PUSH 9577 xmlGenericError(xmlGenericErrorContext, 9578 "PP: entering START_TAG\n"); 9579#endif 9580 } 9581 break; 9582 case XML_PARSER_EPILOG: 9583 SKIP_BLANKS; 9584 if (ctxt->input->buf == NULL) 9585 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 9586 else 9587 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 9588 if (avail < 2) 9589 goto done; 9590 cur = ctxt->input->cur[0]; 9591 next = ctxt->input->cur[1]; 9592 if ((cur == '<') && (next == '?')) { 9593 if ((!terminate) && 9594 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9595 goto done; 9596#ifdef DEBUG_PUSH 9597 xmlGenericError(xmlGenericErrorContext, 9598 "PP: Parsing PI\n"); 9599#endif 9600 xmlParsePI(ctxt); 9601 ctxt->instate = XML_PARSER_EPILOG; 9602 } else if ((cur == '<') && (next == '!') && 9603 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 9604 if ((!terminate) && 9605 (xmlParseLookupSequence(ctxt, '-', 
'-', '>') < 0)) 9606 goto done; 9607#ifdef DEBUG_PUSH 9608 xmlGenericError(xmlGenericErrorContext, 9609 "PP: Parsing Comment\n"); 9610#endif 9611 xmlParseComment(ctxt); 9612 ctxt->instate = XML_PARSER_EPILOG; 9613 } else if ((cur == '<') && (next == '!') && 9614 (avail < 4)) { 9615 goto done; 9616 } else { 9617 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9618 ctxt->instate = XML_PARSER_EOF; 9619#ifdef DEBUG_PUSH 9620 xmlGenericError(xmlGenericErrorContext, 9621 "PP: entering EOF\n"); 9622#endif 9623 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9624 ctxt->sax->endDocument(ctxt->userData); 9625 goto done; 9626 } 9627 break; 9628 case XML_PARSER_DTD: { 9629 /* 9630 * Sorry but progressive parsing of the internal subset 9631 * is not expected to be supported. We first check that 9632 * the full content of the internal subset is available and 9633 * the parsing is launched only at that point. 9634 * Internal subset ends up with "']' S? '>'" in an unescaped 9635 * section and not in a ']]>' sequence which are conditional 9636 * sections (whoever argued to keep that crap in XML deserve 9637 * a place in hell !). 
9638 */ 9639 int base, i; 9640 xmlChar *buf; 9641 xmlChar quote = 0; 9642 9643 base = ctxt->input->cur - ctxt->input->base; 9644 if (base < 0) return(0); 9645 if (ctxt->checkIndex > base) 9646 base = ctxt->checkIndex; 9647 buf = ctxt->input->buf->buffer->content; 9648 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 9649 base++) { 9650 if (quote != 0) { 9651 if (buf[base] == quote) 9652 quote = 0; 9653 continue; 9654 } 9655 if ((quote == 0) && (buf[base] == '<')) { 9656 int found = 0; 9657 /* special handling of comments */ 9658 if (((unsigned int) base + 4 < 9659 ctxt->input->buf->buffer->use) && 9660 (buf[base + 1] == '!') && 9661 (buf[base + 2] == '-') && 9662 (buf[base + 3] == '-')) { 9663 for (;(unsigned int) base + 3 < 9664 ctxt->input->buf->buffer->use; base++) { 9665 if ((buf[base] == '-') && 9666 (buf[base + 1] == '-') && 9667 (buf[base + 2] == '>')) { 9668 found = 1; 9669 base += 2; 9670 break; 9671 } 9672 } 9673 if (!found) 9674 break; 9675 continue; 9676 } 9677 } 9678 if (buf[base] == '"') { 9679 quote = '"'; 9680 continue; 9681 } 9682 if (buf[base] == '\'') { 9683 quote = '\''; 9684 continue; 9685 } 9686 if (buf[base] == ']') { 9687 if ((unsigned int) base +1 >= 9688 ctxt->input->buf->buffer->use) 9689 break; 9690 if (buf[base + 1] == ']') { 9691 /* conditional crap, skip both ']' ! 
*/ 9692 base++; 9693 continue; 9694 } 9695 for (i = 0; 9696 (unsigned int) base + i < ctxt->input->buf->buffer->use; 9697 i++) { 9698 if (buf[base + i] == '>') 9699 goto found_end_int_subset; 9700 } 9701 break; 9702 } 9703 } 9704 /* 9705 * We didn't found the end of the Internal subset 9706 */ 9707 if (quote == 0) 9708 ctxt->checkIndex = base; 9709#ifdef DEBUG_PUSH 9710 if (next == 0) 9711 xmlGenericError(xmlGenericErrorContext, 9712 "PP: lookup of int subset end filed\n"); 9713#endif 9714 goto done; 9715 9716found_end_int_subset: 9717 xmlParseInternalSubset(ctxt); 9718 ctxt->inSubset = 2; 9719 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 9720 (ctxt->sax->externalSubset != NULL)) 9721 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9722 ctxt->extSubSystem, ctxt->extSubURI); 9723 ctxt->inSubset = 0; 9724 ctxt->instate = XML_PARSER_PROLOG; 9725 ctxt->checkIndex = 0; 9726#ifdef DEBUG_PUSH 9727 xmlGenericError(xmlGenericErrorContext, 9728 "PP: entering PROLOG\n"); 9729#endif 9730 break; 9731 } 9732 case XML_PARSER_COMMENT: 9733 xmlGenericError(xmlGenericErrorContext, 9734 "PP: internal error, state == COMMENT\n"); 9735 ctxt->instate = XML_PARSER_CONTENT; 9736#ifdef DEBUG_PUSH 9737 xmlGenericError(xmlGenericErrorContext, 9738 "PP: entering CONTENT\n"); 9739#endif 9740 break; 9741 case XML_PARSER_IGNORE: 9742 xmlGenericError(xmlGenericErrorContext, 9743 "PP: internal error, state == IGNORE"); 9744 ctxt->instate = XML_PARSER_DTD; 9745#ifdef DEBUG_PUSH 9746 xmlGenericError(xmlGenericErrorContext, 9747 "PP: entering DTD\n"); 9748#endif 9749 break; 9750 case XML_PARSER_PI: 9751 xmlGenericError(xmlGenericErrorContext, 9752 "PP: internal error, state == PI\n"); 9753 ctxt->instate = XML_PARSER_CONTENT; 9754#ifdef DEBUG_PUSH 9755 xmlGenericError(xmlGenericErrorContext, 9756 "PP: entering CONTENT\n"); 9757#endif 9758 break; 9759 case XML_PARSER_ENTITY_DECL: 9760 xmlGenericError(xmlGenericErrorContext, 9761 "PP: internal error, state == ENTITY_DECL\n"); 9762 
ctxt->instate = XML_PARSER_DTD; 9763#ifdef DEBUG_PUSH 9764 xmlGenericError(xmlGenericErrorContext, 9765 "PP: entering DTD\n"); 9766#endif 9767 break; 9768 case XML_PARSER_ENTITY_VALUE: 9769 xmlGenericError(xmlGenericErrorContext, 9770 "PP: internal error, state == ENTITY_VALUE\n"); 9771 ctxt->instate = XML_PARSER_CONTENT; 9772#ifdef DEBUG_PUSH 9773 xmlGenericError(xmlGenericErrorContext, 9774 "PP: entering DTD\n"); 9775#endif 9776 break; 9777 case XML_PARSER_ATTRIBUTE_VALUE: 9778 xmlGenericError(xmlGenericErrorContext, 9779 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 9780 ctxt->instate = XML_PARSER_START_TAG; 9781#ifdef DEBUG_PUSH 9782 xmlGenericError(xmlGenericErrorContext, 9783 "PP: entering START_TAG\n"); 9784#endif 9785 break; 9786 case XML_PARSER_SYSTEM_LITERAL: 9787 xmlGenericError(xmlGenericErrorContext, 9788 "PP: internal error, state == SYSTEM_LITERAL\n"); 9789 ctxt->instate = XML_PARSER_START_TAG; 9790#ifdef DEBUG_PUSH 9791 xmlGenericError(xmlGenericErrorContext, 9792 "PP: entering START_TAG\n"); 9793#endif 9794 break; 9795 case XML_PARSER_PUBLIC_LITERAL: 9796 xmlGenericError(xmlGenericErrorContext, 9797 "PP: internal error, state == PUBLIC_LITERAL\n"); 9798 ctxt->instate = XML_PARSER_START_TAG; 9799#ifdef DEBUG_PUSH 9800 xmlGenericError(xmlGenericErrorContext, 9801 "PP: entering START_TAG\n"); 9802#endif 9803 break; 9804 } 9805 } 9806done: 9807#ifdef DEBUG_PUSH 9808 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 9809#endif 9810 return(ret); 9811} 9812 9813/** 9814 * xmlParseChunk: 9815 * @ctxt: an XML parser context 9816 * @chunk: an char array 9817 * @size: the size in byte of the chunk 9818 * @terminate: last chunk indicator 9819 * 9820 * Parse a Chunk of memory 9821 * 9822 * Returns zero if no error, the xmlParserErrors otherwise. 
9823 */ 9824int 9825xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 9826 int terminate) { 9827 if (ctxt == NULL) 9828 return(XML_ERR_INTERNAL_ERROR); 9829 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9830 return(ctxt->errNo); 9831 if (ctxt->instate == XML_PARSER_START) 9832 xmlDetectSAX2(ctxt); 9833 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 9834 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 9835 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 9836 int cur = ctxt->input->cur - ctxt->input->base; 9837 int res; 9838 9839 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 9840 if (res < 0) { 9841 ctxt->errNo = XML_PARSER_EOF; 9842 ctxt->disableSAX = 1; 9843 return (XML_PARSER_EOF); 9844 } 9845 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9846 ctxt->input->cur = ctxt->input->base + cur; 9847 ctxt->input->end = 9848 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 9849#ifdef DEBUG_PUSH 9850 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 9851#endif 9852 9853 } else if (ctxt->instate != XML_PARSER_EOF) { 9854 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 9855 xmlParserInputBufferPtr in = ctxt->input->buf; 9856 if ((in->encoder != NULL) && (in->buffer != NULL) && 9857 (in->raw != NULL)) { 9858 int nbchars; 9859 9860 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 9861 if (nbchars < 0) { 9862 /* TODO 2.6.0 */ 9863 xmlGenericError(xmlGenericErrorContext, 9864 "xmlParseChunk: encoder error\n"); 9865 return(XML_ERR_INVALID_ENCODING); 9866 } 9867 } 9868 } 9869 } 9870 xmlParseTryOrFinish(ctxt, terminate); 9871 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 9872 return(ctxt->errNo); 9873 if (terminate) { 9874 /* 9875 * Check for termination 9876 */ 9877 int avail = 0; 9878 9879 if (ctxt->input != NULL) { 9880 if (ctxt->input->buf == NULL) 9881 avail = ctxt->input->length - 9882 
(ctxt->input->cur - ctxt->input->base); 9883 else 9884 avail = ctxt->input->buf->buffer->use - 9885 (ctxt->input->cur - ctxt->input->base); 9886 } 9887 9888 if ((ctxt->instate != XML_PARSER_EOF) && 9889 (ctxt->instate != XML_PARSER_EPILOG)) { 9890 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9891 } 9892 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 9893 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9894 } 9895 if (ctxt->instate != XML_PARSER_EOF) { 9896 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9897 ctxt->sax->endDocument(ctxt->userData); 9898 } 9899 ctxt->instate = XML_PARSER_EOF; 9900 } 9901 return((xmlParserErrors) ctxt->errNo); 9902} 9903 9904/************************************************************************ 9905 * * 9906 * I/O front end functions to the parser * 9907 * * 9908 ************************************************************************/ 9909 9910/** 9911 * xmlStopParser: 9912 * @ctxt: an XML parser context 9913 * 9914 * Blocks further parser processing 9915 */ 9916void 9917xmlStopParser(xmlParserCtxtPtr ctxt) { 9918 if (ctxt == NULL) 9919 return; 9920 ctxt->instate = XML_PARSER_EOF; 9921 ctxt->disableSAX = 1; 9922 if (ctxt->input != NULL) 9923 ctxt->input->cur = BAD_CAST""; 9924} 9925 9926/** 9927 * xmlCreatePushParserCtxt: 9928 * @sax: a SAX handler 9929 * @user_data: The user data returned on SAX callbacks 9930 * @chunk: a pointer to an array of chars 9931 * @size: number of chars in the array 9932 * @filename: an optional file name or URI 9933 * 9934 * Create a parser context for using the XML parser in push mode. 9935 * If @buffer and @size are non-NULL, the data is used to detect 9936 * the encoding. The remaining characters will be parsed so they 9937 * don't need to be fed in again through xmlParseChunk. 9938 * To allow content encoding detection, @size should be >= 4 9939 * The value of @filename is used for fetching external entities 9940 * and error/warning reports. 
9941 * 9942 * Returns the new parser context or NULL 9943 */ 9944 9945xmlParserCtxtPtr 9946xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 9947 const char *chunk, int size, const char *filename) { 9948 xmlParserCtxtPtr ctxt; 9949 xmlParserInputPtr inputStream; 9950 xmlParserInputBufferPtr buf; 9951 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 9952 9953 /* 9954 * plug some encoding conversion routines 9955 */ 9956 if ((chunk != NULL) && (size >= 4)) 9957 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 9958 9959 buf = xmlAllocParserInputBuffer(enc); 9960 if (buf == NULL) return(NULL); 9961 9962 ctxt = xmlNewParserCtxt(); 9963 if (ctxt == NULL) { 9964 xmlErrMemory(NULL, "creating parser: out of memory\n"); 9965 xmlFreeParserInputBuffer(buf); 9966 return(NULL); 9967 } 9968 ctxt->dictNames = 1; 9969 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 9970 if (ctxt->pushTab == NULL) { 9971 xmlErrMemory(ctxt, NULL); 9972 xmlFreeParserInputBuffer(buf); 9973 xmlFreeParserCtxt(ctxt); 9974 return(NULL); 9975 } 9976 if (sax != NULL) { 9977#ifdef LIBXML_SAX1_ENABLED 9978 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 9979#endif /* LIBXML_SAX1_ENABLED */ 9980 xmlFree(ctxt->sax); 9981 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 9982 if (ctxt->sax == NULL) { 9983 xmlErrMemory(ctxt, NULL); 9984 xmlFreeParserInputBuffer(buf); 9985 xmlFreeParserCtxt(ctxt); 9986 return(NULL); 9987 } 9988 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 9989 if (sax->initialized == XML_SAX2_MAGIC) 9990 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 9991 else 9992 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 9993 if (user_data != NULL) 9994 ctxt->userData = user_data; 9995 } 9996 if (filename == NULL) { 9997 ctxt->directory = NULL; 9998 } else { 9999 ctxt->directory = xmlParserGetDirectory(filename); 10000 } 10001 10002 inputStream = xmlNewInputStream(ctxt); 10003 if (inputStream == NULL) { 10004 
xmlFreeParserCtxt(ctxt); 10005 xmlFreeParserInputBuffer(buf); 10006 return(NULL); 10007 } 10008 10009 if (filename == NULL) 10010 inputStream->filename = NULL; 10011 else { 10012 inputStream->filename = (char *) 10013 xmlCanonicPath((const xmlChar *) filename); 10014 if (inputStream->filename == NULL) { 10015 xmlFreeParserCtxt(ctxt); 10016 xmlFreeParserInputBuffer(buf); 10017 return(NULL); 10018 } 10019 } 10020 inputStream->buf = buf; 10021 inputStream->base = inputStream->buf->buffer->content; 10022 inputStream->cur = inputStream->buf->buffer->content; 10023 inputStream->end = 10024 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 10025 10026 inputPush(ctxt, inputStream); 10027 10028 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10029 (ctxt->input->buf != NULL)) { 10030 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10031 int cur = ctxt->input->cur - ctxt->input->base; 10032 10033 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10034 10035 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10036 ctxt->input->cur = ctxt->input->base + cur; 10037 ctxt->input->end = 10038 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10039#ifdef DEBUG_PUSH 10040 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10041#endif 10042 } 10043 10044 if (enc != XML_CHAR_ENCODING_NONE) { 10045 xmlSwitchEncoding(ctxt, enc); 10046 } 10047 10048 return(ctxt); 10049} 10050#endif /* LIBXML_PUSH_ENABLED */ 10051 10052/** 10053 * xmlCreateIOParserCtxt: 10054 * @sax: a SAX handler 10055 * @user_data: The user data returned on SAX callbacks 10056 * @ioread: an I/O read function 10057 * @ioclose: an I/O close function 10058 * @ioctx: an I/O handler 10059 * @enc: the charset encoding if known 10060 * 10061 * Create a parser context for using the XML parser with an existing 10062 * I/O stream 10063 * 10064 * Returns the new parser context or NULL 10065 */ 10066xmlParserCtxtPtr 
10067xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10068 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 10069 void *ioctx, xmlCharEncoding enc) { 10070 xmlParserCtxtPtr ctxt; 10071 xmlParserInputPtr inputStream; 10072 xmlParserInputBufferPtr buf; 10073 10074 if (ioread == NULL) return(NULL); 10075 10076 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 10077 if (buf == NULL) return(NULL); 10078 10079 ctxt = xmlNewParserCtxt(); 10080 if (ctxt == NULL) { 10081 xmlFree(buf); 10082 return(NULL); 10083 } 10084 if (sax != NULL) { 10085#ifdef LIBXML_SAX1_ENABLED 10086 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 10087#endif /* LIBXML_SAX1_ENABLED */ 10088 xmlFree(ctxt->sax); 10089 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10090 if (ctxt->sax == NULL) { 10091 xmlErrMemory(ctxt, NULL); 10092 xmlFree(ctxt); 10093 return(NULL); 10094 } 10095 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 10096 if (sax->initialized == XML_SAX2_MAGIC) 10097 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10098 else 10099 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 10100 if (user_data != NULL) 10101 ctxt->userData = user_data; 10102 } 10103 10104 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 10105 if (inputStream == NULL) { 10106 xmlFreeParserCtxt(ctxt); 10107 return(NULL); 10108 } 10109 inputPush(ctxt, inputStream); 10110 10111 return(ctxt); 10112} 10113 10114#ifdef LIBXML_VALID_ENABLED 10115/************************************************************************ 10116 * * 10117 * Front ends when parsing a DTD * 10118 * * 10119 ************************************************************************/ 10120 10121/** 10122 * xmlIOParseDTD: 10123 * @sax: the SAX handler block or NULL 10124 * @input: an Input Buffer 10125 * @enc: the charset encoding if known 10126 * 10127 * Load and parse a DTD 10128 * 10129 * Returns the resulting xmlDtdPtr or NULL in case of error. 10130 * @input will be freed at parsing end. 
10131 */ 10132 10133xmlDtdPtr 10134xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 10135 xmlCharEncoding enc) { 10136 xmlDtdPtr ret = NULL; 10137 xmlParserCtxtPtr ctxt; 10138 xmlParserInputPtr pinput = NULL; 10139 xmlChar start[4]; 10140 10141 if (input == NULL) 10142 return(NULL); 10143 10144 ctxt = xmlNewParserCtxt(); 10145 if (ctxt == NULL) { 10146 return(NULL); 10147 } 10148 10149 /* 10150 * Set-up the SAX context 10151 */ 10152 if (sax != NULL) { 10153 if (ctxt->sax != NULL) 10154 xmlFree(ctxt->sax); 10155 ctxt->sax = sax; 10156 ctxt->userData = ctxt; 10157 } 10158 xmlDetectSAX2(ctxt); 10159 10160 /* 10161 * generate a parser input from the I/O handler 10162 */ 10163 10164 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 10165 if (pinput == NULL) { 10166 if (sax != NULL) ctxt->sax = NULL; 10167 xmlFreeParserCtxt(ctxt); 10168 return(NULL); 10169 } 10170 10171 /* 10172 * plug some encoding conversion routines here. 10173 */ 10174 xmlPushInput(ctxt, pinput); 10175 if (enc != XML_CHAR_ENCODING_NONE) { 10176 xmlSwitchEncoding(ctxt, enc); 10177 } 10178 10179 pinput->filename = NULL; 10180 pinput->line = 1; 10181 pinput->col = 1; 10182 pinput->base = ctxt->input->cur; 10183 pinput->cur = ctxt->input->cur; 10184 pinput->free = NULL; 10185 10186 /* 10187 * let's parse that entity knowing it's an external subset. 10188 */ 10189 ctxt->inSubset = 2; 10190 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10191 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10192 BAD_CAST "none", BAD_CAST "none"); 10193 10194 if ((enc == XML_CHAR_ENCODING_NONE) && 10195 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10196 /* 10197 * Get the 4 first bytes and decode the charset 10198 * if enc != XML_CHAR_ENCODING_NONE 10199 * plug some encoding conversion routines. 
10200 */ 10201 start[0] = RAW; 10202 start[1] = NXT(1); 10203 start[2] = NXT(2); 10204 start[3] = NXT(3); 10205 enc = xmlDetectCharEncoding(start, 4); 10206 if (enc != XML_CHAR_ENCODING_NONE) { 10207 xmlSwitchEncoding(ctxt, enc); 10208 } 10209 } 10210 10211 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 10212 10213 if (ctxt->myDoc != NULL) { 10214 if (ctxt->wellFormed) { 10215 ret = ctxt->myDoc->extSubset; 10216 ctxt->myDoc->extSubset = NULL; 10217 if (ret != NULL) { 10218 xmlNodePtr tmp; 10219 10220 ret->doc = NULL; 10221 tmp = ret->children; 10222 while (tmp != NULL) { 10223 tmp->doc = NULL; 10224 tmp = tmp->next; 10225 } 10226 } 10227 } else { 10228 ret = NULL; 10229 } 10230 xmlFreeDoc(ctxt->myDoc); 10231 ctxt->myDoc = NULL; 10232 } 10233 if (sax != NULL) ctxt->sax = NULL; 10234 xmlFreeParserCtxt(ctxt); 10235 10236 return(ret); 10237} 10238 10239/** 10240 * xmlSAXParseDTD: 10241 * @sax: the SAX handler block 10242 * @ExternalID: a NAME* containing the External ID of the DTD 10243 * @SystemID: a NAME* containing the URL to the DTD 10244 * 10245 * Load and parse an external subset. 10246 * 10247 * Returns the resulting xmlDtdPtr or NULL in case of error. 
10248 */ 10249 10250xmlDtdPtr 10251xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 10252 const xmlChar *SystemID) { 10253 xmlDtdPtr ret = NULL; 10254 xmlParserCtxtPtr ctxt; 10255 xmlParserInputPtr input = NULL; 10256 xmlCharEncoding enc; 10257 xmlChar* systemIdCanonic; 10258 10259 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 10260 10261 ctxt = xmlNewParserCtxt(); 10262 if (ctxt == NULL) { 10263 return(NULL); 10264 } 10265 10266 /* 10267 * Set-up the SAX context 10268 */ 10269 if (sax != NULL) { 10270 if (ctxt->sax != NULL) 10271 xmlFree(ctxt->sax); 10272 ctxt->sax = sax; 10273 ctxt->userData = ctxt; 10274 } 10275 10276 /* 10277 * Canonicalise the system ID 10278 */ 10279 systemIdCanonic = xmlCanonicPath(SystemID); 10280 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 10281 xmlFreeParserCtxt(ctxt); 10282 return(NULL); 10283 } 10284 10285 /* 10286 * Ask the Entity resolver to load the damn thing 10287 */ 10288 10289 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 10290 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic); 10291 if (input == NULL) { 10292 if (sax != NULL) ctxt->sax = NULL; 10293 xmlFreeParserCtxt(ctxt); 10294 if (systemIdCanonic != NULL) 10295 xmlFree(systemIdCanonic); 10296 return(NULL); 10297 } 10298 10299 /* 10300 * plug some encoding conversion routines here. 10301 */ 10302 xmlPushInput(ctxt, input); 10303 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10304 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 10305 xmlSwitchEncoding(ctxt, enc); 10306 } 10307 10308 if (input->filename == NULL) 10309 input->filename = (char *) systemIdCanonic; 10310 else 10311 xmlFree(systemIdCanonic); 10312 input->line = 1; 10313 input->col = 1; 10314 input->base = ctxt->input->cur; 10315 input->cur = ctxt->input->cur; 10316 input->free = NULL; 10317 10318 /* 10319 * let's parse that entity knowing it's an external subset. 
10320 */ 10321 ctxt->inSubset = 2; 10322 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10323 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10324 ExternalID, SystemID); 10325 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 10326 10327 if (ctxt->myDoc != NULL) { 10328 if (ctxt->wellFormed) { 10329 ret = ctxt->myDoc->extSubset; 10330 ctxt->myDoc->extSubset = NULL; 10331 if (ret != NULL) { 10332 xmlNodePtr tmp; 10333 10334 ret->doc = NULL; 10335 tmp = ret->children; 10336 while (tmp != NULL) { 10337 tmp->doc = NULL; 10338 tmp = tmp->next; 10339 } 10340 } 10341 } else { 10342 ret = NULL; 10343 } 10344 xmlFreeDoc(ctxt->myDoc); 10345 ctxt->myDoc = NULL; 10346 } 10347 if (sax != NULL) ctxt->sax = NULL; 10348 xmlFreeParserCtxt(ctxt); 10349 10350 return(ret); 10351} 10352 10353 10354/** 10355 * xmlParseDTD: 10356 * @ExternalID: a NAME* containing the External ID of the DTD 10357 * @SystemID: a NAME* containing the URL to the DTD 10358 * 10359 * Load and parse an external subset. 10360 * 10361 * Returns the resulting xmlDtdPtr or NULL in case of error. 10362 */ 10363 10364xmlDtdPtr 10365xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 10366 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 10367} 10368#endif /* LIBXML_VALID_ENABLED */ 10369 10370/************************************************************************ 10371 * * 10372 * Front ends when parsing an Entity * 10373 * * 10374 ************************************************************************/ 10375 10376/** 10377 * xmlParseCtxtExternalEntity: 10378 * @ctx: the existing parsing context 10379 * @URL: the URL for the entity to load 10380 * @ID: the System ID for the entity to load 10381 * @lst: the return value for the set of parsed nodes 10382 * 10383 * Parse an external general entity within an existing parsing context 10384 * An external general parsed entity is well-formed if it matches the 10385 * production labeled extParsedEnt. 
10386 * 10387 * [78] extParsedEnt ::= TextDecl? content 10388 * 10389 * Returns 0 if the entity is well formed, -1 in case of args problem and 10390 * the parser error code otherwise 10391 */ 10392 10393int 10394xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 10395 const xmlChar *ID, xmlNodePtr *lst) { 10396 xmlParserCtxtPtr ctxt; 10397 xmlDocPtr newDoc; 10398 xmlNodePtr newRoot; 10399 xmlSAXHandlerPtr oldsax = NULL; 10400 int ret = 0; 10401 xmlChar start[4]; 10402 xmlCharEncoding enc; 10403 10404 if (ctx == NULL) return(-1); 10405 10406 if (ctx->depth > 40) { 10407 return(XML_ERR_ENTITY_LOOP); 10408 } 10409 10410 if (lst != NULL) 10411 *lst = NULL; 10412 if ((URL == NULL) && (ID == NULL)) 10413 return(-1); 10414 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 10415 return(-1); 10416 10417 10418 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10419 if (ctxt == NULL) return(-1); 10420 ctxt->userData = ctxt; 10421 ctxt->_private = ctx->_private; 10422 oldsax = ctxt->sax; 10423 ctxt->sax = ctx->sax; 10424 xmlDetectSAX2(ctxt); 10425 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10426 if (newDoc == NULL) { 10427 xmlFreeParserCtxt(ctxt); 10428 return(-1); 10429 } 10430 if (ctx->myDoc->dict) { 10431 newDoc->dict = ctx->myDoc->dict; 10432 xmlDictReference(newDoc->dict); 10433 } 10434 if (ctx->myDoc != NULL) { 10435 newDoc->intSubset = ctx->myDoc->intSubset; 10436 newDoc->extSubset = ctx->myDoc->extSubset; 10437 } 10438 if (ctx->myDoc->URL != NULL) { 10439 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 10440 } 10441 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10442 if (newRoot == NULL) { 10443 ctxt->sax = oldsax; 10444 xmlFreeParserCtxt(ctxt); 10445 newDoc->intSubset = NULL; 10446 newDoc->extSubset = NULL; 10447 xmlFreeDoc(newDoc); 10448 return(-1); 10449 } 10450 xmlAddChild((xmlNodePtr) newDoc, newRoot); 10451 nodePush(ctxt, newDoc->children); 10452 if (ctx->myDoc == NULL) { 10453 ctxt->myDoc = newDoc; 10454 } else { 10455 
ctxt->myDoc = ctx->myDoc; 10456 newDoc->children->doc = ctx->myDoc; 10457 } 10458 10459 /* 10460 * Get the 4 first bytes and decode the charset 10461 * if enc != XML_CHAR_ENCODING_NONE 10462 * plug some encoding conversion routines. 10463 */ 10464 GROW 10465 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10466 start[0] = RAW; 10467 start[1] = NXT(1); 10468 start[2] = NXT(2); 10469 start[3] = NXT(3); 10470 enc = xmlDetectCharEncoding(start, 4); 10471 if (enc != XML_CHAR_ENCODING_NONE) { 10472 xmlSwitchEncoding(ctxt, enc); 10473 } 10474 } 10475 10476 /* 10477 * Parse a possible text declaration first 10478 */ 10479 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10480 xmlParseTextDecl(ctxt); 10481 } 10482 10483 /* 10484 * Doing validity checking on chunk doesn't make sense 10485 */ 10486 ctxt->instate = XML_PARSER_CONTENT; 10487 ctxt->validate = ctx->validate; 10488 ctxt->valid = ctx->valid; 10489 ctxt->loadsubset = ctx->loadsubset; 10490 ctxt->depth = ctx->depth + 1; 10491 ctxt->replaceEntities = ctx->replaceEntities; 10492 if (ctxt->validate) { 10493 ctxt->vctxt.error = ctx->vctxt.error; 10494 ctxt->vctxt.warning = ctx->vctxt.warning; 10495 } else { 10496 ctxt->vctxt.error = NULL; 10497 ctxt->vctxt.warning = NULL; 10498 } 10499 ctxt->vctxt.nodeTab = NULL; 10500 ctxt->vctxt.nodeNr = 0; 10501 ctxt->vctxt.nodeMax = 0; 10502 ctxt->vctxt.node = NULL; 10503 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10504 ctxt->dict = ctx->dict; 10505 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 10506 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 10507 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 10508 ctxt->dictNames = ctx->dictNames; 10509 ctxt->attsDefault = ctx->attsDefault; 10510 ctxt->attsSpecial = ctx->attsSpecial; 10511 ctxt->linenumbers = ctx->linenumbers; 10512 10513 xmlParseContent(ctxt); 10514 10515 ctx->validate = ctxt->validate; 10516 ctx->valid = ctxt->valid; 10517 if ((RAW == 
'<') && (NXT(1) == '/')) { 10518 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10519 } else if (RAW != 0) { 10520 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10521 } 10522 if (ctxt->node != newDoc->children) { 10523 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10524 } 10525 10526 if (!ctxt->wellFormed) { 10527 if (ctxt->errNo == 0) 10528 ret = 1; 10529 else 10530 ret = ctxt->errNo; 10531 } else { 10532 if (lst != NULL) { 10533 xmlNodePtr cur; 10534 10535 /* 10536 * Return the newly created nodeset after unlinking it from 10537 * they pseudo parent. 10538 */ 10539 cur = newDoc->children->children; 10540 *lst = cur; 10541 while (cur != NULL) { 10542 cur->parent = NULL; 10543 cur = cur->next; 10544 } 10545 newDoc->children->children = NULL; 10546 } 10547 ret = 0; 10548 } 10549 ctxt->sax = oldsax; 10550 ctxt->dict = NULL; 10551 ctxt->attsDefault = NULL; 10552 ctxt->attsSpecial = NULL; 10553 xmlFreeParserCtxt(ctxt); 10554 newDoc->intSubset = NULL; 10555 newDoc->extSubset = NULL; 10556 xmlFreeDoc(newDoc); 10557 10558 return(ret); 10559} 10560 10561/** 10562 * xmlParseExternalEntityPrivate: 10563 * @doc: the document the chunk pertains to 10564 * @oldctxt: the previous parser context if available 10565 * @sax: the SAX handler bloc (possibly NULL) 10566 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10567 * @depth: Used for loop detection, use 0 10568 * @URL: the URL for the entity to load 10569 * @ID: the System ID for the entity to load 10570 * @list: the return value for the set of parsed nodes 10571 * 10572 * Private version of xmlParseExternalEntity() 10573 * 10574 * Returns 0 if the entity is well formed, -1 in case of args problem and 10575 * the parser error code otherwise 10576 */ 10577 10578static xmlParserErrors 10579xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 10580 xmlSAXHandlerPtr sax, 10581 void *user_data, int depth, const xmlChar *URL, 10582 const xmlChar *ID, xmlNodePtr *list) { 10583 
xmlParserCtxtPtr ctxt; 10584 xmlDocPtr newDoc; 10585 xmlNodePtr newRoot; 10586 xmlSAXHandlerPtr oldsax = NULL; 10587 xmlParserErrors ret = XML_ERR_OK; 10588 xmlChar start[4]; 10589 xmlCharEncoding enc; 10590 10591 if (depth > 40) { 10592 return(XML_ERR_ENTITY_LOOP); 10593 } 10594 10595 10596 10597 if (list != NULL) 10598 *list = NULL; 10599 if ((URL == NULL) && (ID == NULL)) 10600 return(XML_ERR_INTERNAL_ERROR); 10601 if (doc == NULL) /* @@ relax but check for dereferences */ 10602 return(XML_ERR_INTERNAL_ERROR); 10603 10604 10605 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 10606 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10607 ctxt->userData = ctxt; 10608 if (oldctxt != NULL) { 10609 ctxt->_private = oldctxt->_private; 10610 ctxt->loadsubset = oldctxt->loadsubset; 10611 ctxt->validate = oldctxt->validate; 10612 ctxt->external = oldctxt->external; 10613 ctxt->record_info = oldctxt->record_info; 10614 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 10615 ctxt->node_seq.length = oldctxt->node_seq.length; 10616 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 10617 } else { 10618 /* 10619 * Doing validity checking on chunk without context 10620 * doesn't make sense 10621 */ 10622 ctxt->_private = NULL; 10623 ctxt->validate = 0; 10624 ctxt->external = 2; 10625 ctxt->loadsubset = 0; 10626 } 10627 if (sax != NULL) { 10628 oldsax = ctxt->sax; 10629 ctxt->sax = sax; 10630 if (user_data != NULL) 10631 ctxt->userData = user_data; 10632 } 10633 xmlDetectSAX2(ctxt); 10634 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10635 if (newDoc == NULL) { 10636 ctxt->node_seq.maximum = 0; 10637 ctxt->node_seq.length = 0; 10638 ctxt->node_seq.buffer = NULL; 10639 xmlFreeParserCtxt(ctxt); 10640 return(XML_ERR_INTERNAL_ERROR); 10641 } 10642 if (doc != NULL) { 10643 newDoc->intSubset = doc->intSubset; 10644 newDoc->extSubset = doc->extSubset; 10645 newDoc->dict = doc->dict; 10646 } else if (oldctxt != NULL) { 10647 newDoc->dict = oldctxt->dict; 10648 } 10649 
xmlDictReference(newDoc->dict); 10650 10651 if (doc->URL != NULL) { 10652 newDoc->URL = xmlStrdup(doc->URL); 10653 } 10654 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 10655 if (newRoot == NULL) { 10656 if (sax != NULL) 10657 ctxt->sax = oldsax; 10658 ctxt->node_seq.maximum = 0; 10659 ctxt->node_seq.length = 0; 10660 ctxt->node_seq.buffer = NULL; 10661 xmlFreeParserCtxt(ctxt); 10662 newDoc->intSubset = NULL; 10663 newDoc->extSubset = NULL; 10664 xmlFreeDoc(newDoc); 10665 return(XML_ERR_INTERNAL_ERROR); 10666 } 10667 xmlAddChild((xmlNodePtr) newDoc, newRoot); 10668 nodePush(ctxt, newDoc->children); 10669 if (doc == NULL) { 10670 ctxt->myDoc = newDoc; 10671 } else { 10672 ctxt->myDoc = doc; 10673 newRoot->doc = doc; 10674 } 10675 10676 /* 10677 * Get the 4 first bytes and decode the charset 10678 * if enc != XML_CHAR_ENCODING_NONE 10679 * plug some encoding conversion routines. 10680 */ 10681 GROW; 10682 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10683 start[0] = RAW; 10684 start[1] = NXT(1); 10685 start[2] = NXT(2); 10686 start[3] = NXT(3); 10687 enc = xmlDetectCharEncoding(start, 4); 10688 if (enc != XML_CHAR_ENCODING_NONE) { 10689 xmlSwitchEncoding(ctxt, enc); 10690 } 10691 } 10692 10693 /* 10694 * Parse a possible text declaration first 10695 */ 10696 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10697 xmlParseTextDecl(ctxt); 10698 } 10699 10700 ctxt->instate = XML_PARSER_CONTENT; 10701 ctxt->depth = depth; 10702 10703 xmlParseContent(ctxt); 10704 10705 if ((RAW == '<') && (NXT(1) == '/')) { 10706 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10707 } else if (RAW != 0) { 10708 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10709 } 10710 if (ctxt->node != newDoc->children) { 10711 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10712 } 10713 10714 if (!ctxt->wellFormed) { 10715 if (ctxt->errNo == 0) 10716 ret = XML_ERR_INTERNAL_ERROR; 10717 else 10718 ret = (xmlParserErrors)ctxt->errNo; 10719 } 
else { 10720 if (list != NULL) { 10721 xmlNodePtr cur; 10722 10723 /* 10724 * Return the newly created nodeset after unlinking it from 10725 * they pseudo parent. 10726 */ 10727 cur = newDoc->children->children; 10728 *list = cur; 10729 while (cur != NULL) { 10730 cur->parent = NULL; 10731 cur = cur->next; 10732 } 10733 newDoc->children->children = NULL; 10734 } 10735 ret = XML_ERR_OK; 10736 } 10737 if (sax != NULL) 10738 ctxt->sax = oldsax; 10739 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 10740 oldctxt->node_seq.length = ctxt->node_seq.length; 10741 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 10742 ctxt->node_seq.maximum = 0; 10743 ctxt->node_seq.length = 0; 10744 ctxt->node_seq.buffer = NULL; 10745 xmlFreeParserCtxt(ctxt); 10746 newDoc->intSubset = NULL; 10747 newDoc->extSubset = NULL; 10748 xmlFreeDoc(newDoc); 10749 10750 return(ret); 10751} 10752 10753#ifdef LIBXML_SAX1_ENABLED 10754/** 10755 * xmlParseExternalEntity: 10756 * @doc: the document the chunk pertains to 10757 * @sax: the SAX handler bloc (possibly NULL) 10758 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10759 * @depth: Used for loop detection, use 0 10760 * @URL: the URL for the entity to load 10761 * @ID: the System ID for the entity to load 10762 * @lst: the return value for the set of parsed nodes 10763 * 10764 * Parse an external general entity 10765 * An external general parsed entity is well-formed if it matches the 10766 * production labeled extParsedEnt. 10767 * 10768 * [78] extParsedEnt ::= TextDecl? 
content 10769 * 10770 * Returns 0 if the entity is well formed, -1 in case of args problem and 10771 * the parser error code otherwise 10772 */ 10773 10774int 10775xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 10776 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 10777 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 10778 ID, lst)); 10779} 10780 10781/** 10782 * xmlParseBalancedChunkMemory: 10783 * @doc: the document the chunk pertains to 10784 * @sax: the SAX handler bloc (possibly NULL) 10785 * @user_data: The user data returned on SAX callbacks (possibly NULL) 10786 * @depth: Used for loop detection, use 0 10787 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10788 * @lst: the return value for the set of parsed nodes 10789 * 10790 * Parse a well-balanced chunk of an XML document 10791 * called by the parser 10792 * The allowed sequence for the Well Balanced Chunk is the one defined by 10793 * the content production in the XML grammar: 10794 * 10795 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10796 * 10797 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 10798 * the parser error code otherwise 10799 */ 10800 10801int 10802xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 10803 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 10804 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 10805 depth, string, lst, 0 ); 10806} 10807#endif /* LIBXML_SAX1_ENABLED */ 10808 10809/** 10810 * xmlParseBalancedChunkMemoryInternal: 10811 * @oldctxt: the existing parsing context 10812 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 10813 * @user_data: the user data field for the parser context 10814 * @lst: the return value for the set of parsed nodes 10815 * 10816 * 10817 * Parse a well-balanced chunk of an XML document 10818 * called by the parser 10819 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 10820 * the content production in the XML grammar: 10821 * 10822 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10823 * 10824 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 10825 * error code otherwise 10826 * 10827 * In case recover is set to 1, the nodelist will not be empty even if 10828 * the parsed chunk is not well balanced. 10829 */ 10830static xmlParserErrors 10831xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 10832 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 10833 xmlParserCtxtPtr ctxt; 10834 xmlDocPtr newDoc = NULL; 10835 xmlNodePtr newRoot; 10836 xmlSAXHandlerPtr oldsax = NULL; 10837 xmlNodePtr content = NULL; 10838 xmlNodePtr last = NULL; 10839 int size; 10840 xmlParserErrors ret = XML_ERR_OK; 10841 10842 if (oldctxt->depth > 40) { 10843 return(XML_ERR_ENTITY_LOOP); 10844 } 10845 10846 10847 if (lst != NULL) 10848 *lst = NULL; 10849 if (string == NULL) 10850 return(XML_ERR_INTERNAL_ERROR); 10851 10852 size = xmlStrlen(string); 10853 10854 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 10855 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 10856 if (user_data != NULL) 10857 ctxt->userData = user_data; 10858 else 10859 ctxt->userData = ctxt; 10860 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 10861 ctxt->dict = oldctxt->dict; 10862 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 10863 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 10864 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 10865 10866 oldsax = ctxt->sax; 10867 ctxt->sax = oldctxt->sax; 10868 xmlDetectSAX2(ctxt); 10869 ctxt->replaceEntities = oldctxt->replaceEntities; 10870 ctxt->options = oldctxt->options; 10871 10872 ctxt->_private = oldctxt->_private; 10873 if (oldctxt->myDoc == NULL) { 10874 newDoc = xmlNewDoc(BAD_CAST "1.0"); 10875 if (newDoc == NULL) { 10876 ctxt->sax = 
oldsax; 10877 ctxt->dict = NULL; 10878 xmlFreeParserCtxt(ctxt); 10879 return(XML_ERR_INTERNAL_ERROR); 10880 } 10881 newDoc->dict = ctxt->dict; 10882 xmlDictReference(newDoc->dict); 10883 ctxt->myDoc = newDoc; 10884 } else { 10885 ctxt->myDoc = oldctxt->myDoc; 10886 content = ctxt->myDoc->children; 10887 last = ctxt->myDoc->last; 10888 } 10889 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 10890 if (newRoot == NULL) { 10891 ctxt->sax = oldsax; 10892 ctxt->dict = NULL; 10893 xmlFreeParserCtxt(ctxt); 10894 if (newDoc != NULL) { 10895 xmlFreeDoc(newDoc); 10896 } 10897 return(XML_ERR_INTERNAL_ERROR); 10898 } 10899 ctxt->myDoc->children = NULL; 10900 ctxt->myDoc->last = NULL; 10901 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 10902 nodePush(ctxt, ctxt->myDoc->children); 10903 ctxt->instate = XML_PARSER_CONTENT; 10904 ctxt->depth = oldctxt->depth + 1; 10905 10906 ctxt->validate = 0; 10907 ctxt->loadsubset = oldctxt->loadsubset; 10908 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 10909 /* 10910 * ID/IDREF registration will be done in xmlValidateElement below 10911 */ 10912 ctxt->loadsubset |= XML_SKIP_IDS; 10913 } 10914 ctxt->dictNames = oldctxt->dictNames; 10915 ctxt->attsDefault = oldctxt->attsDefault; 10916 ctxt->attsSpecial = oldctxt->attsSpecial; 10917 10918 xmlParseContent(ctxt); 10919 if ((RAW == '<') && (NXT(1) == '/')) { 10920 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10921 } else if (RAW != 0) { 10922 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10923 } 10924 if (ctxt->node != ctxt->myDoc->children) { 10925 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10926 } 10927 10928 if (!ctxt->wellFormed) { 10929 if (ctxt->errNo == 0) 10930 ret = XML_ERR_INTERNAL_ERROR; 10931 else 10932 ret = (xmlParserErrors)ctxt->errNo; 10933 } else { 10934 ret = XML_ERR_OK; 10935 } 10936 10937 if ((lst != NULL) && (ret == XML_ERR_OK)) { 10938 xmlNodePtr cur; 10939 10940 /* 10941 * Return the newly created nodeset after 
unlinking it from 10942 * they pseudo parent. 10943 */ 10944 cur = ctxt->myDoc->children->children; 10945 *lst = cur; 10946 while (cur != NULL) { 10947#ifdef LIBXML_VALID_ENABLED 10948 if (oldctxt->validate && oldctxt->wellFormed && 10949 oldctxt->myDoc && oldctxt->myDoc->intSubset) { 10950 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 10951 oldctxt->myDoc, cur); 10952 } 10953#endif /* LIBXML_VALID_ENABLED */ 10954 cur->parent = NULL; 10955 cur = cur->next; 10956 } 10957 ctxt->myDoc->children->children = NULL; 10958 } 10959 if (ctxt->myDoc != NULL) { 10960 xmlFreeNode(ctxt->myDoc->children); 10961 ctxt->myDoc->children = content; 10962 ctxt->myDoc->last = last; 10963 } 10964 10965 ctxt->sax = oldsax; 10966 ctxt->dict = NULL; 10967 ctxt->attsDefault = NULL; 10968 ctxt->attsSpecial = NULL; 10969 xmlFreeParserCtxt(ctxt); 10970 if (newDoc != NULL) { 10971 xmlFreeDoc(newDoc); 10972 } 10973 10974 return(ret); 10975} 10976 10977/** 10978 * xmlParseInNodeContext: 10979 * @node: the context node 10980 * @data: the input string 10981 * @datalen: the input string length in bytes 10982 * @options: a combination of xmlParserOption 10983 * @lst: the return value for the set of parsed nodes 10984 * 10985 * Parse a well-balanced chunk of an XML document 10986 * within the context (DTD, namespaces, etc ...) of the given node. 
10987 * 10988 * The allowed sequence for the data is a Well Balanced Chunk defined by 10989 * the content production in the XML grammar: 10990 * 10991 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10992 * 10993 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 10994 * error code otherwise 10995 */ 10996xmlParserErrors 10997xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 10998 int options, xmlNodePtr *lst) { 10999#ifdef SAX2 11000 xmlParserCtxtPtr ctxt; 11001 xmlDocPtr doc = NULL; 11002 xmlNodePtr fake, cur; 11003 int nsnr = 0; 11004 11005 xmlParserErrors ret = XML_ERR_OK; 11006 11007 /* 11008 * check all input parameters, grab the document 11009 */ 11010 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 11011 return(XML_ERR_INTERNAL_ERROR); 11012 switch (node->type) { 11013 case XML_ELEMENT_NODE: 11014 case XML_ATTRIBUTE_NODE: 11015 case XML_TEXT_NODE: 11016 case XML_CDATA_SECTION_NODE: 11017 case XML_ENTITY_REF_NODE: 11018 case XML_PI_NODE: 11019 case XML_COMMENT_NODE: 11020 case XML_DOCUMENT_NODE: 11021 case XML_HTML_DOCUMENT_NODE: 11022 break; 11023 default: 11024 return(XML_ERR_INTERNAL_ERROR); 11025 11026 } 11027 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 11028 (node->type != XML_DOCUMENT_NODE) && 11029 (node->type != XML_HTML_DOCUMENT_NODE)) 11030 node = node->parent; 11031 if (node == NULL) 11032 return(XML_ERR_INTERNAL_ERROR); 11033 if (node->type == XML_ELEMENT_NODE) 11034 doc = node->doc; 11035 else 11036 doc = (xmlDocPtr) node; 11037 if (doc == NULL) 11038 return(XML_ERR_INTERNAL_ERROR); 11039 11040 /* 11041 * allocate a context and set-up everything not related to the 11042 * node position in the tree 11043 */ 11044 if (doc->type == XML_DOCUMENT_NODE) 11045 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 11046#ifdef LIBXML_HTML_ENABLED 11047 else if (doc->type == XML_HTML_DOCUMENT_NODE) 11048 ctxt = htmlCreateMemoryParserCtxt((char *) 
data, datalen); 11049#endif 11050 else 11051 return(XML_ERR_INTERNAL_ERROR); 11052 11053 if (ctxt == NULL) 11054 return(XML_ERR_NO_MEMORY); 11055 fake = xmlNewComment(NULL); 11056 if (fake == NULL) { 11057 xmlFreeParserCtxt(ctxt); 11058 return(XML_ERR_NO_MEMORY); 11059 } 11060 xmlAddChild(node, fake); 11061 11062 /* 11063 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 11064 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 11065 * we must wait until the last moment to free the original one. 11066 */ 11067 if (doc->dict != NULL) { 11068 if (ctxt->dict != NULL) 11069 xmlDictFree(ctxt->dict); 11070 ctxt->dict = doc->dict; 11071 } else 11072 options |= XML_PARSE_NODICT; 11073 11074 xmlCtxtUseOptions(ctxt, options); 11075 xmlDetectSAX2(ctxt); 11076 ctxt->myDoc = doc; 11077 11078 if (node->type == XML_ELEMENT_NODE) { 11079 nodePush(ctxt, node); 11080 /* 11081 * initialize the SAX2 namespaces stack 11082 */ 11083 cur = node; 11084 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 11085 xmlNsPtr ns = cur->nsDef; 11086 const xmlChar *iprefix, *ihref; 11087 11088 while (ns != NULL) { 11089 if (ctxt->dict) { 11090 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 11091 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 11092 } else { 11093 iprefix = ns->prefix; 11094 ihref = ns->href; 11095 } 11096 11097 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 11098 nsPush(ctxt, iprefix, ihref); 11099 nsnr++; 11100 } 11101 ns = ns->next; 11102 } 11103 cur = cur->parent; 11104 } 11105 ctxt->instate = XML_PARSER_CONTENT; 11106 } 11107 11108 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 11109 /* 11110 * ID/IDREF registration will be done in xmlValidateElement below 11111 */ 11112 ctxt->loadsubset |= XML_SKIP_IDS; 11113 } 11114 11115 xmlParseContent(ctxt); 11116 nsPop(ctxt, nsnr); 11117 if ((RAW == '<') && (NXT(1) == '/')) { 11118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11119 } else if (RAW != 0) { 11120 
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11121 } 11122 if ((ctxt->node != NULL) && (ctxt->node != node)) { 11123 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11124 ctxt->wellFormed = 0; 11125 } 11126 11127 if (!ctxt->wellFormed) { 11128 if (ctxt->errNo == 0) 11129 ret = XML_ERR_INTERNAL_ERROR; 11130 else 11131 ret = (xmlParserErrors)ctxt->errNo; 11132 } else { 11133 ret = XML_ERR_OK; 11134 } 11135 11136 /* 11137 * Return the newly created nodeset after unlinking it from 11138 * the pseudo sibling. 11139 */ 11140 11141 cur = fake->next; 11142 fake->next = NULL; 11143 node->last = fake; 11144 11145 if (cur != NULL) { 11146 cur->prev = NULL; 11147 } 11148 11149 *lst = cur; 11150 11151 while (cur != NULL) { 11152 cur->parent = NULL; 11153 cur = cur->next; 11154 } 11155 11156 xmlUnlinkNode(fake); 11157 xmlFreeNode(fake); 11158 11159 11160 if (ret != XML_ERR_OK) { 11161 xmlFreeNodeList(*lst); 11162 *lst = NULL; 11163 } 11164 11165 if (doc->dict != NULL) 11166 ctxt->dict = NULL; 11167 xmlFreeParserCtxt(ctxt); 11168 11169 return(ret); 11170#else /* !SAX2 */ 11171 return(XML_ERR_INTERNAL_ERROR); 11172#endif 11173} 11174 11175#ifdef LIBXML_SAX1_ENABLED 11176/** 11177 * xmlParseBalancedChunkMemoryRecover: 11178 * @doc: the document the chunk pertains to 11179 * @sax: the SAX handler bloc (possibly NULL) 11180 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11181 * @depth: Used for loop detection, use 0 11182 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11183 * @lst: the return value for the set of parsed nodes 11184 * @recover: return nodes even if the data is broken (use 0) 11185 * 11186 * 11187 * Parse a well-balanced chunk of an XML document 11188 * called by the parser 11189 * The allowed sequence for the Well Balanced Chunk is the one defined by 11190 * the content production in the XML grammar: 11191 * 11192 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11193 * 11194 * Returns 0 if 
the chunk is well balanced, -1 in case of args problem and 11195 * the parser error code otherwise 11196 * 11197 * In case recover is set to 1, the nodelist will not be empty even if 11198 * the parsed chunk is not well balanced. 11199 */ 11200int 11201xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11202 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 11203 int recover) { 11204 xmlParserCtxtPtr ctxt; 11205 xmlDocPtr newDoc; 11206 xmlSAXHandlerPtr oldsax = NULL; 11207 xmlNodePtr content, newRoot; 11208 int size; 11209 int ret = 0; 11210 11211 if (depth > 40) { 11212 return(XML_ERR_ENTITY_LOOP); 11213 } 11214 11215 11216 if (lst != NULL) 11217 *lst = NULL; 11218 if (string == NULL) 11219 return(-1); 11220 11221 size = xmlStrlen(string); 11222 11223 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11224 if (ctxt == NULL) return(-1); 11225 ctxt->userData = ctxt; 11226 if (sax != NULL) { 11227 oldsax = ctxt->sax; 11228 ctxt->sax = sax; 11229 if (user_data != NULL) 11230 ctxt->userData = user_data; 11231 } 11232 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11233 if (newDoc == NULL) { 11234 xmlFreeParserCtxt(ctxt); 11235 return(-1); 11236 } 11237 if ((doc != NULL) && (doc->dict != NULL)) { 11238 xmlDictFree(ctxt->dict); 11239 ctxt->dict = doc->dict; 11240 xmlDictReference(ctxt->dict); 11241 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11242 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11243 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11244 ctxt->dictNames = 1; 11245 } else { 11246 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); 11247 } 11248 if (doc != NULL) { 11249 newDoc->intSubset = doc->intSubset; 11250 newDoc->extSubset = doc->extSubset; 11251 } 11252 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11253 if (newRoot == NULL) { 11254 if (sax != NULL) 11255 ctxt->sax = oldsax; 11256 xmlFreeParserCtxt(ctxt); 11257 newDoc->intSubset = NULL; 11258 
newDoc->extSubset = NULL; 11259 xmlFreeDoc(newDoc); 11260 return(-1); 11261 } 11262 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11263 nodePush(ctxt, newRoot); 11264 if (doc == NULL) { 11265 ctxt->myDoc = newDoc; 11266 } else { 11267 ctxt->myDoc = newDoc; 11268 newDoc->children->doc = doc; 11269 } 11270 ctxt->instate = XML_PARSER_CONTENT; 11271 ctxt->depth = depth; 11272 11273 /* 11274 * Doing validity checking on chunk doesn't make sense 11275 */ 11276 ctxt->validate = 0; 11277 ctxt->loadsubset = 0; 11278 xmlDetectSAX2(ctxt); 11279 11280 if ( doc != NULL ){ 11281 content = doc->children; 11282 doc->children = NULL; 11283 xmlParseContent(ctxt); 11284 doc->children = content; 11285 } 11286 else { 11287 xmlParseContent(ctxt); 11288 } 11289 if ((RAW == '<') && (NXT(1) == '/')) { 11290 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11291 } else if (RAW != 0) { 11292 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11293 } 11294 if (ctxt->node != newDoc->children) { 11295 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11296 } 11297 11298 if (!ctxt->wellFormed) { 11299 if (ctxt->errNo == 0) 11300 ret = 1; 11301 else 11302 ret = ctxt->errNo; 11303 } else { 11304 ret = 0; 11305 } 11306 11307 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 11308 xmlNodePtr cur; 11309 11310 /* 11311 * Return the newly created nodeset after unlinking it from 11312 * they pseudo parent. 
11313 */ 11314 cur = newDoc->children->children; 11315 *lst = cur; 11316 while (cur != NULL) { 11317 xmlSetTreeDoc(cur, doc); 11318 cur->parent = NULL; 11319 cur = cur->next; 11320 } 11321 newDoc->children->children = NULL; 11322 } 11323 11324 if (sax != NULL) 11325 ctxt->sax = oldsax; 11326 xmlFreeParserCtxt(ctxt); 11327 newDoc->intSubset = NULL; 11328 newDoc->extSubset = NULL; 11329 xmlFreeDoc(newDoc); 11330 11331 return(ret); 11332} 11333 11334/** 11335 * xmlSAXParseEntity: 11336 * @sax: the SAX handler block 11337 * @filename: the filename 11338 * 11339 * parse an XML external entity out of context and build a tree. 11340 * It use the given SAX function block to handle the parsing callback. 11341 * If sax is NULL, fallback to the default DOM tree building routines. 11342 * 11343 * [78] extParsedEnt ::= TextDecl? content 11344 * 11345 * This correspond to a "Well Balanced" chunk 11346 * 11347 * Returns the resulting document tree 11348 */ 11349 11350xmlDocPtr 11351xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 11352 xmlDocPtr ret; 11353 xmlParserCtxtPtr ctxt; 11354 11355 ctxt = xmlCreateFileParserCtxt(filename); 11356 if (ctxt == NULL) { 11357 return(NULL); 11358 } 11359 if (sax != NULL) { 11360 if (ctxt->sax != NULL) 11361 xmlFree(ctxt->sax); 11362 ctxt->sax = sax; 11363 ctxt->userData = NULL; 11364 } 11365 11366 xmlParseExtParsedEnt(ctxt); 11367 11368 if (ctxt->wellFormed) 11369 ret = ctxt->myDoc; 11370 else { 11371 ret = NULL; 11372 xmlFreeDoc(ctxt->myDoc); 11373 ctxt->myDoc = NULL; 11374 } 11375 if (sax != NULL) 11376 ctxt->sax = NULL; 11377 xmlFreeParserCtxt(ctxt); 11378 11379 return(ret); 11380} 11381 11382/** 11383 * xmlParseEntity: 11384 * @filename: the filename 11385 * 11386 * parse an XML external entity out of context and build a tree. 11387 * 11388 * [78] extParsedEnt ::= TextDecl? 
content 11389 * 11390 * This correspond to a "Well Balanced" chunk 11391 * 11392 * Returns the resulting document tree 11393 */ 11394 11395xmlDocPtr 11396xmlParseEntity(const char *filename) { 11397 return(xmlSAXParseEntity(NULL, filename)); 11398} 11399#endif /* LIBXML_SAX1_ENABLED */ 11400 11401/** 11402 * xmlCreateEntityParserCtxt: 11403 * @URL: the entity URL 11404 * @ID: the entity PUBLIC ID 11405 * @base: a possible base for the target URI 11406 * 11407 * Create a parser context for an external entity 11408 * Automatic support for ZLIB/Compress compressed document is provided 11409 * by default if found at compile-time. 11410 * 11411 * Returns the new parser context or NULL 11412 */ 11413xmlParserCtxtPtr 11414xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 11415 const xmlChar *base) { 11416 xmlParserCtxtPtr ctxt; 11417 xmlParserInputPtr inputStream; 11418 char *directory = NULL; 11419 xmlChar *uri; 11420 11421 ctxt = xmlNewParserCtxt(); 11422 if (ctxt == NULL) { 11423 return(NULL); 11424 } 11425 11426 uri = xmlBuildURI(URL, base); 11427 11428 if (uri == NULL) { 11429 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11430 if (inputStream == NULL) { 11431 xmlFreeParserCtxt(ctxt); 11432 return(NULL); 11433 } 11434 11435 inputPush(ctxt, inputStream); 11436 11437 if ((ctxt->directory == NULL) && (directory == NULL)) 11438 directory = xmlParserGetDirectory((char *)URL); 11439 if ((ctxt->directory == NULL) && (directory != NULL)) 11440 ctxt->directory = directory; 11441 } else { 11442 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 11443 if (inputStream == NULL) { 11444 xmlFree(uri); 11445 xmlFreeParserCtxt(ctxt); 11446 return(NULL); 11447 } 11448 11449 inputPush(ctxt, inputStream); 11450 11451 if ((ctxt->directory == NULL) && (directory == NULL)) 11452 directory = xmlParserGetDirectory((char *)uri); 11453 if ((ctxt->directory == NULL) && (directory != NULL)) 11454 ctxt->directory = directory; 11455 
xmlFree(uri); 11456 } 11457 return(ctxt); 11458} 11459 11460/************************************************************************ 11461 * * 11462 * Front ends when parsing from a file * 11463 * * 11464 ************************************************************************/ 11465 11466/** 11467 * xmlCreateURLParserCtxt: 11468 * @filename: the filename or URL 11469 * @options: a combination of xmlParserOption 11470 * 11471 * Create a parser context for a file or URL content. 11472 * Automatic support for ZLIB/Compress compressed document is provided 11473 * by default if found at compile-time and for file accesses 11474 * 11475 * Returns the new parser context or NULL 11476 */ 11477xmlParserCtxtPtr 11478xmlCreateURLParserCtxt(const char *filename, int options) 11479{ 11480 xmlParserCtxtPtr ctxt; 11481 xmlParserInputPtr inputStream; 11482 char *directory = NULL; 11483 11484 ctxt = xmlNewParserCtxt(); 11485 if (ctxt == NULL) { 11486 xmlErrMemory(NULL, "cannot allocate parser context"); 11487 return(NULL); 11488 } 11489 11490 if (options != 0) 11491 xmlCtxtUseOptions(ctxt, options); 11492 11493 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 11494 if (inputStream == NULL) { 11495 xmlFreeParserCtxt(ctxt); 11496 return(NULL); 11497 } 11498 11499 inputPush(ctxt, inputStream); 11500 if ((ctxt->directory == NULL) && (directory == NULL)) 11501 directory = xmlParserGetDirectory(filename); 11502 if ((ctxt->directory == NULL) && (directory != NULL)) 11503 ctxt->directory = directory; 11504 11505 return(ctxt); 11506} 11507 11508/** 11509 * xmlCreateFileParserCtxt: 11510 * @filename: the filename 11511 * 11512 * Create a parser context for a file content. 11513 * Automatic support for ZLIB/Compress compressed document is provided 11514 * by default if found at compile-time. 
11515 * 11516 * Returns the new parser context or NULL 11517 */ 11518xmlParserCtxtPtr 11519xmlCreateFileParserCtxt(const char *filename) 11520{ 11521 return(xmlCreateURLParserCtxt(filename, 0)); 11522} 11523 11524#ifdef LIBXML_SAX1_ENABLED 11525/** 11526 * xmlSAXParseFileWithData: 11527 * @sax: the SAX handler block 11528 * @filename: the filename 11529 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11530 * documents 11531 * @data: the userdata 11532 * 11533 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11534 * compressed document is provided by default if found at compile-time. 11535 * It use the given SAX function block to handle the parsing callback. 11536 * If sax is NULL, fallback to the default DOM tree building routines. 11537 * 11538 * User data (void *) is stored within the parser context in the 11539 * context's _private member, so it is available nearly everywhere in libxml 11540 * 11541 * Returns the resulting document tree 11542 */ 11543 11544xmlDocPtr 11545xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 11546 int recovery, void *data) { 11547 xmlDocPtr ret; 11548 xmlParserCtxtPtr ctxt; 11549 char *directory = NULL; 11550 11551 xmlInitParser(); 11552 11553 ctxt = xmlCreateFileParserCtxt(filename); 11554 if (ctxt == NULL) { 11555 return(NULL); 11556 } 11557 if (sax != NULL) { 11558 if (ctxt->sax != NULL) 11559 xmlFree(ctxt->sax); 11560 ctxt->sax = sax; 11561 } 11562 xmlDetectSAX2(ctxt); 11563 if (data!=NULL) { 11564 ctxt->_private = data; 11565 } 11566 11567 if ((ctxt->directory == NULL) && (directory == NULL)) 11568 directory = xmlParserGetDirectory(filename); 11569 if ((ctxt->directory == NULL) && (directory != NULL)) 11570 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 11571 11572 ctxt->recovery = recovery; 11573 11574 xmlParseDocument(ctxt); 11575 11576 if ((ctxt->wellFormed) || recovery) { 11577 ret = ctxt->myDoc; 11578 if (ret != NULL) { 11579 if 
(ctxt->input->buf->compressed > 0) 11580 ret->compression = 9; 11581 else 11582 ret->compression = ctxt->input->buf->compressed; 11583 } 11584 } 11585 else { 11586 ret = NULL; 11587 xmlFreeDoc(ctxt->myDoc); 11588 ctxt->myDoc = NULL; 11589 } 11590 if (sax != NULL) 11591 ctxt->sax = NULL; 11592 xmlFreeParserCtxt(ctxt); 11593 11594 return(ret); 11595} 11596 11597/** 11598 * xmlSAXParseFile: 11599 * @sax: the SAX handler block 11600 * @filename: the filename 11601 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11602 * documents 11603 * 11604 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11605 * compressed document is provided by default if found at compile-time. 11606 * It use the given SAX function block to handle the parsing callback. 11607 * If sax is NULL, fallback to the default DOM tree building routines. 11608 * 11609 * Returns the resulting document tree 11610 */ 11611 11612xmlDocPtr 11613xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 11614 int recovery) { 11615 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 11616} 11617 11618/** 11619 * xmlRecoverDoc: 11620 * @cur: a pointer to an array of xmlChar 11621 * 11622 * parse an XML in-memory document and build a tree. 11623 * In the case the document is not Well Formed, a tree is built anyway 11624 * 11625 * Returns the resulting document tree 11626 */ 11627 11628xmlDocPtr 11629xmlRecoverDoc(xmlChar *cur) { 11630 return(xmlSAXParseDoc(NULL, cur, 1)); 11631} 11632 11633/** 11634 * xmlParseFile: 11635 * @filename: the filename 11636 * 11637 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11638 * compressed document is provided by default if found at compile-time. 11639 * 11640 * Returns the resulting document tree if the file was wellformed, 11641 * NULL otherwise. 
11642 */ 11643 11644xmlDocPtr 11645xmlParseFile(const char *filename) { 11646 return(xmlSAXParseFile(NULL, filename, 0)); 11647} 11648 11649/** 11650 * xmlRecoverFile: 11651 * @filename: the filename 11652 * 11653 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 11654 * compressed document is provided by default if found at compile-time. 11655 * In the case the document is not Well Formed, a tree is built anyway 11656 * 11657 * Returns the resulting document tree 11658 */ 11659 11660xmlDocPtr 11661xmlRecoverFile(const char *filename) { 11662 return(xmlSAXParseFile(NULL, filename, 1)); 11663} 11664 11665 11666/** 11667 * xmlSetupParserForBuffer: 11668 * @ctxt: an XML parser context 11669 * @buffer: a xmlChar * buffer 11670 * @filename: a file name 11671 * 11672 * Setup the parser context to parse a new buffer; Clears any prior 11673 * contents from the parser context. The buffer parameter must not be 11674 * NULL, but the filename parameter can be 11675 */ 11676void 11677xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 11678 const char* filename) 11679{ 11680 xmlParserInputPtr input; 11681 11682 if ((ctxt == NULL) || (buffer == NULL)) 11683 return; 11684 11685 input = xmlNewInputStream(ctxt); 11686 if (input == NULL) { 11687 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 11688 xmlClearParserCtxt(ctxt); 11689 return; 11690 } 11691 11692 xmlClearParserCtxt(ctxt); 11693 if (filename != NULL) 11694 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 11695 input->base = buffer; 11696 input->cur = buffer; 11697 input->end = &buffer[xmlStrlen(buffer)]; 11698 inputPush(ctxt, input); 11699} 11700 11701/** 11702 * xmlSAXUserParseFile: 11703 * @sax: a SAX handler 11704 * @user_data: The user data returned on SAX callbacks 11705 * @filename: a file name 11706 * 11707 * parse an XML file and call the given SAX handler routines. 
11708 * Automatic support for ZLIB/Compress compressed document is provided 11709 * 11710 * Returns 0 in case of success or a error number otherwise 11711 */ 11712int 11713xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 11714 const char *filename) { 11715 int ret = 0; 11716 xmlParserCtxtPtr ctxt; 11717 11718 ctxt = xmlCreateFileParserCtxt(filename); 11719 if (ctxt == NULL) return -1; 11720#ifdef LIBXML_SAX1_ENABLED 11721 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11722#endif /* LIBXML_SAX1_ENABLED */ 11723 xmlFree(ctxt->sax); 11724 ctxt->sax = sax; 11725 xmlDetectSAX2(ctxt); 11726 11727 if (user_data != NULL) 11728 ctxt->userData = user_data; 11729 11730 xmlParseDocument(ctxt); 11731 11732 if (ctxt->wellFormed) 11733 ret = 0; 11734 else { 11735 if (ctxt->errNo != 0) 11736 ret = ctxt->errNo; 11737 else 11738 ret = -1; 11739 } 11740 if (sax != NULL) 11741 ctxt->sax = NULL; 11742 if (ctxt->myDoc != NULL) { 11743 xmlFreeDoc(ctxt->myDoc); 11744 ctxt->myDoc = NULL; 11745 } 11746 xmlFreeParserCtxt(ctxt); 11747 11748 return ret; 11749} 11750#endif /* LIBXML_SAX1_ENABLED */ 11751 11752/************************************************************************ 11753 * * 11754 * Front ends when parsing from memory * 11755 * * 11756 ************************************************************************/ 11757 11758/** 11759 * xmlCreateMemoryParserCtxt: 11760 * @buffer: a pointer to a char array 11761 * @size: the size of the array 11762 * 11763 * Create a parser context for an XML in-memory document. 
11764 * 11765 * Returns the new parser context or NULL 11766 */ 11767xmlParserCtxtPtr 11768xmlCreateMemoryParserCtxt(const char *buffer, int size) { 11769 xmlParserCtxtPtr ctxt; 11770 xmlParserInputPtr input; 11771 xmlParserInputBufferPtr buf; 11772 11773 if (buffer == NULL) 11774 return(NULL); 11775 if (size <= 0) 11776 return(NULL); 11777 11778 ctxt = xmlNewParserCtxt(); 11779 if (ctxt == NULL) 11780 return(NULL); 11781 11782 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 11783 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 11784 if (buf == NULL) { 11785 xmlFreeParserCtxt(ctxt); 11786 return(NULL); 11787 } 11788 11789 input = xmlNewInputStream(ctxt); 11790 if (input == NULL) { 11791 xmlFreeParserInputBuffer(buf); 11792 xmlFreeParserCtxt(ctxt); 11793 return(NULL); 11794 } 11795 11796 input->filename = NULL; 11797 input->buf = buf; 11798 input->base = input->buf->buffer->content; 11799 input->cur = input->buf->buffer->content; 11800 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 11801 11802 inputPush(ctxt, input); 11803 return(ctxt); 11804} 11805 11806#ifdef LIBXML_SAX1_ENABLED 11807/** 11808 * xmlSAXParseMemoryWithData: 11809 * @sax: the SAX handler block 11810 * @buffer: an pointer to a char array 11811 * @size: the size of the array 11812 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11813 * documents 11814 * @data: the userdata 11815 * 11816 * parse an XML in-memory block and use the given SAX function block 11817 * to handle the parsing callback. If sax is NULL, fallback to the default 11818 * DOM tree building routines. 
11819 * 11820 * User data (void *) is stored within the parser context in the 11821 * context's _private member, so it is available nearly everywhere in libxml 11822 * 11823 * Returns the resulting document tree 11824 */ 11825 11826xmlDocPtr 11827xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 11828 int size, int recovery, void *data) { 11829 xmlDocPtr ret; 11830 xmlParserCtxtPtr ctxt; 11831 11832 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11833 if (ctxt == NULL) return(NULL); 11834 if (sax != NULL) { 11835 if (ctxt->sax != NULL) 11836 xmlFree(ctxt->sax); 11837 ctxt->sax = sax; 11838 } 11839 xmlDetectSAX2(ctxt); 11840 if (data!=NULL) { 11841 ctxt->_private=data; 11842 } 11843 11844 ctxt->recovery = recovery; 11845 11846 xmlParseDocument(ctxt); 11847 11848 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 11849 else { 11850 ret = NULL; 11851 xmlFreeDoc(ctxt->myDoc); 11852 ctxt->myDoc = NULL; 11853 } 11854 if (sax != NULL) 11855 ctxt->sax = NULL; 11856 xmlFreeParserCtxt(ctxt); 11857 11858 return(ret); 11859} 11860 11861/** 11862 * xmlSAXParseMemory: 11863 * @sax: the SAX handler block 11864 * @buffer: an pointer to a char array 11865 * @size: the size of the array 11866 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 11867 * documents 11868 * 11869 * parse an XML in-memory block and use the given SAX function block 11870 * to handle the parsing callback. If sax is NULL, fallback to the default 11871 * DOM tree building routines. 11872 * 11873 * Returns the resulting document tree 11874 */ 11875xmlDocPtr 11876xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 11877 int size, int recovery) { 11878 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 11879} 11880 11881/** 11882 * xmlParseMemory: 11883 * @buffer: an pointer to a char array 11884 * @size: the size of the array 11885 * 11886 * parse an XML in-memory block and build a tree. 
11887 * 11888 * Returns the resulting document tree 11889 */ 11890 11891xmlDocPtr xmlParseMemory(const char *buffer, int size) { 11892 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 11893} 11894 11895/** 11896 * xmlRecoverMemory: 11897 * @buffer: an pointer to a char array 11898 * @size: the size of the array 11899 * 11900 * parse an XML in-memory block and build a tree. 11901 * In the case the document is not Well Formed, a tree is built anyway 11902 * 11903 * Returns the resulting document tree 11904 */ 11905 11906xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 11907 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 11908} 11909 11910/** 11911 * xmlSAXUserParseMemory: 11912 * @sax: a SAX handler 11913 * @user_data: The user data returned on SAX callbacks 11914 * @buffer: an in-memory XML document input 11915 * @size: the length of the XML document in bytes 11916 * 11917 * A better SAX parsing routine. 11918 * parse an XML in-memory buffer and call the given SAX handler routines. 
11919 * 11920 * Returns 0 in case of success or a error number otherwise 11921 */ 11922int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 11923 const char *buffer, int size) { 11924 int ret = 0; 11925 xmlParserCtxtPtr ctxt; 11926 xmlSAXHandlerPtr oldsax = NULL; 11927 11928 if (sax == NULL) return -1; 11929 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 11930 if (ctxt == NULL) return -1; 11931 oldsax = ctxt->sax; 11932 ctxt->sax = sax; 11933 xmlDetectSAX2(ctxt); 11934 if (user_data != NULL) 11935 ctxt->userData = user_data; 11936 11937 xmlParseDocument(ctxt); 11938 11939 if (ctxt->wellFormed) 11940 ret = 0; 11941 else { 11942 if (ctxt->errNo != 0) 11943 ret = ctxt->errNo; 11944 else 11945 ret = -1; 11946 } 11947 ctxt->sax = oldsax; 11948 if (ctxt->myDoc != NULL) { 11949 xmlFreeDoc(ctxt->myDoc); 11950 ctxt->myDoc = NULL; 11951 } 11952 xmlFreeParserCtxt(ctxt); 11953 11954 return ret; 11955} 11956#endif /* LIBXML_SAX1_ENABLED */ 11957 11958/** 11959 * xmlCreateDocParserCtxt: 11960 * @cur: a pointer to an array of xmlChar 11961 * 11962 * Creates a parser context for an XML in-memory document. 11963 * 11964 * Returns the new parser context or NULL 11965 */ 11966xmlParserCtxtPtr 11967xmlCreateDocParserCtxt(const xmlChar *cur) { 11968 int len; 11969 11970 if (cur == NULL) 11971 return(NULL); 11972 len = xmlStrlen(cur); 11973 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 11974} 11975 11976#ifdef LIBXML_SAX1_ENABLED 11977/** 11978 * xmlSAXParseDoc: 11979 * @sax: the SAX handler block 11980 * @cur: a pointer to an array of xmlChar 11981 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 11982 * documents 11983 * 11984 * parse an XML in-memory document and build a tree. 11985 * It use the given SAX function block to handle the parsing callback. 11986 * If sax is NULL, fallback to the default DOM tree building routines. 
11987 * 11988 * Returns the resulting document tree 11989 */ 11990 11991xmlDocPtr 11992xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 11993 xmlDocPtr ret; 11994 xmlParserCtxtPtr ctxt; 11995 xmlSAXHandlerPtr oldsax = NULL; 11996 11997 if (cur == NULL) return(NULL); 11998 11999 12000 ctxt = xmlCreateDocParserCtxt(cur); 12001 if (ctxt == NULL) return(NULL); 12002 if (sax != NULL) { 12003 oldsax = ctxt->sax; 12004 ctxt->sax = sax; 12005 ctxt->userData = NULL; 12006 } 12007 xmlDetectSAX2(ctxt); 12008 12009 xmlParseDocument(ctxt); 12010 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 12011 else { 12012 ret = NULL; 12013 xmlFreeDoc(ctxt->myDoc); 12014 ctxt->myDoc = NULL; 12015 } 12016 if (sax != NULL) 12017 ctxt->sax = oldsax; 12018 xmlFreeParserCtxt(ctxt); 12019 12020 return(ret); 12021} 12022 12023/** 12024 * xmlParseDoc: 12025 * @cur: a pointer to an array of xmlChar 12026 * 12027 * parse an XML in-memory document and build a tree. 12028 * 12029 * Returns the resulting document tree 12030 */ 12031 12032xmlDocPtr 12033xmlParseDoc(xmlChar *cur) { 12034 return(xmlSAXParseDoc(NULL, cur, 0)); 12035} 12036#endif /* LIBXML_SAX1_ENABLED */ 12037 12038#ifdef LIBXML_LEGACY_ENABLED 12039/************************************************************************ 12040 * * 12041 * Specific function to keep track of entities references * 12042 * and used by the XSLT debugger * 12043 * * 12044 ************************************************************************/ 12045 12046static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 12047 12048/** 12049 * xmlAddEntityReference: 12050 * @ent : A valid entity 12051 * @firstNode : A valid first node for children of entity 12052 * @lastNode : A valid last node of children entity 12053 * 12054 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 12055 */ 12056static void 12057xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 12058 xmlNodePtr lastNode) 12059{ 12060 if 
(xmlEntityRefFunc != NULL) { 12061 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 12062 } 12063} 12064 12065 12066/** 12067 * xmlSetEntityReferenceFunc: 12068 * @func: A valid function 12069 * 12070 * Set the function to call call back when a xml reference has been made 12071 */ 12072void 12073xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 12074{ 12075 xmlEntityRefFunc = func; 12076} 12077#endif /* LIBXML_LEGACY_ENABLED */ 12078 12079/************************************************************************ 12080 * * 12081 * Miscellaneous * 12082 * * 12083 ************************************************************************/ 12084 12085#ifdef LIBXML_XPATH_ENABLED 12086#include <libxml/xpath.h> 12087#endif 12088 12089extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 12090static int xmlParserInitialized = 0; 12091 12092/** 12093 * xmlInitParser: 12094 * 12095 * Initialization function for the XML parser. 12096 * This is not reentrant. Call once before processing in case of 12097 * use in multithreaded programs. 12098 */ 12099 12100void 12101xmlInitParser(void) { 12102 if (xmlParserInitialized != 0) 12103 return; 12104 12105 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 12106 (xmlGenericError == NULL)) 12107 initGenericErrorDefaultFunc(NULL); 12108 xmlInitGlobals(); 12109 xmlInitThreads(); 12110 xmlInitMemory(); 12111 xmlInitCharEncodingHandlers(); 12112 xmlDefaultSAXHandlerInit(); 12113 xmlRegisterDefaultInputCallbacks(); 12114#ifdef LIBXML_OUTPUT_ENABLED 12115 xmlRegisterDefaultOutputCallbacks(); 12116#endif /* LIBXML_OUTPUT_ENABLED */ 12117#ifdef LIBXML_HTML_ENABLED 12118 htmlInitAutoClose(); 12119 htmlDefaultSAXHandlerInit(); 12120#endif 12121#ifdef LIBXML_XPATH_ENABLED 12122 xmlXPathInit(); 12123#endif 12124 xmlParserInitialized = 1; 12125} 12126 12127/** 12128 * xmlCleanupParser: 12129 * 12130 * Cleanup function for the XML library. 
It tries to reclaim all 12131 * parsing related global memory allocated for the library processing. 12132 * It doesn't deallocate any document related memory. Calling this 12133 * function should not prevent reusing the library but one should 12134 * call xmlCleanupParser() only when the process has 12135 * finished using the library or XML document built with it. 12136 */ 12137 12138void 12139xmlCleanupParser(void) { 12140 if (!xmlParserInitialized) 12141 return; 12142 12143 xmlCleanupCharEncodingHandlers(); 12144#ifdef LIBXML_CATALOG_ENABLED 12145 xmlCatalogCleanup(); 12146#endif 12147 xmlCleanupInputCallbacks(); 12148#ifdef LIBXML_OUTPUT_ENABLED 12149 xmlCleanupOutputCallbacks(); 12150#endif 12151#ifdef LIBXML_SCHEMAS_ENABLED 12152 xmlSchemaCleanupTypes(); 12153 xmlRelaxNGCleanupTypes(); 12154#endif 12155 xmlCleanupGlobals(); 12156 xmlResetLastError(); 12157 xmlCleanupThreads(); /* must be last if called not from the main thread */ 12158 xmlCleanupMemory(); 12159 xmlParserInitialized = 0; 12160} 12161 12162/************************************************************************ 12163 * * 12164 * New set (2.6.0) of simpler and more flexible APIs * 12165 * * 12166 ************************************************************************/ 12167 12168/** 12169 * DICT_FREE: 12170 * @str: a string 12171 * 12172 * Free a string if it is not owned by the "dict" dictionnary in the 12173 * current scope 12174 */ 12175#define DICT_FREE(str) \ 12176 if ((str) && ((!dict) || \ 12177 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 12178 xmlFree((char *)(str)); 12179 12180/** 12181 * xmlCtxtReset: 12182 * @ctxt: an XML parser context 12183 * 12184 * Reset a parser context 12185 */ 12186void 12187xmlCtxtReset(xmlParserCtxtPtr ctxt) 12188{ 12189 xmlParserInputPtr input; 12190 xmlDictPtr dict; 12191 12192 if (ctxt == NULL) 12193 return; 12194 12195 dict = ctxt->dict; 12196 12197 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 12198 xmlFreeInputStream(input); 
12199 } 12200 ctxt->inputNr = 0; 12201 ctxt->input = NULL; 12202 12203 ctxt->spaceNr = 0; 12204 ctxt->spaceTab[0] = -1; 12205 ctxt->space = &ctxt->spaceTab[0]; 12206 12207 12208 ctxt->nodeNr = 0; 12209 ctxt->node = NULL; 12210 12211 ctxt->nameNr = 0; 12212 ctxt->name = NULL; 12213 12214 DICT_FREE(ctxt->version); 12215 ctxt->version = NULL; 12216 DICT_FREE(ctxt->encoding); 12217 ctxt->encoding = NULL; 12218 DICT_FREE(ctxt->directory); 12219 ctxt->directory = NULL; 12220 DICT_FREE(ctxt->extSubURI); 12221 ctxt->extSubURI = NULL; 12222 DICT_FREE(ctxt->extSubSystem); 12223 ctxt->extSubSystem = NULL; 12224 if (ctxt->myDoc != NULL) 12225 xmlFreeDoc(ctxt->myDoc); 12226 ctxt->myDoc = NULL; 12227 12228 ctxt->standalone = -1; 12229 ctxt->hasExternalSubset = 0; 12230 ctxt->hasPErefs = 0; 12231 ctxt->html = 0; 12232 ctxt->external = 0; 12233 ctxt->instate = XML_PARSER_START; 12234 ctxt->token = 0; 12235 12236 ctxt->wellFormed = 1; 12237 ctxt->nsWellFormed = 1; 12238 ctxt->disableSAX = 0; 12239 ctxt->valid = 1; 12240#if 0 12241 ctxt->vctxt.userData = ctxt; 12242 ctxt->vctxt.error = xmlParserValidityError; 12243 ctxt->vctxt.warning = xmlParserValidityWarning; 12244#endif 12245 ctxt->record_info = 0; 12246 ctxt->nbChars = 0; 12247 ctxt->checkIndex = 0; 12248 ctxt->inSubset = 0; 12249 ctxt->errNo = XML_ERR_OK; 12250 ctxt->depth = 0; 12251 ctxt->charset = XML_CHAR_ENCODING_UTF8; 12252 ctxt->catalogs = NULL; 12253 xmlInitNodeInfoSeq(&ctxt->node_seq); 12254 12255 if (ctxt->attsDefault != NULL) { 12256 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 12257 ctxt->attsDefault = NULL; 12258 } 12259 if (ctxt->attsSpecial != NULL) { 12260 xmlHashFree(ctxt->attsSpecial, NULL); 12261 ctxt->attsSpecial = NULL; 12262 } 12263 12264#ifdef LIBXML_CATALOG_ENABLED 12265 if (ctxt->catalogs != NULL) 12266 xmlCatalogFreeLocal(ctxt->catalogs); 12267#endif 12268 if (ctxt->lastError.code != XML_ERR_OK) 12269 xmlResetError(&ctxt->lastError); 12270} 12271 12272/** 12273 * xmlCtxtResetPush: 
12274 * @ctxt: an XML parser context 12275 * @chunk: a pointer to an array of chars 12276 * @size: number of chars in the array 12277 * @filename: an optional file name or URI 12278 * @encoding: the document encoding, or NULL 12279 * 12280 * Reset a push parser context 12281 * 12282 * Returns 0 in case of success and 1 in case of error 12283 */ 12284int 12285xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 12286 int size, const char *filename, const char *encoding) 12287{ 12288 xmlParserInputPtr inputStream; 12289 xmlParserInputBufferPtr buf; 12290 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12291 12292 if (ctxt == NULL) 12293 return(1); 12294 12295 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 12296 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12297 12298 buf = xmlAllocParserInputBuffer(enc); 12299 if (buf == NULL) 12300 return(1); 12301 12302 if (ctxt == NULL) { 12303 xmlFreeParserInputBuffer(buf); 12304 return(1); 12305 } 12306 12307 xmlCtxtReset(ctxt); 12308 12309 if (ctxt->pushTab == NULL) { 12310 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 12311 sizeof(xmlChar *)); 12312 if (ctxt->pushTab == NULL) { 12313 xmlErrMemory(ctxt, NULL); 12314 xmlFreeParserInputBuffer(buf); 12315 return(1); 12316 } 12317 } 12318 12319 if (filename == NULL) { 12320 ctxt->directory = NULL; 12321 } else { 12322 ctxt->directory = xmlParserGetDirectory(filename); 12323 } 12324 12325 inputStream = xmlNewInputStream(ctxt); 12326 if (inputStream == NULL) { 12327 xmlFreeParserInputBuffer(buf); 12328 return(1); 12329 } 12330 12331 if (filename == NULL) 12332 inputStream->filename = NULL; 12333 else 12334 inputStream->filename = (char *) 12335 xmlCanonicPath((const xmlChar *) filename); 12336 inputStream->buf = buf; 12337 inputStream->base = inputStream->buf->buffer->content; 12338 inputStream->cur = inputStream->buf->buffer->content; 12339 inputStream->end = 12340 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 12341 12342 
inputPush(ctxt, inputStream); 12343 12344 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12345 (ctxt->input->buf != NULL)) { 12346 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 12347 int cur = ctxt->input->cur - ctxt->input->base; 12348 12349 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12350 12351 ctxt->input->base = ctxt->input->buf->buffer->content + base; 12352 ctxt->input->cur = ctxt->input->base + cur; 12353 ctxt->input->end = 12354 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 12355 use]; 12356#ifdef DEBUG_PUSH 12357 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12358#endif 12359 } 12360 12361 if (encoding != NULL) { 12362 xmlCharEncodingHandlerPtr hdlr; 12363 12364 hdlr = xmlFindCharEncodingHandler(encoding); 12365 if (hdlr != NULL) { 12366 xmlSwitchToEncoding(ctxt, hdlr); 12367 } else { 12368 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 12369 "Unsupported encoding %s\n", BAD_CAST encoding); 12370 } 12371 } else if (enc != XML_CHAR_ENCODING_NONE) { 12372 xmlSwitchEncoding(ctxt, enc); 12373 } 12374 12375 return(0); 12376} 12377 12378/** 12379 * xmlCtxtUseOptions: 12380 * @ctxt: an XML parser context 12381 * @options: a combination of xmlParserOption 12382 * 12383 * Applies the options to the parser context 12384 * 12385 * Returns 0 in case of success, the set of unknown or unimplemented options 12386 * in case of error. 
12387 */ 12388int 12389xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 12390{ 12391 if (ctxt == NULL) 12392 return(-1); 12393 if (options & XML_PARSE_RECOVER) { 12394 ctxt->recovery = 1; 12395 options -= XML_PARSE_RECOVER; 12396 } else 12397 ctxt->recovery = 0; 12398 if (options & XML_PARSE_DTDLOAD) { 12399 ctxt->loadsubset = XML_DETECT_IDS; 12400 options -= XML_PARSE_DTDLOAD; 12401 } else 12402 ctxt->loadsubset = 0; 12403 if (options & XML_PARSE_DTDATTR) { 12404 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 12405 options -= XML_PARSE_DTDATTR; 12406 } 12407 if (options & XML_PARSE_NOENT) { 12408 ctxt->replaceEntities = 1; 12409 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 12410 options -= XML_PARSE_NOENT; 12411 } else 12412 ctxt->replaceEntities = 0; 12413 if (options & XML_PARSE_NOWARNING) { 12414 ctxt->sax->warning = NULL; 12415 options -= XML_PARSE_NOWARNING; 12416 } 12417 if (options & XML_PARSE_NOERROR) { 12418 ctxt->sax->error = NULL; 12419 ctxt->sax->fatalError = NULL; 12420 options -= XML_PARSE_NOERROR; 12421 } 12422 if (options & XML_PARSE_PEDANTIC) { 12423 ctxt->pedantic = 1; 12424 options -= XML_PARSE_PEDANTIC; 12425 } else 12426 ctxt->pedantic = 0; 12427 if (options & XML_PARSE_NOBLANKS) { 12428 ctxt->keepBlanks = 0; 12429 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 12430 options -= XML_PARSE_NOBLANKS; 12431 } else 12432 ctxt->keepBlanks = 1; 12433 if (options & XML_PARSE_DTDVALID) { 12434 ctxt->validate = 1; 12435 if (options & XML_PARSE_NOWARNING) 12436 ctxt->vctxt.warning = NULL; 12437 if (options & XML_PARSE_NOERROR) 12438 ctxt->vctxt.error = NULL; 12439 options -= XML_PARSE_DTDVALID; 12440 } else 12441 ctxt->validate = 0; 12442#ifdef LIBXML_SAX1_ENABLED 12443 if (options & XML_PARSE_SAX1) { 12444 ctxt->sax->startElement = xmlSAX2StartElement; 12445 ctxt->sax->endElement = xmlSAX2EndElement; 12446 ctxt->sax->startElementNs = NULL; 12447 ctxt->sax->endElementNs = NULL; 12448 ctxt->sax->initialized = 1; 12449 options -= XML_PARSE_SAX1; 
12450 } 12451#endif /* LIBXML_SAX1_ENABLED */ 12452 if (options & XML_PARSE_NODICT) { 12453 ctxt->dictNames = 0; 12454 options -= XML_PARSE_NODICT; 12455 } else { 12456 ctxt->dictNames = 1; 12457 } 12458 if (options & XML_PARSE_NOCDATA) { 12459 ctxt->sax->cdataBlock = NULL; 12460 options -= XML_PARSE_NOCDATA; 12461 } 12462 if (options & XML_PARSE_NSCLEAN) { 12463 ctxt->options |= XML_PARSE_NSCLEAN; 12464 options -= XML_PARSE_NSCLEAN; 12465 } 12466 if (options & XML_PARSE_NONET) { 12467 ctxt->options |= XML_PARSE_NONET; 12468 options -= XML_PARSE_NONET; 12469 } 12470 ctxt->linenumbers = 1; 12471 return (options); 12472} 12473 12474/** 12475 * xmlDoRead: 12476 * @ctxt: an XML parser context 12477 * @URL: the base URL to use for the document 12478 * @encoding: the document encoding, or NULL 12479 * @options: a combination of xmlParserOption 12480 * @reuse: keep the context for reuse 12481 * 12482 * Common front-end for the xmlRead functions 12483 * 12484 * Returns the resulting document tree or NULL 12485 */ 12486static xmlDocPtr 12487xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 12488 int options, int reuse) 12489{ 12490 xmlDocPtr ret; 12491 12492 xmlCtxtUseOptions(ctxt, options); 12493 if (encoding != NULL) { 12494 xmlCharEncodingHandlerPtr hdlr; 12495 12496 hdlr = xmlFindCharEncodingHandler(encoding); 12497 if (hdlr != NULL) 12498 xmlSwitchToEncoding(ctxt, hdlr); 12499 } 12500 if ((URL != NULL) && (ctxt->input != NULL) && 12501 (ctxt->input->filename == NULL)) 12502 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 12503 xmlParseDocument(ctxt); 12504 if ((ctxt->wellFormed) || ctxt->recovery) 12505 ret = ctxt->myDoc; 12506 else { 12507 ret = NULL; 12508 if (ctxt->myDoc != NULL) { 12509 xmlFreeDoc(ctxt->myDoc); 12510 } 12511 } 12512 ctxt->myDoc = NULL; 12513 if (!reuse) { 12514 xmlFreeParserCtxt(ctxt); 12515 } 12516 12517 return (ret); 12518} 12519 12520/** 12521 * xmlReadDoc: 12522 * @cur: a pointer to a zero terminated 
string 12523 * @URL: the base URL to use for the document 12524 * @encoding: the document encoding, or NULL 12525 * @options: a combination of xmlParserOption 12526 * 12527 * parse an XML in-memory document and build a tree. 12528 * 12529 * Returns the resulting document tree 12530 */ 12531xmlDocPtr 12532xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 12533{ 12534 xmlParserCtxtPtr ctxt; 12535 12536 if (cur == NULL) 12537 return (NULL); 12538 12539 ctxt = xmlCreateDocParserCtxt(cur); 12540 if (ctxt == NULL) 12541 return (NULL); 12542 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12543} 12544 12545/** 12546 * xmlReadFile: 12547 * @filename: a file or URL 12548 * @encoding: the document encoding, or NULL 12549 * @options: a combination of xmlParserOption 12550 * 12551 * parse an XML file from the filesystem or the network. 12552 * 12553 * Returns the resulting document tree 12554 */ 12555xmlDocPtr 12556xmlReadFile(const char *filename, const char *encoding, int options) 12557{ 12558 xmlParserCtxtPtr ctxt; 12559 12560 ctxt = xmlCreateURLParserCtxt(filename, options); 12561 if (ctxt == NULL) 12562 return (NULL); 12563 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 12564} 12565 12566/** 12567 * xmlReadMemory: 12568 * @buffer: a pointer to a char array 12569 * @size: the size of the array 12570 * @URL: the base URL to use for the document 12571 * @encoding: the document encoding, or NULL 12572 * @options: a combination of xmlParserOption 12573 * 12574 * parse an XML in-memory document and build a tree. 
12575 * 12576 * Returns the resulting document tree 12577 */ 12578xmlDocPtr 12579xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 12580{ 12581 xmlParserCtxtPtr ctxt; 12582 12583 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12584 if (ctxt == NULL) 12585 return (NULL); 12586 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12587} 12588 12589/** 12590 * xmlReadFd: 12591 * @fd: an open file descriptor 12592 * @URL: the base URL to use for the document 12593 * @encoding: the document encoding, or NULL 12594 * @options: a combination of xmlParserOption 12595 * 12596 * parse an XML from a file descriptor and build a tree. 12597 * NOTE that the file descriptor will not be closed when the 12598 * reader is closed or reset. 12599 * 12600 * Returns the resulting document tree 12601 */ 12602xmlDocPtr 12603xmlReadFd(int fd, const char *URL, const char *encoding, int options) 12604{ 12605 xmlParserCtxtPtr ctxt; 12606 xmlParserInputBufferPtr input; 12607 xmlParserInputPtr stream; 12608 12609 if (fd < 0) 12610 return (NULL); 12611 12612 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12613 if (input == NULL) 12614 return (NULL); 12615 input->closecallback = NULL; 12616 ctxt = xmlNewParserCtxt(); 12617 if (ctxt == NULL) { 12618 xmlFreeParserInputBuffer(input); 12619 return (NULL); 12620 } 12621 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12622 if (stream == NULL) { 12623 xmlFreeParserInputBuffer(input); 12624 xmlFreeParserCtxt(ctxt); 12625 return (NULL); 12626 } 12627 inputPush(ctxt, stream); 12628 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12629} 12630 12631/** 12632 * xmlReadIO: 12633 * @ioread: an I/O read function 12634 * @ioclose: an I/O close function 12635 * @ioctx: an I/O handler 12636 * @URL: the base URL to use for the document 12637 * @encoding: the document encoding, or NULL 12638 * @options: a combination of xmlParserOption 12639 * 12640 * parse an XML document from 
I/O functions and source and build a tree. 12641 * 12642 * Returns the resulting document tree 12643 */ 12644xmlDocPtr 12645xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12646 void *ioctx, const char *URL, const char *encoding, int options) 12647{ 12648 xmlParserCtxtPtr ctxt; 12649 xmlParserInputBufferPtr input; 12650 xmlParserInputPtr stream; 12651 12652 if (ioread == NULL) 12653 return (NULL); 12654 12655 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 12656 XML_CHAR_ENCODING_NONE); 12657 if (input == NULL) 12658 return (NULL); 12659 ctxt = xmlNewParserCtxt(); 12660 if (ctxt == NULL) { 12661 xmlFreeParserInputBuffer(input); 12662 return (NULL); 12663 } 12664 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12665 if (stream == NULL) { 12666 xmlFreeParserInputBuffer(input); 12667 xmlFreeParserCtxt(ctxt); 12668 return (NULL); 12669 } 12670 inputPush(ctxt, stream); 12671 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 12672} 12673 12674/** 12675 * xmlCtxtReadDoc: 12676 * @ctxt: an XML parser context 12677 * @cur: a pointer to a zero terminated string 12678 * @URL: the base URL to use for the document 12679 * @encoding: the document encoding, or NULL 12680 * @options: a combination of xmlParserOption 12681 * 12682 * parse an XML in-memory document and build a tree. 
12683 * This reuses the existing @ctxt parser context 12684 * 12685 * Returns the resulting document tree 12686 */ 12687xmlDocPtr 12688xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 12689 const char *URL, const char *encoding, int options) 12690{ 12691 xmlParserInputPtr stream; 12692 12693 if (cur == NULL) 12694 return (NULL); 12695 if (ctxt == NULL) 12696 return (NULL); 12697 12698 xmlCtxtReset(ctxt); 12699 12700 stream = xmlNewStringInputStream(ctxt, cur); 12701 if (stream == NULL) { 12702 return (NULL); 12703 } 12704 inputPush(ctxt, stream); 12705 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12706} 12707 12708/** 12709 * xmlCtxtReadFile: 12710 * @ctxt: an XML parser context 12711 * @filename: a file or URL 12712 * @encoding: the document encoding, or NULL 12713 * @options: a combination of xmlParserOption 12714 * 12715 * parse an XML file from the filesystem or the network. 12716 * This reuses the existing @ctxt parser context 12717 * 12718 * Returns the resulting document tree 12719 */ 12720xmlDocPtr 12721xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 12722 const char *encoding, int options) 12723{ 12724 xmlParserInputPtr stream; 12725 12726 if (filename == NULL) 12727 return (NULL); 12728 if (ctxt == NULL) 12729 return (NULL); 12730 12731 xmlCtxtReset(ctxt); 12732 12733 stream = xmlNewInputFromFile(ctxt, filename); 12734 if (stream == NULL) { 12735 return (NULL); 12736 } 12737 inputPush(ctxt, stream); 12738 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 12739} 12740 12741/** 12742 * xmlCtxtReadMemory: 12743 * @ctxt: an XML parser context 12744 * @buffer: a pointer to a char array 12745 * @size: the size of the array 12746 * @URL: the base URL to use for the document 12747 * @encoding: the document encoding, or NULL 12748 * @options: a combination of xmlParserOption 12749 * 12750 * parse an XML in-memory document and build a tree. 
12751 * This reuses the existing @ctxt parser context 12752 * 12753 * Returns the resulting document tree 12754 */ 12755xmlDocPtr 12756xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 12757 const char *URL, const char *encoding, int options) 12758{ 12759 xmlParserInputBufferPtr input; 12760 xmlParserInputPtr stream; 12761 12762 if (ctxt == NULL) 12763 return (NULL); 12764 if (buffer == NULL) 12765 return (NULL); 12766 12767 xmlCtxtReset(ctxt); 12768 12769 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12770 if (input == NULL) { 12771 return(NULL); 12772 } 12773 12774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12775 if (stream == NULL) { 12776 xmlFreeParserInputBuffer(input); 12777 return(NULL); 12778 } 12779 12780 inputPush(ctxt, stream); 12781 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12782} 12783 12784/** 12785 * xmlCtxtReadFd: 12786 * @ctxt: an XML parser context 12787 * @fd: an open file descriptor 12788 * @URL: the base URL to use for the document 12789 * @encoding: the document encoding, or NULL 12790 * @options: a combination of xmlParserOption 12791 * 12792 * parse an XML from a file descriptor and build a tree. 12793 * This reuses the existing @ctxt parser context 12794 * NOTE that the file descriptor will not be closed when the 12795 * reader is closed or reset. 
12796 * 12797 * Returns the resulting document tree 12798 */ 12799xmlDocPtr 12800xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 12801 const char *URL, const char *encoding, int options) 12802{ 12803 xmlParserInputBufferPtr input; 12804 xmlParserInputPtr stream; 12805 12806 if (fd < 0) 12807 return (NULL); 12808 if (ctxt == NULL) 12809 return (NULL); 12810 12811 xmlCtxtReset(ctxt); 12812 12813 12814 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 12815 if (input == NULL) 12816 return (NULL); 12817 input->closecallback = NULL; 12818 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12819 if (stream == NULL) { 12820 xmlFreeParserInputBuffer(input); 12821 return (NULL); 12822 } 12823 inputPush(ctxt, stream); 12824 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12825} 12826 12827/** 12828 * xmlCtxtReadIO: 12829 * @ctxt: an XML parser context 12830 * @ioread: an I/O read function 12831 * @ioclose: an I/O close function 12832 * @ioctx: an I/O handler 12833 * @URL: the base URL to use for the document 12834 * @encoding: the document encoding, or NULL 12835 * @options: a combination of xmlParserOption 12836 * 12837 * parse an XML document from I/O functions and source and build a tree. 
12838 * This reuses the existing @ctxt parser context 12839 * 12840 * Returns the resulting document tree 12841 */ 12842xmlDocPtr 12843xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 12844 xmlInputCloseCallback ioclose, void *ioctx, 12845 const char *URL, 12846 const char *encoding, int options) 12847{ 12848 xmlParserInputBufferPtr input; 12849 xmlParserInputPtr stream; 12850 12851 if (ioread == NULL) 12852 return (NULL); 12853 if (ctxt == NULL) 12854 return (NULL); 12855 12856 xmlCtxtReset(ctxt); 12857 12858 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 12859 XML_CHAR_ENCODING_NONE); 12860 if (input == NULL) 12861 return (NULL); 12862 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12863 if (stream == NULL) { 12864 xmlFreeParserInputBuffer(input); 12865 return (NULL); 12866 } 12867 inputPush(ctxt, stream); 12868 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 12869} 12870